opnsense-src/lib/libc/amd64/string/memcmp.S

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

421 lines
9.7 KiB
ArmAsm
Raw Normal View History

/*-
* Copyright (c) 2018, 2023 The FreeBSD Foundation
*
* This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Portions of this software were developed by Robert Clausecker
* <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <machine/asm.h>
#include <machine/param.h>
#include "amd64_archlevel.h"
/*
* Note: this routine was written with kernel use in mind (read: no simd),
* it is only present in userspace as a temporary measure until something
* better gets imported.
*/
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
#ifdef BCMP
#define memcmp bcmp
#endif
ARCHFUNCS(memcmp)
ARCHFUNC(memcmp, scalar)
ARCHFUNC(memcmp, baseline)
ENDARCHFUNCS(memcmp)
ARCHENTRY(memcmp, scalar)
xorl %eax,%eax
10:
cmpq $16,%rdx
ja 101632f
cmpb $8,%dl
jg 100816f
cmpb $4,%dl
jg 100408f
cmpb $2,%dl
jge 100204f
cmpb $1,%dl
jl 100000f
movzbl (%rdi),%eax
movzbl (%rsi),%r8d
subl %r8d,%eax
100000:
ret
ALIGN_TEXT
100816:
movq (%rdi),%r8
movq (%rsi),%r9
cmpq %r8,%r9
jne 80f
movq -8(%rdi,%rdx),%r8
movq -8(%rsi,%rdx),%r9
cmpq %r8,%r9
jne 10081608f
ret
ALIGN_TEXT
100408:
movl (%rdi),%r8d
movl (%rsi),%r9d
cmpl %r8d,%r9d
jne 80f
movl -4(%rdi,%rdx),%r8d
movl -4(%rsi,%rdx),%r9d
cmpl %r8d,%r9d
jne 10040804f
ret
ALIGN_TEXT
100204:
movzwl (%rdi),%r8d
movzwl (%rsi),%r9d
cmpl %r8d,%r9d
jne 1f
movzwl -2(%rdi,%rdx),%r8d
movzwl -2(%rsi,%rdx),%r9d
cmpl %r8d,%r9d
jne 1f
ret
ALIGN_TEXT
101632:
cmpq $32,%rdx
ja 103200f
movq (%rdi),%r8
movq (%rsi),%r9
cmpq %r8,%r9
jne 80f
movq 8(%rdi),%r8
movq 8(%rsi),%r9
cmpq %r8,%r9
jne 10163208f
movq -16(%rdi,%rdx),%r8
movq -16(%rsi,%rdx),%r9
cmpq %r8,%r9
jne 10163216f
movq -8(%rdi,%rdx),%r8
movq -8(%rsi,%rdx),%r9
cmpq %r8,%r9
jne 10163224f
ret
ALIGN_TEXT
103200:
movq (%rdi),%r8
movq 8(%rdi),%r9
subq (%rsi),%r8
subq 8(%rsi),%r9
orq %r8,%r9
jnz 10320000f
movq 16(%rdi),%r8
movq 24(%rdi),%r9
subq 16(%rsi),%r8
subq 24(%rsi),%r9
orq %r8,%r9
jnz 10320016f
leaq 32(%rdi),%rdi
leaq 32(%rsi),%rsi
subq $32,%rdx
cmpq $32,%rdx
jae 103200b
cmpb $0,%dl
jne 10b
ret
/*
* Mismatch was found.
*/
#ifdef BCMP
ALIGN_TEXT
10320016:
10320000:
10081608:
10163224:
10163216:
10163208:
10040804:
80:
1:
leal 1(%eax),%eax
ret
#else
/*
* We need to compute the difference between strings.
* Start with narrowing the range down (16 -> 8 -> 4 bytes).
*/
ALIGN_TEXT
10320016:
leaq 16(%rdi),%rdi
leaq 16(%rsi),%rsi
10320000:
movq (%rdi),%r8
movq (%rsi),%r9
cmpq %r8,%r9
jne 80f
leaq 8(%rdi),%rdi
leaq 8(%rsi),%rsi
jmp 80f
ALIGN_TEXT
10081608:
10163224:
leaq -8(%rdi,%rdx),%rdi
leaq -8(%rsi,%rdx),%rsi
jmp 80f
ALIGN_TEXT
10163216:
leaq -16(%rdi,%rdx),%rdi
leaq -16(%rsi,%rdx),%rsi
jmp 80f
ALIGN_TEXT
10163208:
leaq 8(%rdi),%rdi
leaq 8(%rsi),%rsi
jmp 80f
ALIGN_TEXT
10040804:
leaq -4(%rdi,%rdx),%rdi
leaq -4(%rsi,%rdx),%rsi
jmp 1f
ALIGN_TEXT
80:
movl (%rdi),%r8d
movl (%rsi),%r9d
cmpl %r8d,%r9d
jne 1f
leaq 4(%rdi),%rdi
leaq 4(%rsi),%rsi
/*
* We have up to 4 bytes to inspect.
*/
1:
movzbl (%rdi),%eax
movzbl (%rsi),%r8d
cmpb %r8b,%al
jne 2f
movzbl 1(%rdi),%eax
movzbl 1(%rsi),%r8d
cmpb %r8b,%al
jne 2f
movzbl 2(%rdi),%eax
movzbl 2(%rsi),%r8d
cmpb %r8b,%al
jne 2f
movzbl 3(%rdi),%eax
movzbl 3(%rsi),%r8d
2:
subl %r8d,%eax
ret
#endif
ARCHEND(memcmp, scalar)
ARCHENTRY(memcmp, baseline)
cmp $32, %rdx # enough to permit use of the long kernel?
ja .Llong
test %rdx, %rdx # zero bytes buffer?
je .L0
/*
* Compare strings of 1--32 bytes. We want to do this by
* loading into two xmm registers and then comparing. To avoid
* crossing into unmapped pages, we either load 32 bytes from
* the start of the buffer or 32 bytes before its end, depending
* on whether there is a page boundary between the overread area
* or not.
*/
/* check for page boundaries overreads */
lea 31(%rdi), %eax # end of overread
lea 31(%rsi), %r8d
lea -1(%rdi, %rdx, 1), %ecx # last character in buffer
lea -1(%rsi, %rdx, 1), %r9d
xor %ecx, %eax
xor %r9d, %r8d
test $PAGE_SIZE, %eax # are they on different pages?
jz 0f
/* fix up rdi */
movdqu -32(%rdi, %rdx, 1), %xmm0
movdqu -16(%rdi, %rdx, 1), %xmm1
lea -8(%rsp), %rdi # end of replacement buffer
sub %rdx, %rdi # start of replacement buffer
movdqa %xmm0, -40(%rsp) # copy to replacement buffer
movdqa %xmm1, -24(%rsp)
0: test $PAGE_SIZE, %r8d
jz 0f
/* fix up rsi */
movdqu -32(%rsi, %rdx, 1), %xmm0
movdqu -16(%rsi, %rdx, 1), %xmm1
lea -40(%rsp), %rsi # end of replacement buffer
sub %rdx, %rsi # start of replacement buffer
movdqa %xmm0, -72(%rsp) # copy to replacement buffer
movdqa %xmm1, -56(%rsp)
/* load data and compare properly */
0: movdqu 16(%rdi), %xmm1
movdqu 16(%rsi), %xmm3
movdqu (%rdi), %xmm0
movdqu (%rsi), %xmm2
mov %edx, %ecx
mov $-1, %edx
shl %cl, %rdx # ones where the buffer is not
pcmpeqb %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm1, %ecx
pmovmskb %xmm0, %eax
shl $16, %ecx
or %ecx, %eax # ones where the buffers match
or %edx, %eax # including where the buffer is not
not %eax # ones where there is a mismatch
#ifndef BCMP
bsf %eax, %edx # location of the first mismatch
cmovz %eax, %edx # including if there is no mismatch
movzbl (%rdi, %rdx, 1), %eax # mismatching bytes
movzbl (%rsi, %rdx, 1), %edx
sub %edx, %eax
#endif
ret
/* empty input */
.L0: xor %eax, %eax
ret
/* compare 33+ bytes */
ALIGN_TEXT
.Llong: movdqu (%rdi), %xmm0 # load head
movdqu (%rsi), %xmm2
mov %rdi, %rcx
sub %rdi, %rsi # express rsi as distance from rdi
and $~0xf, %rdi # align rdi to 16 bytes
movdqu 16(%rsi, %rdi, 1), %xmm1
pcmpeqb 16(%rdi), %xmm1 # compare second half of this iteration
add %rcx, %rdx # pointer to last byte in buffer
jc .Loverflow # did this overflow?
0: pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %eax
xor $0xffff, %eax # any mismatch?
jne .Lmismatch_head
add $64, %rdi # advance to next iteration
jmp 1f # and get going with the loop
/*
* If we got here, a buffer length was passed to memcmp(a, b, len)
* such that a + len < a. While this sort of usage is illegal,
* it is plausible that a caller tries to do something like
* memcmp(a, b, SIZE_MAX) if a and b are known to differ, intending
* for memcmp() to stop comparing at the first mismatch. This
* behaviour is not guaranteed by any version of ISO/IEC 9899,
* but usually works out in practice. Let's try to make this
* case work by comparing until the end of the address space.
*/
.Loverflow:
mov $-1, %rdx # compare until the end of memory
jmp 0b
/* process buffer 32 bytes at a time */
ALIGN_TEXT
0: movdqu -32(%rsi, %rdi, 1), %xmm0
movdqu -16(%rsi, %rdi, 1), %xmm1
pcmpeqb -32(%rdi), %xmm0
pcmpeqb -16(%rdi), %xmm1
add $32, %rdi # advance to next iteration
1: pand %xmm0, %xmm1 # 0xff where both halves matched
pmovmskb %xmm1, %eax
cmp $0xffff, %eax # all bytes matched?
jne .Lmismatch
cmp %rdx, %rdi # end of buffer reached?
jb 0b
/* less than 32 bytes left to compare */
movdqu -16(%rdx), %xmm1 # load 32 byte tail through end pointer
movdqu -16(%rdx, %rsi, 1), %xmm3
movdqu -32(%rdx), %xmm0
movdqu -32(%rdx, %rsi, 1), %xmm2
pcmpeqb %xmm3, %xmm1
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm1, %ecx
pmovmskb %xmm0, %eax
shl $16, %ecx
or %ecx, %eax # ones where the buffers match
not %eax # ones where there is a mismatch
#ifndef BCMP
bsf %eax, %ecx # location of the first mismatch
cmovz %eax, %ecx # including if there is no mismatch
add %rcx, %rdx # pointer to potential mismatch
movzbl -32(%rdx), %eax # mismatching bytes
movzbl -32(%rdx, %rsi, 1), %edx
sub %edx, %eax
#endif
ret
#ifdef BCMP
.Lmismatch:
mov $1, %eax
.Lmismatch_head:
ret
#else /* memcmp */
.Lmismatch_head:
tzcnt %eax, %eax # location of mismatch
add %rax, %rcx # pointer to mismatch
movzbl (%rcx), %eax # mismatching bytes
movzbl (%rcx, %rsi, 1), %ecx
sub %ecx, %eax
ret
.Lmismatch:
movdqu -48(%rsi, %rdi, 1), %xmm1
pcmpeqb -48(%rdi), %xmm1 # reconstruct xmm1 before PAND
pmovmskb %xmm0, %eax # mismatches in first 16 bytes
pmovmskb %xmm1, %edx # mismatches in second 16 bytes
shl $16, %edx
or %edx, %eax # mismatches in both
not %eax # matches in both
tzcnt %eax, %eax # location of mismatch
add %rax, %rdi # pointer to mismatch
movzbl -64(%rdi), %eax # mismatching bytes
movzbl -64(%rdi, %rsi, 1), %ecx
sub %ecx, %eax
ret
#endif
ARCHEND(memcmp, baseline)
.section .note.GNU-stack,"",%progbits