mirror of
https://github.com/opnsense/src.git
synced 2026-04-15 14:29:58 -04:00
When matching against a set of 17--32 characters, strcspn() uses two invocations of PCMPISTRI to match against the first 16 characters of the set and then the remaining characters. If a match was found in the first half of the set, the code originally immediately returned that match. However, it is possible for a match in the second half of the set to occur earlier in the vector, leading to that match being overlooked. Fix the code by checking if there is a match in the second half of the set and taking the earlier of the two matches. The correctness of the function has been verified with extended unit tests and test runs against the glibc test suite. Approved by: mjg (implicit, via IRC) MFC after: 1 week MFC to: stable/14 (cherry picked from commit c91cd7d03a9dee649ba3a1b9b4014df9de111bb8)
394 lines
10 KiB
x86-64 assembly (GAS/AT&T syntax; the site's "ArmAsm" file-type badge is a misdetection)
/*
|
|
* Copyright (c) 2023 The FreeBSD Foundation
|
|
*
|
|
* This software was developed by Robert Clausecker <fuz@FreeBSD.org>
|
|
* under sponsorship from the FreeBSD Foundation.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <machine/asm.h>	/* ENTRY/END, CNAME, ARCHENTRY et al. */
#include <machine/param.h>	/* PAGE_SIZE, used for page-crossing checks */

#include "amd64_archlevel.h"	/* ARCHFUNCS dispatch-by-archlevel machinery */

#define ALIGN_TEXT	.p2align 4,0x90 /* 16-byte alignment, nop filled */
|
|
|
|
/*
 * Dispatch table: resolve strcspn to the best implementation for the
 * amd64 architecture level detected at runtime.  "scalar" is the
 * baseline; x86_64_v2 (SSE4.2, for PCMPISTRI) is preferred when present.
 */
ARCHFUNCS(strcspn)
ARCHFUNC(strcspn, scalar)
NOARCHFUNC
ARCHFUNC(strcspn, x86_64_v2)
ENDARCHFUNCS(strcspn)
|
|
|
|
/*
 * size_t strcspn(const char *s, const char *set) -- scalar kernel
 *
 * SysV AMD64: %rdi = s, %rsi = set; returns in %rax the length of the
 * longest prefix of s containing no byte from set.
 *
 * Strategy: special-case empty and one-character sets (strlen/strchrnul),
 * otherwise build a 256-byte membership table on the stack and scan s
 * four bytes per iteration until a member byte (or NUL) is hit.
 */
ARCHENTRY(strcspn, scalar)
	push	%rbp			# align stack to enable function call
	mov	%rsp, %rbp
	sub	$256, %rsp		# allocate space for lookup table

	/* check for special cases */
	movzbl	(%rsi), %eax		# first character in the set
	test	%eax, %eax
	jz	.Lstrlen		# empty set: strcspn(s, "") == strlen(s)

	movzbl	1(%rsi), %edx		# second character in the set
	test	%edx, %edx
	jz	.Lstrchr		# one-char set: reduces to strchrnul

	/*
	 * No special case matches -- prepare lookup table.
	 * Zero the 256-byte table in 32-byte strides: %rcx counts
	 * 28, 24, ..., 0, with four 8-byte stores per iteration.
	 */
	xor	%r8d, %r8d
	mov	$28, %ecx
0:	mov	%r8, (%rsp, %rcx, 8)
	mov	%r8, 8(%rsp, %rcx, 8)
	mov	%r8, 16(%rsp, %rcx, 8)
	mov	%r8, 24(%rsp, %rcx, 8)
	sub	$4, %ecx
	jnc	0b			# until %ecx wraps below zero

	add	$2, %rsi		# first two set bytes already loaded
	movb	$1, (%rsp, %rax, 1)	# register first chars in set
	movb	$1, (%rsp, %rdx, 1)
	mov	%rdi, %rax		# a copy of the source to iterate over

	/*
	 * Process remaining chars in set, two per iteration.  The store
	 * happens before the NUL test, so the set's terminator marks
	 * table[0] = 1 -- this makes the search loop below stop at the
	 * end of s without a separate NUL check.
	 */
	ALIGN_TEXT
0:	movzbl	(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f			# end of set?

	movzbl	1(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f			# end of set?

	add	$2, %rsi
	jmp	0b

	/*
	 * Find match: scan s four bytes per iteration.  On a hit at
	 * offset 1/2/3 within the group, the label chains below adjust
	 * %rax/%rdi so that %rax - %rdi comes out to the right count.
	 */
	ALIGN_TEXT
1:	movzbl	(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	2f			# match at offset 0

	movzbl	1(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	3f			# match at offset 1: +1 via inc

	movzbl	2(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	4f			# match at offset 2: +2 via dec %rdi + inc %rax

	movzbl	3(%rax), %ecx
	add	$4, %rax		# advance before the test; flags unaffected by add? no --
					# cmpb below sets flags afresh, so this is safe
	cmpb	$0, (%rsp, %rcx, 1)
	je	1b

	sub	$3, %rax		# match at offset 3: undo advance, then fall through (+3 total)
4:	dec	%rdi			# widen the difference by one
3:	inc	%rax			# widen the difference by one
2:	sub	%rdi, %rax		# number of characters preceding match
	leave
	ret

	/* set is empty, degrades to strlen */
.Lstrlen:
	leave
	jmp	CNAME(strlen)

	/* just one character in set, degrades to strchr */
.Lstrchr:
	mov	%rdi, (%rsp)		# stash a copy of the string
	mov	%eax, %esi		# find the character in the set
	call	CNAME(strchrnul)	# returns pointer to match or to NUL
	sub	(%rsp), %rax		# length of prefix before match
	leave
	ret
ARCHEND(strcspn, scalar)
|
|
|
|
/*
 * size_t strcspn(const char *s, const char *set) -- SSE4.2 kernel
 *
 * SysV AMD64: %rdi = s, %rsi = set; returns prefix length in %rax.
 *
 * This kernel uses pcmpistri to do the heavy lifting.
 * We provide five code paths, depending on set size:
 *
 *	 0:	 call strlen()
 *	 1:	 call strchr()
 *	 2--16:	 one pcmpistri per 16 bytes of input
 *	 17--32: two pcmpistri per 16 bytes of input
 *	 >=33:	 fall back to look up table
 *
 * In the 17--32 byte case the set is split across %xmm2 (first half)
 * and %xmm3 (second half).  A match in EITHER half may be the earliest
 * one in a given 16-byte chunk, so whenever the first half matches we
 * still probe the second half and take the smaller index (see the
 * cmova at .Lheadmatchv2first and at label 4: below).
 */
ARCHENTRY(strcspn, x86_64_v2)
	push	%rbp
	mov	%rsp, %rbp
	sub	$256, %rsp		# room for copied set / lookup table

	/* check for special cases */
	movzbl	(%rsi), %eax
	test	%eax, %eax		# empty string?
	jz	.Lstrlenv2

	cmpb	$0, 1(%rsi)		# single character string?
	jz	.Lstrchrv2

	/*
	 * Find set size and copy up to 32 bytes to (%rsp).
	 * The set pointer is rounded down to alignment so we can use
	 * movdqa; %ecx holds the misalignment and is used to shift
	 * away bytes that precede the actual set.
	 */
	mov	%esi, %ecx
	and	$~0xf, %rsi		# align set pointer
	movdqa	(%rsi), %xmm0
	pxor	%xmm1, %xmm1
	and	$0xf, %ecx		# amount of bytes rsi is past alignment
	xor	%edx, %edx
	pcmpeqb	%xmm0, %xmm1		# end of string reached?
	movdqa	%xmm0, 32(%rsp)		# transfer head of set to stack
	pmovmskb %xmm1, %eax
	shr	%cl, %eax		# clear out junk before string
	test	%eax, %eax		# end of set reached?
	jnz	0f

	movdqa	16(%rsi), %xmm0		# second chunk of the set
	mov	$16, %edx
	sub	%ecx, %edx		# length of set preceding xmm0
	pxor	%xmm1, %xmm1
	pcmpeqb	%xmm0, %xmm1
	movdqa	%xmm0, 48(%rsp)
	movdqu	32(%rsp, %rcx, 1), %xmm2 # head of set
	pmovmskb %xmm1, %eax
	test	%eax, %eax
	jnz	1f

	movdqa	32(%rsi), %xmm0		# third chunk
	add	$16, %edx
	pxor	%xmm1, %xmm1
	pcmpeqb	%xmm0, %xmm1
	movdqa	%xmm0, 64(%rsp)
	pmovmskb %xmm1, %eax
	test	%eax, %eax		# still not done?
	jz	.Lgt32v2		# no NUL in 48 bytes scanned: set is >32 long

0:	movdqu	32(%rsp, %rcx, 1), %xmm2 # head of set
1:	tzcnt	%eax, %eax		# index of NUL within current chunk
	add	%eax, %edx		# length of set (excluding NUL byte)
	cmp	$32, %edx		# above 32 bytes?
	ja	.Lgt32v2

	/*
	 * At this point we know that we want to use pcmpistri.
	 * One last problem obtains: the head of the string is not
	 * aligned and may cross a page boundary.  If this is the case,
	 * we take the part before the page boundary and repeat the
	 * last byte to fill up the xmm register.
	 */
	mov	%rdi, %rax		# save original string pointer
	lea	15(%rdi), %esi		# last byte of the head
	xor	%edi, %esi
	test	$PAGE_SIZE, %esi	# does the head cross a page?
	jz	0f

	/* head crosses page: copy to stack to fix up */
	and	$~0xf, %rax		# align head pointer temporarily
	movzbl	15(%rax), %esi		# last head byte on the page
	movdqa	(%rax), %xmm0
	movabs	$0x0101010101010101, %r8
	imul	%r8, %rsi		# repeated 8 times
	movdqa	%xmm0, (%rsp)		# head word on stack
	mov	%rsi, 16(%rsp)		# followed by filler (last byte x8)
	mov	%rsi, 24(%rsp)
	mov	%edi, %eax
	and	$0xf, %eax		# offset of head from alignment
	add	%rsp, %rax		# pointer to fake head

0:	movdqu	(%rax), %xmm0		# load head (fake or real)
	lea	16(%rdi), %rax
	and	$~0xf, %rax		# second 16 bytes of string (aligned)
1:	cmp	$16, %edx		# 16--32 bytes?
	ja	.Lgt16v2

	/* set is 2--16 bytes in size */

	/* _SIDD_UBYTE_OPS|_SIDD_CMP_EQUAL_ANY|_SIDD_LEAST_SIGNIFICANT */
	pcmpistri $0, %xmm0, %xmm2	# match in head?
	jbe	.Lheadmatchv2		# CF (match) or ZF (NUL seen)

	ALIGN_TEXT
0:	pcmpistri $0, (%rax), %xmm2
	jbe	1f			# match or end of string?
	pcmpistri $0, 16(%rax), %xmm2
	lea	32(%rax), %rax
	ja	0b			# match or end of string?

3:	lea	-16(%rax), %rax		# go back to second half
1:	jc	2f			# jump if match found (%ecx = match index)
	movdqa	(%rax), %xmm0		# reload string piece
	pxor	%xmm1, %xmm1
	pcmpeqb	%xmm1, %xmm0		# where is the NUL byte?
	pmovmskb %xmm0, %ecx
	tzcnt	%ecx, %ecx		# location of NUL byte in (%rax)
2:	sub	%rdi, %rax		# offset of %xmm0 from beginning of string
	add	%rcx, %rax		# prefix length before match/NUL
	leave
	ret

	/* match or NUL within the (possibly faked) head chunk */
.Lheadmatchv2:
	jc	2f			# jump if match found
	pxor	%xmm1, %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %ecx
	tzcnt	%ecx, %ecx		# location of NUL byte
2:	mov	%ecx, %eax		# prefix length before match/NUL
	leave
	ret

	/*
	 * Match in first set half during head.  The second set half may
	 * still match EARLIER in the chunk; pcmpistri yields %ecx = 16
	 * when it finds nothing, so taking the minimum is always safe.
	 */
.Lheadmatchv2first:
	mov	%ecx, %eax
	pcmpistri $0, %xmm0, %xmm3	# match in second set half?
	cmp	%ecx, %eax		# before the first half match?
	cmova	%ecx, %eax		# use the earlier match
	leave
	ret

.Lgt16v2:
	movdqu	48(%rsp, %rcx, 1), %xmm3 # second part of set

	/* set is 17--32 bytes in size */
	pcmpistri $0, %xmm0, %xmm2	# match in first set half?
	jb	.Lheadmatchv2first
	pcmpistri $0, %xmm0, %xmm3	# match in second set half or end of string?
	jbe	.Lheadmatchv2

	ALIGN_TEXT
0:	movdqa	(%rax), %xmm0
	pcmpistri $0, %xmm0, %xmm2
	jb	4f			# match in first set half?
	pcmpistri $0, %xmm0, %xmm3
	jbe	1f			# match in second set half or end of string?
	movdqa	16(%rax), %xmm0
	add	$32, %rax
	pcmpistri $0, %xmm0, %xmm2
	jb	3f			# match in first set half?
	pcmpistri $0, %xmm0, %xmm3
	ja	0b			# neither match in 2nd half nor string end?

	/* match in second half or NUL */
	lea	-16(%rax), %rax		# go back to second half
1:	jc	2f			# jump if match found
	pxor	%xmm1, %xmm1
	pcmpeqb	%xmm1, %xmm0		# where is the NUL byte?
	pmovmskb %xmm0, %ecx
	tzcnt	%ecx, %ecx		# location of NUL byte in (%rax)
2:	sub	%rdi, %rax		# offset of %xmm0 from beginning of string
	add	%rcx, %rax		# prefix length before match/NUL
	leave
	ret

	/*
	 * Match in first set half inside the main loop.  As in
	 * .Lheadmatchv2first, a second-half match may precede it in the
	 * chunk, so probe %xmm3 and keep the smaller index.
	 */
3:	sub	$16, %rax		# go back to second half
4:	sub	%rdi, %rax		# offset of %xmm0 from beginning of string
	mov	%ecx, %edx		# save first-half match index
	pcmpistri $0, %xmm0, %xmm3	# match in second set half?
	cmp	%ecx, %edx		# before the first half match?
	cmova	%ecx, %edx		# use the earlier match
	add	%rdx, %rax		# return full offset
	leave
	ret

	/* set is empty, degrades to strlen */
.Lstrlenv2:
	leave
	jmp	CNAME(strlen)

	/* just one character in set, degrades to strchr */
.Lstrchrv2:
	mov	%rdi, (%rsp)		# stash a copy of the string
	mov	%eax, %esi		# find this character
	call	CNAME(strchrnul)
	sub	(%rsp), %rax		# length of prefix before match
	leave
	ret

	/* set is >=33 bytes in size: same table algorithm as the scalar kernel */
.Lgt32v2:
	xorps	%xmm0, %xmm0
	mov	$256-64, %edx

	/* clear out look up table, 64 bytes per iteration */
0:	movaps	%xmm0, (%rsp, %rdx, 1)
	movaps	%xmm0, 16(%rsp, %rdx, 1)
	movaps	%xmm0, 32(%rsp, %rdx, 1)
	movaps	%xmm0, 48(%rsp, %rdx, 1)
	sub	$64, %edx
	jnc	0b

	add	%rcx, %rsi		# restore string pointer
	mov	%rdi, %rax		# keep a copy of the string

	/*
	 * Initialise look up table, four set bytes per iteration.
	 * The terminator stores table[0] = 1, so the search loop
	 * below also stops at the end of s.
	 */
	ALIGN_TEXT
0:	movzbl	(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	1(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	2(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	movzbl	3(%rsi), %ecx
	movb	$1, (%rsp, %rcx, 1)
	test	%ecx, %ecx
	jz	1f

	add	$4, %rsi
	jmp	0b

	/*
	 * Find match, four string bytes per iteration; the 4:/3: label
	 * chain adjusts %rdi/%rax by the offset within the group.
	 */
	ALIGN_TEXT
1:	movzbl	(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	2f			# match at offset 0

	movzbl	1(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	3f			# match at offset 1

	movzbl	2(%rax), %ecx
	cmpb	$0, (%rsp, %rcx, 1)
	jne	4f			# match at offset 2

	movzbl	3(%rax), %ecx
	add	$4, %rax
	cmpb	$0, (%rsp, %rcx, 1)
	je	1b

	sub	$3, %rax		# match at offset 3
4:	dec	%rdi
3:	inc	%rax
2:	sub	%rdi, %rax		# number of characters preceding match
	leave
	ret
ARCHEND(strcspn, x86_64_v2)
|
|
|
|
.section .note.GNU-stack,"",%progbits
|