mirror of
https://github.com/opnsense/src.git
synced 2026-04-15 14:29:58 -04:00
This is conceptually similar to strchr(3), but there are slight changes to account for the buffer having an explicit buffer length. this includes the bug fix from b2618b6. Sponsored by: The FreeBSD Foundation Reported by: yuri, des Tested by: des Approved by: mjg MFC after: 1 week MFC to: stable/14 PR: 273652 Differential Revision: https://reviews.freebsd.org/D41598 (cherry picked from commit de12a689fad271f5a2ba7c188b0b5fb5cabf48e7) (cherry picked from commit b2618b651b28fd29e62a4e285f5be09ea30a85d4)
207 lines
5.9 KiB
ArmAsm
207 lines
5.9 KiB
ArmAsm
/*-
|
|
* Copyright (c) 2023 The FreeBSD Foundation
|
|
*
|
|
* This software was developed by Robert Clausecker <fuz@FreeBSD.org>
|
|
* under sponsorship from the FreeBSD Foundation.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE
|
|
*/
|
|
|
|
#include <machine/asm.h>
|
|
|
|
#include "amd64_archlevel.h"
|
|
|
|
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
|
|
|
|
.weak memchr
|
|
.set memchr, __memchr
|
|
ARCHFUNCS(__memchr)
|
|
ARCHFUNC(__memchr, scalar)
|
|
ARCHFUNC(__memchr, baseline)
|
|
ENDARCHFUNCS(__memchr)
|
|
|
|
ARCHENTRY(__memchr, scalar)
|
|
test %rdx, %rdx # empty input?
|
|
je .Lnomatch
|
|
|
|
lea (, %rdi, 8), %ecx
|
|
mov $-1, %rax
|
|
add %rdi, %rdx # pointer to end of buffer or to end of
|
|
cmovc %rax, %rdx # address space (whichever comes first)
|
|
and $~7, %rdi # align to 8 bytes
|
|
mov (%rdi), %rax # load first word
|
|
movzbl %sil, %esi # clear stray high bits
|
|
movabs $0x0101010101010101, %r8
|
|
imul %r8, %rsi # replicate char 8 times
|
|
|
|
/* compute head and tail masks */
|
|
mov %r8, %r10
|
|
movabs $0x8080808080808080, %r9
|
|
shl %cl, %r10 # 0x01 where string head is
|
|
lea (, %rdx, 8), %ecx
|
|
xor %r8, %r10 # 0x01 where it is not
|
|
neg %r8 # negate 01..01 so we can use lea
|
|
mov %r9, %r11
|
|
xor %rsi, %rax # str ^ c (0x00 where str[i] == c)
|
|
neg %ecx
|
|
or %r10, %rax # except before the string
|
|
shr %cl, %r11 # 0x80 where string tail is
|
|
|
|
add $8, %rdi # advance to next 8 bytes
|
|
cmp %rdx, %rdi # end of buffer reached during head?
|
|
jae .Ltail # and go to tail-processing code
|
|
|
|
/* main loop, unrolled twice */
|
|
ALIGN_TEXT
|
|
0: lea (%rax, %r8, 1), %rcx # (str ^ c) - 0x01..01
|
|
not %rax # ~(str ^ c)
|
|
and %r9, %rax # ((str^c) - 0x01..01) & ~(str^c)
|
|
and %rcx, %rax # not including junk bytes
|
|
jnz .Lmatch
|
|
|
|
mov (%rdi), %rax
|
|
add $8, %rdi
|
|
xor %rsi, %rax # str ^ c
|
|
cmp %rdx, %rdi
|
|
jae .Ltail
|
|
|
|
lea (%rax, %r8, 1), %rcx # (str ^ c) - 0x01..01
|
|
not %rax # ~(str ^ c)
|
|
and %r9, %rax # ((str^c) - 0x01..01) & ~(str^c)
|
|
and %rcx, %rax # not including junk bytes
|
|
jnz .Lmatch
|
|
|
|
mov (%rdi), %rax
|
|
add $8, %rdi
|
|
xor %rsi, %rax # str ^ c
|
|
cmp %rdx, %rdi
|
|
jb 0b
|
|
|
|
.Ltail: lea (%rax, %r8, 1), %rcx # (str ^ c) - 0x01..01
|
|
not %rax # ~(str ^ c)
|
|
and %r11, %rax # ((str^c) - 0x01..01) & ~(str^c)
|
|
and %rcx, %rax # not including junk bytes or bytes past buffer
|
|
jz .Lnomatch
|
|
|
|
.Lmatch:
|
|
tzcnt %rax, %rax # first match
|
|
shr $3, %eax # scale from bit to byte index
|
|
lea -8(%rdi, %rax), %rax # pointer to found c
|
|
ret
|
|
|
|
/* no match found */
|
|
.Lnomatch:
|
|
xor %eax, %eax # return null pointer
|
|
ret
|
|
ARCHEND(__memchr, scalar)
|
|
|
|
ARCHENTRY(__memchr, baseline)
|
|
test %rdx, %rdx # empty input?
|
|
je .Lnomatchb
|
|
|
|
movd %esi, %xmm2
|
|
mov %edi, %ecx
|
|
mov $-1, %r9
|
|
add %rdi, %rdx # pointer to end of buffer or to end of
|
|
cmovc %r9, %rdx # address space (whichever comes first)
|
|
and $~0x1f, %rdi # align to 32 bytes
|
|
movdqa (%rdi), %xmm0 # load first 32 bytes
|
|
movdqa 16(%rdi), %xmm1
|
|
|
|
punpcklbw %xmm2, %xmm2 # c -> cc
|
|
|
|
shl %cl, %r9d # mask with zeroes before the string
|
|
|
|
punpcklwd %xmm2, %xmm2 # cc -> cccc
|
|
|
|
mov $-1, %r8d
|
|
xor %ecx, %ecx
|
|
sub %edx, %ecx # edx = -ecx
|
|
shr %cl, %r8d # bytes in tail that are part of the buffer
|
|
|
|
pshufd $0, %xmm2, %xmm2 # cccc -> cccccccccccccccc
|
|
|
|
add $32, %rdi # advance to next 32 bytes
|
|
mov $-1, %eax
|
|
cmp %rdx, %rdi # end of buffer reached during head?
|
|
cmovae %r8d, %eax # if yes, do combined head/tail processing
|
|
and %r9d, %eax # mask of bytes in head part of string
|
|
|
|
/* process head */
|
|
pcmpeqb %xmm2, %xmm1
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm1, %esi
|
|
pmovmskb %xmm0, %ecx
|
|
shl $16, %esi
|
|
or %esi, %ecx # locations of matches
|
|
and %ecx, %eax # any match inside buffer?
|
|
jnz .Lprecisematchb
|
|
|
|
cmp %rdx, %rdi # did the buffer end here?
|
|
jae .Lnomatchb # if yes we are done
|
|
|
|
/* main loop */
|
|
ALIGN_TEXT
|
|
0: movdqa (%rdi), %xmm0 # load next string chunk
|
|
movdqa 16(%rdi), %xmm1
|
|
add $32, %rdi
|
|
cmp %rdx, %rdi # ready for main loop?
|
|
jae .Ltailb
|
|
|
|
pcmpeqb %xmm2, %xmm0
|
|
pcmpeqb %xmm2, %xmm1
|
|
por %xmm1, %xmm0 # match in either half?
|
|
pmovmskb %xmm0, %eax
|
|
test %eax, %eax
|
|
jz 0b
|
|
|
|
.Lmatchb:
|
|
pcmpeqb -32(%rdi), %xmm2 # redo comparison of first 16 bytes
|
|
pmovmskb %xmm1, %ecx
|
|
pmovmskb %xmm2, %eax
|
|
shl $16, %ecx
|
|
or %ecx, %eax # location of matches
|
|
|
|
.Lprecisematchb:
|
|
tzcnt %eax, %eax # find location of match
|
|
lea -32(%rdi, %rax, 1), %rax # point to matching byte
|
|
ret
|
|
|
|
.Ltailb:
|
|
pcmpeqb %xmm2, %xmm1
|
|
pcmpeqb %xmm2, %xmm0
|
|
pmovmskb %xmm1, %edx
|
|
pmovmskb %xmm0, %eax
|
|
shl $16, %edx
|
|
or %edx, %eax # location of matches
|
|
and %r8d, %eax # mask out matches beyond buffer
|
|
bsf %eax, %edx # location of match
|
|
lea -32(%rdi, %rdx, 1), %rdx # pointer to match (if any)
|
|
cmovnz %rdx, %rax # point to match if present,
|
|
ret # else null pointer
|
|
|
|
.Lnomatchb:
|
|
xor %eax, %eax # return null pointer
|
|
ret
|
|
ARCHEND(__memchr, baseline)
|
|
|
|
.section .note.GNU-stack,"",%progbits
|