opnsense-src/lib/libc/amd64/string/strlen.S
Warner Losh 1d386b48a5 Remove $FreeBSD$: one-line .c pattern
Remove /^[\s*]*__FBSDID\("\$FreeBSD\$"\);?\s*\n/
2023-08-16 11:54:42 -06:00

131 lines
2.9 KiB
ArmAsm

/*-
* Written by Mateusz Guzik <mjg@freebsd.org>
* Copyright (c) 2023 The FreeBSD Foundation
*
* Portions of this software were developed by Robert Clausecker
* <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
*
* Public domain.
*/
#include <machine/asm.h>
#include "amd64_archlevel.h"
/*
* Note: this routine was written with kernel use in mind (read: no simd),
* it is only present in userspace as a temporary measure until something
* better gets imported.
*/
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
ARCHFUNCS(strlen)
ARCHFUNC(strlen, scalar)
ARCHFUNC(strlen, baseline)
ENDARCHFUNCS(strlen)
/*
* strlen(string)
* %rdi
*
* Uses the ((x - 0x01....01) & ~x & 0x80....80) trick.
*
* 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
* with leaq.
*
* For a description see either:
* - "Hacker's Delight" by Henry S. Warren, Jr.
* - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
* by Agner Fog
*
* The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
*/
ARCHENTRY(strlen, scalar)
movabsq $0xfefefefefefefeff,%r8
movabsq $0x8080808080808080,%r9
movq %rdi,%r10
movq %rdi,%rcx
testb $7,%dil
jz 2f
/*
* Handle misaligned reads: align to 8 and fill
* the spurious bytes.
*/
andq $~7,%rdi
movq (%rdi),%r11
shlq $3,%rcx
movq $-1,%rdx
shlq %cl,%rdx
notq %rdx
orq %rdx,%r11
leaq (%r11,%r8),%rcx
notq %r11
andq %r11,%rcx
andq %r9,%rcx
jnz 3f
/*
* Main loop.
*/
ALIGN_TEXT
1:
leaq 8(%rdi),%rdi
2:
movq (%rdi),%r11
leaq (%r11,%r8),%rcx
notq %r11
andq %r11,%rcx
andq %r9,%rcx
jz 1b
3:
bsfq %rcx,%rcx
shrq $3,%rcx
leaq (%rcx,%rdi),%rax
subq %r10,%rax
ret
ARCHEND(strlen, scalar)
ARCHENTRY(strlen, baseline)
mov %rdi, %rcx
pxor %xmm1, %xmm1
and $~0xf, %rdi # align string
pcmpeqb (%rdi), %xmm1 # compare head (with junk before string)
mov %rcx, %rsi # string pointer copy for later
and $0xf, %ecx # amount of bytes rdi is past 16 byte alignment
pmovmskb %xmm1, %eax
add $32, %rdi # advance to next iteration
shr %cl, %eax # clear out matches in junk bytes
test %eax, %eax # any match? (can't use ZF from SHR as CL=0 is possible)
jnz 2f
ALIGN_TEXT
1: pxor %xmm1, %xmm1
pcmpeqb -16(%rdi), %xmm1 # find NUL bytes
pmovmskb %xmm1, %eax
test %eax, %eax # were any NUL bytes present?
jnz 3f
/* the same unrolled once more */
pxor %xmm1, %xmm1
pcmpeqb (%rdi), %xmm1
pmovmskb %xmm1, %eax
add $32, %rdi # advance to next iteration
test %eax, %eax
jz 1b
/* match found in loop body */
sub $16, %rdi # undo half the advancement
3: tzcnt %eax, %eax # find the first NUL byte
sub %rsi, %rdi # string length until beginning of (%rdi)
lea -16(%rdi, %rax, 1), %rax # that plus loc. of NUL byte: full string length
ret
/* match found in head */
2: tzcnt %eax, %eax # compute string length
ret
ARCHEND(strlen, baseline)
.section .note.GNU-stack,"",%progbits