mirror of
https://github.com/opnsense/src.git
synced 2026-05-25 02:35:01 -04:00
amd64: import asm strlen into libc
Reviewed by: kib
Differential Revision: https://reviews.freebsd.org/D28845
(cherry picked from commit 7f06b217c5)
This commit is contained in:
parent
9535440569
commit
c16fc9eae3
2 changed files with 82 additions and 0 deletions
|
|
@ -8,4 +8,5 @@ MDSRCS+= \
|
|||
memset.S \
|
||||
strcat.S \
|
||||
strcmp.S \
|
||||
strlen.S \
|
||||
stpcpy.S
|
||||
|
|
|
|||
81
lib/libc/amd64/string/strlen.S
Normal file
81
lib/libc/amd64/string/strlen.S
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Written by Mateusz Guzik <mjg@freebsd.org>
|
||||
* Public domain.
|
||||
*/
|
||||
|
||||
#include <machine/asm.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
/*
|
||||
* Note: this routine was written with kernel use in mind (read: no SIMD).
* It is only present in userspace as a temporary measure until something
* better gets imported.
|
||||
*/
|
||||
|
||||
#define ALIGN_TEXT .p2align 4,0x90 /* align to 2^4 = 16 bytes, padding with 0x90 (nop) bytes */
|
||||
|
||||
/*
 * size_t strlen(const char *string)
 *
 * In:   %rdi = string
 * Out:  %rax = number of bytes preceding the terminating NUL
 * Uses: %rcx, %rdx, %rdi, %r8, %r9, %r10, %r11 and the flags; no stack.
 *
 * The string is examined one aligned 8-byte word at a time using the
 * ((x - 0x01....01) & ~x & 0x80....80) trick: the result is non-zero
 * exactly when the word x contains a zero byte, and its lowest set bit
 * lies in the first such byte.
 *
 * Instead of subtracting 0x01....01 the code adds 0x0 - 0x01....01
 * (that is, 0xfefefefefefefeff), so the operation can be done with leaq.
 *
 * For a description see either:
 * - "Hacker's Delight" by Henry S. Warren, Jr.
 * - "Optimizing subroutines in assembly language: An optimization guide
 *   for x86 platforms" by Agner Fog
 *
 * The latter contains a 32-bit variant of the same algorithm coded in
 * assembly for i386.
 */
ENTRY(strlen)
	movabsq	$0x8080808080808080,%r9	/* high bit of every byte */
	movabsq	$0xfefefefefefefeff,%r8	/* 0 - 0x0101010101010101 */

	movq	%rdi,%rcx		/* low bits give the misalignment */
	movq	%rdi,%r10		/* keep the start for the final subtraction */
	testb	$7,%dil
	jz	.Lscan_word		/* already 8-byte aligned */

	/*
	 * Misaligned start: read the aligned word that contains the first
	 * byte and force every byte that precedes the string to 0xff so
	 * that none of them can be mistaken for the terminator.
	 */
	shlq	$3,%rcx			/* byte offset -> bit offset */
	movq	$-1,%rdx
	shlq	%cl,%rdx		/* only the low 6 bits of %cl count */
	notq	%rdx			/* ones in the bytes before the string */
	andq	$~7,%rdi		/* round down to the containing word */
	movq	(%rdi),%r11
	orq	%rdx,%r11		/* fill the spurious bytes */

	leaq	(%r11,%r8),%rcx		/* x - 0x01....01 */
	notq	%r11			/* ~x */
	andq	%r11,%rcx
	andq	%r9,%rcx		/* non-zero iff a zero byte was seen */
	jnz	.Lfound

	/*
	 * Main loop: %rdi is 8-byte aligned from here on.
	 */
	ALIGN_TEXT
.Lnext_word:
	leaq	8(%rdi),%rdi		/* advance to the next word */
.Lscan_word:
	movq	(%rdi),%r11
	leaq	(%r11,%r8),%rcx		/* x - 0x01....01 */
	notq	%r11			/* ~x */
	andq	%r11,%rcx
	andq	%r9,%rcx		/* non-zero iff a zero byte was seen */
	jz	.Lnext_word
.Lfound:
	bsfq	%rcx,%rcx		/* bit index of the first flagged byte */
	shrq	$3,%rcx			/* bit index -> byte index within the word */
	leaq	(%rcx,%rdi),%rax	/* address of the terminating NUL */
	subq	%r10,%rax		/* length = NUL address - start */
	ret
END(strlen)
|
||||
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
Loading…
Reference in a new issue