opnsense-src/lib/libc/amd64/string/memcmp.S
Robert Clausecker 0666c6fc03 lib/libc/amd64/string/memcmp.S: harden against phony buffer lengths
When memcmp(a, b, len) (or equally, bcmp) is called with a phony length
such that a + len < a, the code would malfunction and not compare the
two buffers correctly.  While such arguments are illegal (buffers do not
wrap around the end of the address space), it is nevertheless conceivable
that people try things like memcmp(a, b, SIZE_MAX) to compare a and b
until the first mismatch, in the knowledge that such a mismatch exists,
expecting memcmp() to stop comparing somewhere around the mismatch.
While memcmp() is usually written to conform to this assumption, no
version of ISO/IEC 9899 guarantees this behaviour (in contrast to
memchr(), for which it is guaranteed).

Nevertheless it appears sensible to at least not grossly misbehave on
phony lengths.  This change hardens memcmp() against this case by
comparing at least until the end of the address space if a + len
overflows a 64 bit integer.

Sponsored by:	The FreeBSD Foundation
Approved by:	mjg (blanket, via IRC)
See also:	b2618b651b28fd29e62a4e285f5be09ea30a85d4
MFC after:	1 week

(cherry picked from commit 953b93cf24d8871c62416c9bcfca935f1f1853b6)
2023-09-23 14:21:42 -04:00
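
To illustrate the usage pattern described above, here is a minimal
hypothetical sketch (the helper is invented for illustration and is not
part of the commit): a and b are known to differ somewhere, so the
caller passes SIZE_MAX and relies on memcmp() stopping at the first
mismatch, which ISO C does not guarantee.

        #include <stdint.h>
        #include <string.h>

        int
        first_diff_sign(const char *a, const char *b)
        {
                /* a + SIZE_MAX wraps around the address space */
                return (memcmp(a, b, SIZE_MAX));
        }

With this change, the long kernel detects that a + len overflowed and
clamps the end pointer to the top of the address space, so such a call
compares up to the mismatch instead of misbehaving.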

/*-
 * Copyright (c) 2018, 2023 The FreeBSD Foundation
 *
 * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Robert Clausecker
 * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
#include <machine/param.h>

#include "amd64_archlevel.h"

/*
 * Note: this routine was written with kernel use in mind (read: no simd),
 * it is only present in userspace as a temporary measure until something
 * better gets imported.
 */

#define ALIGN_TEXT      .p2align 4,0x90 /* 16-byte alignment, nop filled */

#ifdef BCMP
#define memcmp bcmp
#endif

ARCHFUNCS(memcmp)
        ARCHFUNC(memcmp, scalar)
        ARCHFUNC(memcmp, baseline)
ENDARCHFUNCS(memcmp)
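
/*
 * Descriptive note: the ARCHFUNCS/ARCHENTRY macros come from
 * amd64_archlevel.h and dispatch memcmp at load time to one of the
 * implementations below, according to the supported x86-64 architecture
 * level (see amd64_archlevel.h for the exact mechanism).
 */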

ARCHENTRY(memcmp, scalar)
        xorl    %eax,%eax
10:
        cmpq    $16,%rdx
        ja      101632f
        cmpb    $8,%dl
        jg      100816f
        cmpb    $4,%dl
        jg      100408f
        cmpb    $2,%dl
        jge     100204f
        cmpb    $1,%dl
        jl      100000f
        movzbl  (%rdi),%eax
        movzbl  (%rsi),%r8d
        subl    %r8d,%eax
100000:
        ret
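
/*
 * A note on the numeric labels below (as read from the code): 1AABB
 * handles buffers of AA--BB bytes, e.g. 100816 compares 9--16 byte
 * buffers with two overlapping 8-byte loads (the first and the last 8
 * bytes), and mismatch labels like 10081608 append the offset of the
 * failing load.
 */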
        ALIGN_TEXT
100816:
        movq    (%rdi),%r8
        movq    (%rsi),%r9
        cmpq    %r8,%r9
        jne     80f
        movq    -8(%rdi,%rdx),%r8
        movq    -8(%rsi,%rdx),%r9
        cmpq    %r8,%r9
        jne     10081608f
        ret

        ALIGN_TEXT
100408:
        movl    (%rdi),%r8d
        movl    (%rsi),%r9d
        cmpl    %r8d,%r9d
        jne     80f
        movl    -4(%rdi,%rdx),%r8d
        movl    -4(%rsi,%rdx),%r9d
        cmpl    %r8d,%r9d
        jne     10040804f
        ret

        ALIGN_TEXT
100204:
        movzwl  (%rdi),%r8d
        movzwl  (%rsi),%r9d
        cmpl    %r8d,%r9d
        jne     1f
        movzwl  -2(%rdi,%rdx),%r8d
        movzwl  -2(%rsi,%rdx),%r9d
        cmpl    %r8d,%r9d
        jne     1f
        ret

        ALIGN_TEXT
101632:
        cmpq    $32,%rdx
        ja      103200f
        movq    (%rdi),%r8
        movq    (%rsi),%r9
        cmpq    %r8,%r9
        jne     80f
        movq    8(%rdi),%r8
        movq    8(%rsi),%r9
        cmpq    %r8,%r9
        jne     10163208f
        movq    -16(%rdi,%rdx),%r8
        movq    -16(%rsi,%rdx),%r9
        cmpq    %r8,%r9
        jne     10163216f
        movq    -8(%rdi,%rdx),%r8
        movq    -8(%rsi,%rdx),%r9
        cmpq    %r8,%r9
        jne     10163224f
        ret

        ALIGN_TEXT
103200:
        movq    (%rdi),%r8
        movq    8(%rdi),%r9
        subq    (%rsi),%r8
        subq    8(%rsi),%r9
        orq     %r8,%r9
        jnz     10320000f

        movq    16(%rdi),%r8
        movq    24(%rdi),%r9
        subq    16(%rsi),%r8
        subq    24(%rsi),%r9
        orq     %r8,%r9
        jnz     10320016f

        leaq    32(%rdi),%rdi
        leaq    32(%rsi),%rsi
        subq    $32,%rdx
        cmpq    $32,%rdx
        jae     103200b
        cmpb    $0,%dl
        jne     10b
        ret

/*
 * Mismatch was found.
 */
#ifdef BCMP
        ALIGN_TEXT
10320016:
10320000:
10081608:
10163224:
10163216:
10163208:
10040804:
80:
1:
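        /* %eax is still zero here (set at function entry), so return 1 */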
        leal    1(%eax),%eax
        ret
#else
/*
 * We need to compute the difference between strings.
 * Start with narrowing the range down (16 -> 8 -> 4 bytes).
 */
        ALIGN_TEXT
10320016:
        leaq    16(%rdi),%rdi
        leaq    16(%rsi),%rsi
10320000:
        movq    (%rdi),%r8
        movq    (%rsi),%r9
        cmpq    %r8,%r9
        jne     80f
        leaq    8(%rdi),%rdi
        leaq    8(%rsi),%rsi
        jmp     80f

        ALIGN_TEXT
10081608:
10163224:
        leaq    -8(%rdi,%rdx),%rdi
        leaq    -8(%rsi,%rdx),%rsi
        jmp     80f

        ALIGN_TEXT
10163216:
        leaq    -16(%rdi,%rdx),%rdi
        leaq    -16(%rsi,%rdx),%rsi
        jmp     80f

        ALIGN_TEXT
10163208:
        leaq    8(%rdi),%rdi
        leaq    8(%rsi),%rsi
        jmp     80f

        ALIGN_TEXT
10040804:
        leaq    -4(%rdi,%rdx),%rdi
        leaq    -4(%rsi,%rdx),%rsi
        jmp     1f

        ALIGN_TEXT
80:
        movl    (%rdi),%r8d
        movl    (%rsi),%r9d
        cmpl    %r8d,%r9d
        jne     1f
        leaq    4(%rdi),%rdi
        leaq    4(%rsi),%rsi

/*
 * We have up to 4 bytes to inspect.
 */
1:
        movzbl  (%rdi),%eax
        movzbl  (%rsi),%r8d
        cmpb    %r8b,%al
        jne     2f

        movzbl  1(%rdi),%eax
        movzbl  1(%rsi),%r8d
        cmpb    %r8b,%al
        jne     2f

        movzbl  2(%rdi),%eax
        movzbl  2(%rsi),%r8d
        cmpb    %r8b,%al
        jne     2f

        movzbl  3(%rdi),%eax
        movzbl  3(%rsi),%r8d
2:
        subl    %r8d,%eax
        ret
#endif
ARCHEND(memcmp, scalar)

ARCHENTRY(memcmp, baseline)
        cmp     $32, %rdx               # enough to permit use of the long kernel?
        ja      .Llong

        test    %rdx, %rdx              # zero bytes buffer?
        je      .L0
        /*
         * Compare strings of 1--32 bytes.  We want to do this by
         * loading into two xmm registers and then comparing.  To avoid
         * crossing into unmapped pages, we either load 32 bytes from
         * the start of the buffer or 32 bytes before its end, depending
         * on whether the overread area crosses a page boundary.
         */
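
        /*
         * In the check below, the end of the overread area and the
         * last byte of the buffer are at most 31 bytes apart, i.e.
         * less than PAGE_SIZE; XORing the two addresses therefore has
         * the PAGE_SIZE bit set iff a page boundary lies between them.
         */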
        /* check for page boundary overreads */
        lea     31(%rdi), %eax          # end of overread
        lea     31(%rsi), %r8d
        lea     -1(%rdi, %rdx, 1), %ecx # last character in buffer
        lea     -1(%rsi, %rdx, 1), %r9d
        xor     %ecx, %eax
        xor     %r9d, %r8d
        test    $PAGE_SIZE, %eax        # are they on different pages?
        jz      0f

        /* fix up rdi */
        movdqu  -32(%rdi, %rdx, 1), %xmm0
        movdqu  -16(%rdi, %rdx, 1), %xmm1
        lea     -8(%rsp), %rdi          # end of replacement buffer
        sub     %rdx, %rdi              # start of replacement buffer
        movdqa  %xmm0, -40(%rsp)        # copy to replacement buffer
        movdqa  %xmm1, -24(%rsp)

0:      test    $PAGE_SIZE, %r8d
        jz      0f

        /* fix up rsi */
        movdqu  -32(%rsi, %rdx, 1), %xmm0
        movdqu  -16(%rsi, %rdx, 1), %xmm1
        lea     -40(%rsp), %rsi         # end of replacement buffer
        sub     %rdx, %rsi              # start of replacement buffer
        movdqa  %xmm0, -72(%rsp)        # copy to replacement buffer
        movdqa  %xmm1, -56(%rsp)
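
        /*
         * Both replacement buffers live in the 128 byte red zone below
         * %rsp (bytes -72(%rsp) through -9(%rsp)), so no stack frame
         * needs to be set up.
         */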
        /* load data and compare properly */
0:      movdqu  16(%rdi), %xmm1
        movdqu  16(%rsi), %xmm3
        movdqu  (%rdi), %xmm0
        movdqu  (%rsi), %xmm2
        mov     %edx, %ecx
        mov     $-1, %edx
        shl     %cl, %rdx               # ones where the buffer is not
        pcmpeqb %xmm3, %xmm1
        pcmpeqb %xmm2, %xmm0
        pmovmskb %xmm1, %ecx
        pmovmskb %xmm0, %eax
        shl     $16, %ecx
        or      %ecx, %eax              # ones where the buffers match
        or      %edx, %eax              # including where the buffer is not
        not     %eax                    # ones where there is a mismatch
#ifndef BCMP
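        /*
         * If there was no mismatch, %eax is zero: bsf sets ZF and
         * leaves its destination undefined, so the cmovz below
         * replaces it with that zero.  Byte 0 then compares equal in
         * both buffers and the subtraction yields the required 0.
         */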
        bsf     %eax, %edx              # location of the first mismatch
        cmovz   %eax, %edx              # including if there is no mismatch
        movzbl  (%rdi, %rdx, 1), %eax   # mismatching bytes
        movzbl  (%rsi, %rdx, 1), %edx
        sub     %edx, %eax
#endif
        ret

        /* empty input */
.L0:    xor     %eax, %eax
        ret

        /* compare 33+ bytes */
        ALIGN_TEXT
.Llong: movdqu  (%rdi), %xmm0           # load head
        movdqu  (%rsi), %xmm2
        mov     %rdi, %rcx
        sub     %rdi, %rsi              # express rsi as distance from rdi
        and     $~0xf, %rdi             # align rdi to 16 bytes
        movdqu  16(%rsi, %rdi, 1), %xmm1
        pcmpeqb 16(%rdi), %xmm1         # compare second half of this iteration
        add     %rcx, %rdx              # pointer to the end of the buffer
        jc      .Loverflow              # did this overflow?
0:      pcmpeqb %xmm2, %xmm0
        pmovmskb %xmm0, %eax
        xor     $0xffff, %eax           # any mismatch?
        jne     .Lmismatch_head
        add     $64, %rdi               # advance to next iteration
        jmp     1f                      # and get going with the loop

        /*
         * If we got here, a buffer length was passed to memcmp(a, b, len)
         * such that a + len < a.  While this sort of usage is illegal,
         * it is plausible that a caller tries to do something like
         * memcmp(a, b, SIZE_MAX) if a and b are known to differ, intending
         * for memcmp() to stop comparing at the first mismatch.  This
         * behaviour is not guaranteed by any version of ISO/IEC 9899,
         * but usually works out in practice.  Let's try to make this
         * case work by comparing until the end of the address space.
         */
.Loverflow:
        mov     $-1, %rdx               # compare until the end of memory
        jmp     0b
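
        /*
         * Loop invariant: at label 1 below, xmm0 and xmm1 hold the
         * comparison results for the 32 bytes ending 32 bytes before
         * the current %rdi (on the first pass, the low half covers the
         * unaligned head instead).
         */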
        /* process buffer 32 bytes at a time */
        ALIGN_TEXT
0:      movdqu  -32(%rsi, %rdi, 1), %xmm0
        movdqu  -16(%rsi, %rdi, 1), %xmm1
        pcmpeqb -32(%rdi), %xmm0
        pcmpeqb -16(%rdi), %xmm1
        add     $32, %rdi               # advance to next iteration
1:      pand    %xmm0, %xmm1            # 0xff where both halves matched
        pmovmskb %xmm1, %eax
        cmp     $0xffff, %eax           # all bytes matched?
        jne     .Lmismatch
        cmp     %rdx, %rdi              # end of buffer reached?
        jb      0b

        /* less than 32 bytes left to compare */
        movdqu  -16(%rdx), %xmm1        # load 32 byte tail through end pointer
        movdqu  -16(%rdx, %rsi, 1), %xmm3
        movdqu  -32(%rdx), %xmm0
        movdqu  -32(%rdx, %rsi, 1), %xmm2
        pcmpeqb %xmm3, %xmm1
        pcmpeqb %xmm2, %xmm0
        pmovmskb %xmm1, %ecx
        pmovmskb %xmm0, %eax
        shl     $16, %ecx
        or      %ecx, %eax              # ones where the buffers match
        not     %eax                    # ones where there is a mismatch
#ifndef BCMP
        bsf     %eax, %ecx              # location of the first mismatch
        cmovz   %eax, %ecx              # including if there is no mismatch
        add     %rcx, %rdx              # pointer to potential mismatch
        movzbl  -32(%rdx), %eax         # mismatching bytes
        movzbl  -32(%rdx, %rsi, 1), %edx
        sub     %edx, %eax
#endif
        ret

#ifdef BCMP
.Lmismatch:
        mov     $1, %eax
.Lmismatch_head:
        ret
#else /* memcmp */
.Lmismatch_head:
        tzcnt   %eax, %eax              # location of mismatch
        add     %rax, %rcx              # pointer to mismatch
        movzbl  (%rcx), %eax            # mismatching bytes
        movzbl  (%rcx, %rsi, 1), %ecx
        sub     %ecx, %eax
        ret

.Lmismatch:
        movdqu  -48(%rsi, %rdi, 1), %xmm1
        pcmpeqb -48(%rdi), %xmm1        # reconstruct xmm1 before PAND
        pmovmskb %xmm0, %eax            # matches in first 16 bytes
        pmovmskb %xmm1, %edx            # matches in second 16 bytes
        shl     $16, %edx
        or      %edx, %eax              # matches in both
        not     %eax                    # ones where there is a mismatch
        tzcnt   %eax, %eax              # location of first mismatch
        add     %rax, %rdi              # pointer to mismatch
        movzbl  -64(%rdi), %eax         # mismatching bytes
        movzbl  -64(%rdi, %rsi, 1), %ecx
        sub     %ecx, %eax
        ret
#endif
ARCHEND(memcmp, baseline)
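
        /* an empty .note.GNU-stack section marks the stack as non-executable */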
.section .note.GNU-stack,"",%progbits