redis/src/crc64.c
Moti Cohen cf83803880
CRC64 perf improvements (#13638)
Improve the performance of crc64 for large batches by processing large
number
of bytes in parallel and combining the results.

---------
Co-authored-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
Co-authored-by: Josiah Carlson <josiah.carlson@gmail.com>
2024-11-12 09:21:22 +02:00

368 lines
14 KiB
C

/* Copyright (c) 2014, Matt Stancliff <matt@genges.com>
* Copyright (c) 2020, Amazon Web Services
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE. */
#include <stdlib.h>
#include "crc64.h"
#include "crcspeed.h"
#include "redisassert.h"
#include "testhelp.h"
static uint64_t crc64_table[8][256] = {{0}};
#define POLY UINT64_C(0xad93d23594c935a9)
/******************** BEGIN GENERATED PYCRC FUNCTIONS ********************/
/**
* Generated on Sun Dec 21 14:14:07 2014,
* by pycrc v0.8.2, https://www.tty1.net/pycrc/
*
* LICENSE ON GENERATED CODE:
* ==========================
* As of version 0.6, pycrc is released under the terms of the MIT licence.
* The code generated by pycrc is not considered a substantial portion of the
* software, therefore the author of pycrc will not claim any copyright on
* the generated code.
* ==========================
*
* CRC configuration:
* Width = 64
* Poly = 0xad93d23594c935a9
* XorIn = 0xffffffffffffffff
* ReflectIn = True
* XorOut = 0x0000000000000000
* ReflectOut = True
* Algorithm = bit-by-bit-fast
*
* Modifications after generation (by matt):
* - included finalize step in-line with update for single-call generation
* - re-worked some inner variable architectures
* - adjusted function parameters to match expected prototypes.
*****************************************************************************/
/**
* Reflect all bits of a \a data word of \a data_len bytes.
*
* \param data The data word to be reflected.
* \param data_len The width of \a data expressed in number of bits.
* \return The reflected data.
*****************************************************************************/
static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) {
/* only ever called for data_len == 64 in this codebase
*
* Borrowed from bit twiddling hacks, original in the public domain.
* https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
* Extended to 64 bits, and added byteswap for final 3 steps.
* 16-30x 64-bit operations, no comparisons (16 for native byteswap, 30 for pure C)
*/
assert(data_len <= 64);
/* swap odd and even bits */
data = ((data >> 1) & 0x5555555555555555ULL) | ((data & 0x5555555555555555ULL) << 1);
/* swap consecutive pairs */
data = ((data >> 2) & 0x3333333333333333ULL) | ((data & 0x3333333333333333ULL) << 2);
/* swap nibbles ... */
data = ((data >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((data & 0x0F0F0F0F0F0F0F0FULL) << 4);
#if defined(__GNUC__) || defined(__clang__)
data = __builtin_bswap64(data);
#else
/* swap bytes */
data = ((data >> 8) & 0x00FF00FF00FF00FFULL) | ((data & 0x00FF00FF00FF00FFULL) << 8);
/* swap 2-byte long pairs */
data = ( data >> 16 & 0xFFFF0000FFFFULL) | ((data & 0xFFFF0000FFFFULL) << 16);
/* swap 4-byte quads */
data = ( data >> 32 & 0xFFFFFFFFULL) | ((data & 0xFFFFFFFFULL) << 32);
#endif
/* adjust for non-64-bit reversals */
return data >> (64 - data_len);
}
/**
* Update the crc value with new data.
*
* \param crc The current crc value.
* \param data Pointer to a buffer of \a data_len bytes.
* \param data_len Number of bytes in the \a data buffer.
* \return The updated crc value.
******************************************************************************/
uint64_t _crc64(uint_fast64_t crc, const void *in_data, const uint64_t len) {
const uint8_t *data = in_data;
unsigned long long bit;
for (uint64_t offset = 0; offset < len; offset++) {
uint8_t c = data[offset];
for (uint_fast8_t i = 0x01; i & 0xff; i <<= 1) {
bit = crc & 0x8000000000000000;
if (c & i) {
bit = !bit;
}
crc <<= 1;
if (bit) {
crc ^= POLY;
}
}
crc &= 0xffffffffffffffff;
}
crc = crc & 0xffffffffffffffff;
return crc_reflect(crc, 64) ^ 0x0000000000000000;
}
/******************** END GENERATED PYCRC FUNCTIONS ********************/
/* Initializes the 16KB lookup tables. */
void crc64_init(void) {
crcspeed64native_init(_crc64, crc64_table);
}
/* Compute crc64 */
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) {
return crcspeed64native(crc64_table, crc, (void *) s, l);
}
/* Test main */
#ifdef REDIS_TEST
#include <stdio.h>
static void genBenchmarkRandomData(char *data, int count);
static int bench_crc64(unsigned char *data, uint64_t size, long long passes, uint64_t check, char *name, int csv);
static void bench_combine(char *label, uint64_t size, uint64_t expect, int csv);
long long _ustime(void);
#include <inttypes.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include "zmalloc.h"
#include "crccombine.h"
long long _ustime(void) {
struct timeval tv;
long long ust;
gettimeofday(&tv, NULL);
ust = ((long long)tv.tv_sec)*1000000;
ust += tv.tv_usec;
return ust;
}
static int bench_crc64(unsigned char *data, uint64_t size, long long passes, uint64_t check, char *name, int csv) {
uint64_t min = size, hash;
long long original_start = _ustime(), original_end;
for (long long i=passes; i > 0; i--) {
hash = crc64(0, data, size);
}
original_end = _ustime();
min = (original_end - original_start) * 1000 / passes;
/* approximate nanoseconds without nstime */
if (csv) {
printf("%s,%" PRIu64 ",%" PRIu64 ",%d\n",
name, size, (1000 * size) / min, hash == check);
} else {
printf("test size=%" PRIu64 " algorithm=%s %" PRIu64 " M/sec matches=%d\n",
size, name, (1000 * size) / min, hash == check);
}
return hash != check;
}
const uint64_t BENCH_RPOLY = UINT64_C(0x95ac9329ac4bc9b5);
static void bench_combine(char *label, uint64_t size, uint64_t expect, int csv) {
uint64_t min = size, start = expect, thash = expect ^ (expect >> 17);
long long original_start = _ustime(), original_end;
for (int i=0; i < 1000; i++) {
crc64_combine(thash, start, size, BENCH_RPOLY, 64);
}
original_end = _ustime();
/* ran 1000 times, want ns per, counted us per 1000 ... */
min = original_end - original_start;
if (csv) {
printf("%s,%" PRIu64 ",%" PRIu64 "\n", label, size, min);
} else {
printf("%s size=%" PRIu64 " in %" PRIu64 " nsec\n", label, size, min);
}
}
static void genBenchmarkRandomData(char *data, int count) {
static uint32_t state = 1234;
int i = 0;
while (count--) {
state = (state*1103515245+12345);
data[i++] = '0'+((state>>16)&63);
}
}
#define UNUSED(x) (void)(x)
int crc64Test(int argc, char *argv[], int flags) {
uint64_t crc64_test_size = 0;
int i, lastarg, csv = 0, loop = 0, combine = 0, testAll = 0;
again:
if ((argc>=4) && (!strcmp(argv[3],"custom"))) {
for (i = 4; i < argc; i++) {
lastarg = (i == (argc - 1));
if (!strcmp(argv[i], "--help")) {
goto usage;
} else if (!strcmp(argv[i], "--csv")) {
csv = 1;
} else if (!strcmp(argv[i], "-l")) {
loop = 1;
} else if (!strcmp(argv[i], "--crc")) {
if (lastarg) goto invalid;
crc64_test_size = atoll(argv[++i]);
} else if (!strcmp(argv[i], "--combine")) {
combine = 1;
} else {
invalid:
printf("Invalid option \"%s\" or option argument missing\n\n",
argv[i]);
usage:
printf(
"Usage: crc64 [OPTIONS]\n\n"
" --csv Output in CSV format\n"
" -l Loop. Run the tests forever\n"
" --crc <bytes> Benchmark crc64 faster options, using a buffer this big, and quit when done.\n"
" --combine Benchmark crc64 combine value ranges and timings.\n"
);
return 1;
}
}
} else {
crc64_test_size = 50000;
testAll = 1;
if (flags & REDIS_TEST_ACCURATE) crc64_test_size = 5000000;
}
if ((crc64_test_size == 0 && combine == 0) || testAll) {
crc64_init();
printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
(uint64_t)_crc64(0, "123456789", 9));
printf("[64speed]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
(uint64_t)crc64(0, (unsigned char*)"123456789", 9));
char li[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed "
"do eiusmod tempor incididunt ut labore et dolore magna "
"aliqua. Ut enim ad minim veniam, quis nostrud exercitation "
"ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis "
"aute irure dolor in reprehenderit in voluptate velit esse "
"cillum dolore eu fugiat nulla pariatur. Excepteur sint "
"occaecat cupidatat non proident, sunt in culpa qui officia "
"deserunt mollit anim id est laborum.";
printf("[calcula]: c7794709e69683b3 == %016" PRIx64 "\n",
(uint64_t)_crc64(0, li, sizeof(li)));
printf("[64speed]: c7794709e69683b3 == %016" PRIx64 "\n",
(uint64_t)crc64(0, (unsigned char*)li, sizeof(li)));
if (!testAll) return 0;
}
int init_this_loop = 1;
long long init_start, init_end;
do {
unsigned char* data = NULL;
uint64_t passes = 0;
if (crc64_test_size) {
data = zmalloc(crc64_test_size);
genBenchmarkRandomData((char*)data, crc64_test_size);
/* We want to hash about 1 gig of data in total, looped, to get a good
* idea of our performance.
*/
passes = (UINT64_C(0x100000000) / crc64_test_size);
passes = passes >= 2 ? passes : 2;
passes = passes <= 1000 ? passes : 1000;
}
crc64_init();
/* warm up the cache */
set_crc64_cutoffs(crc64_test_size+1, crc64_test_size+1);
uint64_t expect = crc64(0, data, crc64_test_size);
if ((!combine || testAll) && crc64_test_size) {
if (csv && init_this_loop) printf("algorithm,buffer,performance,crc64_matches\n");
/* get the single-character version for single-byte Redis behavior */
set_crc64_cutoffs(0, crc64_test_size+1);
assert(!bench_crc64(data, crc64_test_size, passes, expect, "crc_1byte", csv));
set_crc64_cutoffs(crc64_test_size+1, crc64_test_size+1);
/* run with 8-byte "single" path, crcfaster */
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crcspeed", csv)));
/* run with dual 8-byte paths */
set_crc64_cutoffs(1, crc64_test_size+1);
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crcdual", csv)));
/* run with tri 8-byte paths */
set_crc64_cutoffs(1, 1);
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crctri", csv)));
/* Be free memory region, be free. */
zfree(data);
data = NULL;
}
uint64_t INIT_SIZE = UINT64_C(0xffffffffffffffff);
if (combine || testAll) {
if (init_this_loop) {
init_start = _ustime();
crc64_combine(
UINT64_C(0xdeadbeefdeadbeef),
UINT64_C(0xfeebdaedfeebdaed),
INIT_SIZE,
BENCH_RPOLY, 64);
init_end = _ustime();
init_end -= init_start;
init_end *= 1000;
if (csv) {
printf("operation,size,nanoseconds\n");
printf("init_64,%" PRIu64 ",%" PRIu64 "\n", INIT_SIZE, (uint64_t)init_end);
} else {
printf("init_64 size=%" PRIu64 " in %" PRIu64 " nsec\n", INIT_SIZE, (uint64_t)init_end);
}
/* use the hash itself as the size (unpredictable) */
bench_combine("hash_as_size_combine", crc64_test_size, expect, csv);
/* let's do something big (predictable, so fast) */
bench_combine("largest_combine", INIT_SIZE, expect, csv);
}
bench_combine("combine", crc64_test_size, expect, csv);
}
init_this_loop = 0;
/* step down by ~1.641 for a range of test sizes */
crc64_test_size -= (crc64_test_size >> 2) + (crc64_test_size >> 3) + (crc64_test_size >> 6);
} while (crc64_test_size > 3);
if (loop) goto again;
return 0;
}
#endif