mirror of
https://github.com/redis/redis.git
synced 2026-02-03 20:39:54 -05:00
Improve the performance of crc64 for large batches by processing large number of bytes in parallel and combining the results. --------- Co-authored-by: Viktor Söderqvist <viktor.soderqvist@est.tech> Co-authored-by: Madelyn Olson <madelyneolson@gmail.com> Co-authored-by: Josiah Carlson <josiah.carlson@gmail.com>
368 lines
14 KiB
C
368 lines
14 KiB
C
/* Copyright (c) 2014, Matt Stancliff <matt@genges.com>
|
|
* Copyright (c) 2020, Amazon Web Services
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* * Neither the name of Redis nor the names of its contributors may be used
|
|
* to endorse or promote products derived from this software without
|
|
* specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE. */
|
|
|
|
#include <stdlib.h>
|
|
#include "crc64.h"
|
|
#include "crcspeed.h"
|
|
#include "redisassert.h"
|
|
#include "testhelp.h"
|
|
static uint64_t crc64_table[8][256] = {{0}};
|
|
|
|
#define POLY UINT64_C(0xad93d23594c935a9)
|
|
/******************** BEGIN GENERATED PYCRC FUNCTIONS ********************/
|
|
/**
|
|
* Generated on Sun Dec 21 14:14:07 2014,
|
|
* by pycrc v0.8.2, https://www.tty1.net/pycrc/
|
|
*
|
|
* LICENSE ON GENERATED CODE:
|
|
* ==========================
|
|
* As of version 0.6, pycrc is released under the terms of the MIT licence.
|
|
* The code generated by pycrc is not considered a substantial portion of the
|
|
* software, therefore the author of pycrc will not claim any copyright on
|
|
* the generated code.
|
|
* ==========================
|
|
*
|
|
* CRC configuration:
|
|
* Width = 64
|
|
* Poly = 0xad93d23594c935a9
|
|
* XorIn = 0xffffffffffffffff
|
|
* ReflectIn = True
|
|
* XorOut = 0x0000000000000000
|
|
* ReflectOut = True
|
|
* Algorithm = bit-by-bit-fast
|
|
*
|
|
* Modifications after generation (by matt):
|
|
* - included finalize step in-line with update for single-call generation
|
|
* - re-worked some inner variable architectures
|
|
* - adjusted function parameters to match expected prototypes.
|
|
*****************************************************************************/
|
|
|
|
/**
|
|
* Reflect all bits of a \a data word of \a data_len bytes.
|
|
*
|
|
* \param data The data word to be reflected.
|
|
* \param data_len The width of \a data expressed in number of bits.
|
|
* \return The reflected data.
|
|
*****************************************************************************/
|
|
static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) {
|
|
/* only ever called for data_len == 64 in this codebase
|
|
*
|
|
* Borrowed from bit twiddling hacks, original in the public domain.
|
|
* https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
|
|
* Extended to 64 bits, and added byteswap for final 3 steps.
|
|
* 16-30x 64-bit operations, no comparisons (16 for native byteswap, 30 for pure C)
|
|
*/
|
|
|
|
assert(data_len <= 64);
|
|
/* swap odd and even bits */
|
|
data = ((data >> 1) & 0x5555555555555555ULL) | ((data & 0x5555555555555555ULL) << 1);
|
|
/* swap consecutive pairs */
|
|
data = ((data >> 2) & 0x3333333333333333ULL) | ((data & 0x3333333333333333ULL) << 2);
|
|
/* swap nibbles ... */
|
|
data = ((data >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((data & 0x0F0F0F0F0F0F0F0FULL) << 4);
|
|
#if defined(__GNUC__) || defined(__clang__)
|
|
data = __builtin_bswap64(data);
|
|
#else
|
|
/* swap bytes */
|
|
data = ((data >> 8) & 0x00FF00FF00FF00FFULL) | ((data & 0x00FF00FF00FF00FFULL) << 8);
|
|
/* swap 2-byte long pairs */
|
|
data = ( data >> 16 & 0xFFFF0000FFFFULL) | ((data & 0xFFFF0000FFFFULL) << 16);
|
|
/* swap 4-byte quads */
|
|
data = ( data >> 32 & 0xFFFFFFFFULL) | ((data & 0xFFFFFFFFULL) << 32);
|
|
#endif
|
|
/* adjust for non-64-bit reversals */
|
|
return data >> (64 - data_len);
|
|
}
|
|
|
|
/**
|
|
* Update the crc value with new data.
|
|
*
|
|
* \param crc The current crc value.
|
|
* \param data Pointer to a buffer of \a data_len bytes.
|
|
* \param data_len Number of bytes in the \a data buffer.
|
|
* \return The updated crc value.
|
|
******************************************************************************/
|
|
uint64_t _crc64(uint_fast64_t crc, const void *in_data, const uint64_t len) {
|
|
const uint8_t *data = in_data;
|
|
unsigned long long bit;
|
|
|
|
for (uint64_t offset = 0; offset < len; offset++) {
|
|
uint8_t c = data[offset];
|
|
for (uint_fast8_t i = 0x01; i & 0xff; i <<= 1) {
|
|
bit = crc & 0x8000000000000000;
|
|
if (c & i) {
|
|
bit = !bit;
|
|
}
|
|
|
|
crc <<= 1;
|
|
if (bit) {
|
|
crc ^= POLY;
|
|
}
|
|
}
|
|
|
|
crc &= 0xffffffffffffffff;
|
|
}
|
|
|
|
crc = crc & 0xffffffffffffffff;
|
|
return crc_reflect(crc, 64) ^ 0x0000000000000000;
|
|
}
|
|
|
|
/******************** END GENERATED PYCRC FUNCTIONS ********************/
|
|
|
|
/* Initializes the 16KB lookup tables. */
|
|
void crc64_init(void) {
|
|
crcspeed64native_init(_crc64, crc64_table);
|
|
}
|
|
|
|
/* Compute crc64 */
|
|
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) {
|
|
return crcspeed64native(crc64_table, crc, (void *) s, l);
|
|
}
|
|
|
|
/* Test main */
|
|
#ifdef REDIS_TEST
|
|
#include <stdio.h>
|
|
|
|
static void genBenchmarkRandomData(char *data, int count);
|
|
static int bench_crc64(unsigned char *data, uint64_t size, long long passes, uint64_t check, char *name, int csv);
|
|
static void bench_combine(char *label, uint64_t size, uint64_t expect, int csv);
|
|
long long _ustime(void);
|
|
|
|
#include <inttypes.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
#include <sys/time.h>
|
|
#include <unistd.h>
|
|
|
|
#include "zmalloc.h"
|
|
#include "crccombine.h"
|
|
|
|
long long _ustime(void) {
|
|
struct timeval tv;
|
|
long long ust;
|
|
|
|
gettimeofday(&tv, NULL);
|
|
ust = ((long long)tv.tv_sec)*1000000;
|
|
ust += tv.tv_usec;
|
|
return ust;
|
|
}
|
|
|
|
static int bench_crc64(unsigned char *data, uint64_t size, long long passes, uint64_t check, char *name, int csv) {
|
|
uint64_t min = size, hash;
|
|
long long original_start = _ustime(), original_end;
|
|
for (long long i=passes; i > 0; i--) {
|
|
hash = crc64(0, data, size);
|
|
}
|
|
original_end = _ustime();
|
|
min = (original_end - original_start) * 1000 / passes;
|
|
/* approximate nanoseconds without nstime */
|
|
if (csv) {
|
|
printf("%s,%" PRIu64 ",%" PRIu64 ",%d\n",
|
|
name, size, (1000 * size) / min, hash == check);
|
|
} else {
|
|
printf("test size=%" PRIu64 " algorithm=%s %" PRIu64 " M/sec matches=%d\n",
|
|
size, name, (1000 * size) / min, hash == check);
|
|
}
|
|
return hash != check;
|
|
}
|
|
|
|
const uint64_t BENCH_RPOLY = UINT64_C(0x95ac9329ac4bc9b5);
|
|
|
|
static void bench_combine(char *label, uint64_t size, uint64_t expect, int csv) {
|
|
uint64_t min = size, start = expect, thash = expect ^ (expect >> 17);
|
|
long long original_start = _ustime(), original_end;
|
|
for (int i=0; i < 1000; i++) {
|
|
crc64_combine(thash, start, size, BENCH_RPOLY, 64);
|
|
}
|
|
original_end = _ustime();
|
|
/* ran 1000 times, want ns per, counted us per 1000 ... */
|
|
min = original_end - original_start;
|
|
if (csv) {
|
|
printf("%s,%" PRIu64 ",%" PRIu64 "\n", label, size, min);
|
|
} else {
|
|
printf("%s size=%" PRIu64 " in %" PRIu64 " nsec\n", label, size, min);
|
|
}
|
|
}
|
|
|
|
static void genBenchmarkRandomData(char *data, int count) {
|
|
static uint32_t state = 1234;
|
|
int i = 0;
|
|
|
|
while (count--) {
|
|
state = (state*1103515245+12345);
|
|
data[i++] = '0'+((state>>16)&63);
|
|
}
|
|
}
|
|
|
|
#define UNUSED(x) (void)(x)
|
|
int crc64Test(int argc, char *argv[], int flags) {
|
|
|
|
uint64_t crc64_test_size = 0;
|
|
int i, lastarg, csv = 0, loop = 0, combine = 0, testAll = 0;
|
|
|
|
again:
|
|
if ((argc>=4) && (!strcmp(argv[3],"custom"))) {
|
|
for (i = 4; i < argc; i++) {
|
|
lastarg = (i == (argc - 1));
|
|
if (!strcmp(argv[i], "--help")) {
|
|
goto usage;
|
|
} else if (!strcmp(argv[i], "--csv")) {
|
|
csv = 1;
|
|
} else if (!strcmp(argv[i], "-l")) {
|
|
loop = 1;
|
|
} else if (!strcmp(argv[i], "--crc")) {
|
|
if (lastarg) goto invalid;
|
|
crc64_test_size = atoll(argv[++i]);
|
|
} else if (!strcmp(argv[i], "--combine")) {
|
|
combine = 1;
|
|
} else {
|
|
invalid:
|
|
printf("Invalid option \"%s\" or option argument missing\n\n",
|
|
argv[i]);
|
|
usage:
|
|
printf(
|
|
"Usage: crc64 [OPTIONS]\n\n"
|
|
" --csv Output in CSV format\n"
|
|
" -l Loop. Run the tests forever\n"
|
|
" --crc <bytes> Benchmark crc64 faster options, using a buffer this big, and quit when done.\n"
|
|
" --combine Benchmark crc64 combine value ranges and timings.\n"
|
|
);
|
|
return 1;
|
|
}
|
|
}
|
|
} else {
|
|
crc64_test_size = 50000;
|
|
testAll = 1;
|
|
if (flags & REDIS_TEST_ACCURATE) crc64_test_size = 5000000;
|
|
}
|
|
|
|
if ((crc64_test_size == 0 && combine == 0) || testAll) {
|
|
crc64_init();
|
|
printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
|
|
(uint64_t)_crc64(0, "123456789", 9));
|
|
printf("[64speed]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
|
|
(uint64_t)crc64(0, (unsigned char*)"123456789", 9));
|
|
char li[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed "
|
|
"do eiusmod tempor incididunt ut labore et dolore magna "
|
|
"aliqua. Ut enim ad minim veniam, quis nostrud exercitation "
|
|
"ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis "
|
|
"aute irure dolor in reprehenderit in voluptate velit esse "
|
|
"cillum dolore eu fugiat nulla pariatur. Excepteur sint "
|
|
"occaecat cupidatat non proident, sunt in culpa qui officia "
|
|
"deserunt mollit anim id est laborum.";
|
|
printf("[calcula]: c7794709e69683b3 == %016" PRIx64 "\n",
|
|
(uint64_t)_crc64(0, li, sizeof(li)));
|
|
printf("[64speed]: c7794709e69683b3 == %016" PRIx64 "\n",
|
|
(uint64_t)crc64(0, (unsigned char*)li, sizeof(li)));
|
|
|
|
if (!testAll) return 0;
|
|
}
|
|
|
|
int init_this_loop = 1;
|
|
long long init_start, init_end;
|
|
|
|
do {
|
|
unsigned char* data = NULL;
|
|
uint64_t passes = 0;
|
|
if (crc64_test_size) {
|
|
data = zmalloc(crc64_test_size);
|
|
genBenchmarkRandomData((char*)data, crc64_test_size);
|
|
/* We want to hash about 1 gig of data in total, looped, to get a good
|
|
* idea of our performance.
|
|
*/
|
|
passes = (UINT64_C(0x100000000) / crc64_test_size);
|
|
passes = passes >= 2 ? passes : 2;
|
|
passes = passes <= 1000 ? passes : 1000;
|
|
}
|
|
|
|
crc64_init();
|
|
/* warm up the cache */
|
|
set_crc64_cutoffs(crc64_test_size+1, crc64_test_size+1);
|
|
uint64_t expect = crc64(0, data, crc64_test_size);
|
|
|
|
if ((!combine || testAll) && crc64_test_size) {
|
|
if (csv && init_this_loop) printf("algorithm,buffer,performance,crc64_matches\n");
|
|
|
|
/* get the single-character version for single-byte Redis behavior */
|
|
set_crc64_cutoffs(0, crc64_test_size+1);
|
|
assert(!bench_crc64(data, crc64_test_size, passes, expect, "crc_1byte", csv));
|
|
|
|
set_crc64_cutoffs(crc64_test_size+1, crc64_test_size+1);
|
|
/* run with 8-byte "single" path, crcfaster */
|
|
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crcspeed", csv)));
|
|
|
|
/* run with dual 8-byte paths */
|
|
set_crc64_cutoffs(1, crc64_test_size+1);
|
|
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crcdual", csv)));
|
|
|
|
/* run with tri 8-byte paths */
|
|
set_crc64_cutoffs(1, 1);
|
|
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crctri", csv)));
|
|
|
|
/* Be free memory region, be free. */
|
|
zfree(data);
|
|
data = NULL;
|
|
}
|
|
|
|
uint64_t INIT_SIZE = UINT64_C(0xffffffffffffffff);
|
|
if (combine || testAll) {
|
|
if (init_this_loop) {
|
|
init_start = _ustime();
|
|
crc64_combine(
|
|
UINT64_C(0xdeadbeefdeadbeef),
|
|
UINT64_C(0xfeebdaedfeebdaed),
|
|
INIT_SIZE,
|
|
BENCH_RPOLY, 64);
|
|
init_end = _ustime();
|
|
|
|
init_end -= init_start;
|
|
init_end *= 1000;
|
|
if (csv) {
|
|
printf("operation,size,nanoseconds\n");
|
|
printf("init_64,%" PRIu64 ",%" PRIu64 "\n", INIT_SIZE, (uint64_t)init_end);
|
|
} else {
|
|
printf("init_64 size=%" PRIu64 " in %" PRIu64 " nsec\n", INIT_SIZE, (uint64_t)init_end);
|
|
}
|
|
/* use the hash itself as the size (unpredictable) */
|
|
bench_combine("hash_as_size_combine", crc64_test_size, expect, csv);
|
|
|
|
/* let's do something big (predictable, so fast) */
|
|
bench_combine("largest_combine", INIT_SIZE, expect, csv);
|
|
}
|
|
bench_combine("combine", crc64_test_size, expect, csv);
|
|
}
|
|
init_this_loop = 0;
|
|
/* step down by ~1.641 for a range of test sizes */
|
|
crc64_test_size -= (crc64_test_size >> 2) + (crc64_test_size >> 3) + (crc64_test_size >> 6);
|
|
} while (crc64_test_size > 3);
|
|
if (loop) goto again;
|
|
return 0;
|
|
}
|
|
|
|
#endif
|