mirror of
https://github.com/redis/redis.git
synced 2026-02-03 20:39:54 -05:00
Optimize peak memory stats by switching from per-command checks to threshold-based (#14692)
Some checks are pending
CI / test-ubuntu-latest (push) Waiting to run
CI / test-sanitizer-address (push) Waiting to run
CI / build-debian-old (push) Waiting to run
CI / build-macos-latest (push) Waiting to run
CI / build-32bit (push) Waiting to run
CI / build-libc-malloc (push) Waiting to run
CI / build-centos-jemalloc (push) Waiting to run
CI / build-old-chain-jemalloc (push) Waiting to run
Codecov / code-coverage (push) Waiting to run
External Server Tests / test-external-standalone (push) Waiting to run
External Server Tests / test-external-cluster (push) Waiting to run
External Server Tests / test-external-nodebug (push) Waiting to run
Spellcheck / Spellcheck (push) Waiting to run
Some checks are pending
CI / test-ubuntu-latest (push) Waiting to run
CI / test-sanitizer-address (push) Waiting to run
CI / build-debian-old (push) Waiting to run
CI / build-macos-latest (push) Waiting to run
CI / build-32bit (push) Waiting to run
CI / build-libc-malloc (push) Waiting to run
CI / build-centos-jemalloc (push) Waiting to run
CI / build-old-chain-jemalloc (push) Waiting to run
Codecov / code-coverage (push) Waiting to run
External Server Tests / test-external-standalone (push) Waiting to run
External Server Tests / test-external-cluster (push) Waiting to run
External Server Tests / test-external-nodebug (push) Waiting to run
Spellcheck / Spellcheck (push) Waiting to run
This PR optimizes peak memory tracking by moving from **per-command checks** to a **threshold-based mechanism** in `zmalloc`. Instead of updating peak memory on every command, peak tracking is now triggered only when a thread's memory delta exceeds **100KB**. This reduces runtime overhead while keeping peak memory accuracy acceptable.

## Implementation Details

- Peak memory is tracked atomically in `zmalloc` when a thread's memory delta exceeds 100KB
- Thread-safe peak updates using CAS
- Peak tracking considers both:
  - current used memory
  - zmalloc-reported peak memory

## Performance Results (ARM AArch64)

All performance numbers were obtained on an **AWS m8g.metal (ARM AArch64)** instance. The database was pre-populated with **1M keys**, each holding a **1KB value**. Benchmarks were executed using memtier with a **10 SET : 90 GET ratio** and **pipeline = 10** ([full benchmark spec. here](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-1Mkeys-string-setget200c-1KiB-pipeline-10.yml)).

| Environment | Baseline `redis/redis` unstable (median ± std.dev) | Comparison `paulorsousa/redis` `f05a4bd273cb4d63ff03d33e6207837b6e51de86` (median) | % change (higher better) | Note |
|------------------------------|----------------------------------------------------|----------------------------------------------------------------------------------:|--------------------------|-----------------------|
| oss-standalone | 802,830 ± 0.2% (7 datapoints) | 796,660 | -0.8% | No change |
| oss-standalone-02-io-threads | 982,698 ± 0.6% (7 datapoints) | 980,520 | -0.2% | No change |
| oss-standalone-04-io-threads | 2,573,244 ± 1.9% (7 datapoints) | 2,630,931 | +2.2% | Potential improvement |
| oss-standalone-08-io-threads | 2,343,609 ± 1.6% (7 datapoints) | 2,455,630 | +4.8% | Improvement |
This commit is contained in:
parent
e3c38aab66
commit
c4baa64ea8
7 changed files with 89 additions and 18 deletions
|
|
@ -11,6 +11,7 @@
|
|||
* atomicSet(var,value) -- Set the atomic counter value
|
||||
* atomicGetWithSync(var,value) -- 'atomicGet' with inter-thread synchronization
|
||||
* atomicSetWithSync(var,value) -- 'atomicSet' with inter-thread synchronization
|
||||
* atomicCompareExchange(type,var,expected_var,desired) -- Compare and exchange (CAS) operation
|
||||
*
|
||||
* Atomic operations on flags.
|
||||
* Flag type can be int, long, long long or their unsigned counterparts.
|
||||
|
|
@ -110,6 +111,8 @@
|
|||
} while(0)
|
||||
#define atomicSetWithSync(var,value) \
|
||||
atomic_store_explicit(&var,value,memory_order_seq_cst)
|
||||
/* C11 <stdatomic.h> variant: weak compare-and-exchange on 'var' using relaxed
 * ordering for both the success and the failure case. Evaluates to non-zero
 * when 'var' was replaced by 'desired'. On failure 'expected_var' is
 * overwritten with the value currently held by 'var'; because the weak form
 * may also fail spuriously, callers are expected to retry in a loop.
 * The 'type' argument is not used by this variant. */
#define atomicCompareExchange(type,var,expected_var,desired) \
|
||||
atomic_compare_exchange_weak_explicit(&var,&expected_var,desired,memory_order_relaxed,memory_order_relaxed)
|
||||
#define atomicFlagGetSet(var,oldvalue_var) \
|
||||
oldvalue_var = atomic_exchange_explicit(&var,1,memory_order_relaxed)
|
||||
#define REDIS_ATOMIC_API "c11-builtin"
|
||||
|
|
@ -135,6 +138,8 @@
|
|||
} while(0)
|
||||
#define atomicSetWithSync(var,value) \
|
||||
__atomic_store_n(&var,value,__ATOMIC_SEQ_CST)
|
||||
/* GCC/Clang __atomic builtin variant: the fourth argument (1) selects the
 * weak form, and both memory orders are relaxed. Evaluates to non-zero when
 * 'var' was replaced by 'desired'. On failure 'expected_var' is overwritten
 * with the value currently held by 'var'; the weak form may fail spuriously,
 * so callers are expected to retry in a loop.
 * The 'type' argument is not used by this variant. */
#define atomicCompareExchange(type,var,expected_var,desired) \
|
||||
__atomic_compare_exchange_n(&var,&expected_var,desired,1,__ATOMIC_RELAXED,__ATOMIC_RELAXED)
|
||||
#define atomicFlagGetSet(var,oldvalue_var) \
|
||||
oldvalue_var = __atomic_exchange_n(&var,1,__ATOMIC_RELAXED)
|
||||
#define REDIS_ATOMIC_API "atomic-builtin"
|
||||
|
|
@ -164,6 +169,12 @@
|
|||
ANNOTATE_HAPPENS_BEFORE(&var); \
|
||||
while(!__sync_bool_compare_and_swap(&var,var,value,__sync_synchronize)); \
|
||||
} while(0)
|
||||
/* Legacy __sync builtin variant, written as a statement expression so it can
 * yield a value like the other variants. __sync_val_compare_and_swap returns
 * the value 'var' held before the operation: the exchange succeeded when that
 * value equals 'expected_var'. On failure the observed value is copied into
 * 'expected_var', so a retry loop sees the refreshed value. 'type' is needed
 * here to declare the temporary that holds the previous value.
 * Note that 'expected_var' is evaluated more than once. */
#define atomicCompareExchange(type,var,expected_var,desired) ({ \
|
||||
type _old = __sync_val_compare_and_swap(&var,expected_var,desired); \
|
||||
int _success = (_old == expected_var); \
|
||||
if (!_success) expected_var = _old; \
|
||||
_success; \
|
||||
})
|
||||
#define atomicFlagGetSet(var,oldvalue_var) \
|
||||
oldvalue_var = __sync_val_compare_and_swap(&var,0,1)
|
||||
#define REDIS_ATOMIC_API "sync-builtin"
|
||||
|
|
|
|||
|
|
@ -3531,13 +3531,13 @@ void startLoadingFile(size_t size, char* filename, int rdbflags) {
|
|||
/* Refresh the absolute loading progress info */
|
||||
void loadingAbsProgress(off_t pos) {
|
||||
server.loading_loaded_bytes = pos;
|
||||
updatePeakMemory(zmalloc_used_memory());
|
||||
updatePeakMemory();
|
||||
}
|
||||
|
||||
/* Refresh the incremental loading progress info */
|
||||
void loadingIncrProgress(off_t size) {
|
||||
server.loading_loaded_bytes += size;
|
||||
updatePeakMemory(zmalloc_used_memory());
|
||||
updatePeakMemory();
|
||||
}
|
||||
|
||||
/* Update the file name currently being loaded */
|
||||
|
|
|
|||
23
src/server.c
23
src/server.c
|
|
@ -1400,16 +1400,23 @@ void checkChildrenDone(void) {
|
|||
}
|
||||
|
||||
/* Record the max memory used since the server was started. */
|
||||
void updatePeakMemory(size_t used_memory) {
|
||||
if (unlikely(used_memory > server.stat_peak_memory)) {
|
||||
server.stat_peak_memory = used_memory;
|
||||
/* Refresh server.stat_peak_memory and server.stat_peak_memory_time from two
 * sources: the memory in use right now, and the peak that zmalloc published
 * on its own. Whichever is larger than the stored peak replaces it. */
void updatePeakMemory(void) {
|
||||
size_t zmalloc_used = zmalloc_used_memory();
|
||||
if (zmalloc_used > server.stat_peak_memory) {
|
||||
server.stat_peak_memory = zmalloc_used;
|
||||
server.stat_peak_memory_time = server.unixtime;
|
||||
}
|
||||
|
||||
/* zmalloc publishes its own peak from the allocation path, so it can hold a
 * value higher than the sample taken above. When it does, adopt it together
 * with the timestamp zmalloc recorded for it rather than server.unixtime. */
size_t zmalloc_peak = zmalloc_get_peak_memory();
|
||||
if (zmalloc_peak > server.stat_peak_memory) {
|
||||
server.stat_peak_memory = zmalloc_peak;
|
||||
server.stat_peak_memory_time = zmalloc_get_peak_memory_time();
|
||||
}
|
||||
}
|
||||
|
||||
/* Called from serverCron and cronUpdateMemoryStats to update cached memory metrics. */
|
||||
void cronUpdateMemoryStats(void) {
|
||||
updatePeakMemory(zmalloc_used_memory());
|
||||
updatePeakMemory();
|
||||
|
||||
run_with_period(100) {
|
||||
/* Sample the RSS and other metrics here since this is a relatively slow call.
|
||||
|
|
@ -1843,7 +1850,7 @@ extern int ProcessingEventsWhileBlocked;
|
|||
void beforeSleep(struct aeEventLoop *eventLoop) {
|
||||
UNUSED(eventLoop);
|
||||
|
||||
updatePeakMemory(zmalloc_used_memory());
|
||||
updatePeakMemory();
|
||||
|
||||
/* Just call a subset of vital functions in case we are re-entering
|
||||
* the event loop from processEventsWhileBlocked(). Note that in this
|
||||
|
|
@ -4027,10 +4034,6 @@ void call(client *c, int flags) {
|
|||
server.stat_numcommands++;
|
||||
}
|
||||
|
||||
/* Record peak memory after each command and before the eviction that runs
|
||||
* before the next command. */
|
||||
updatePeakMemory(zmalloc_used_memory());
|
||||
|
||||
/* Do some maintenance job and cleanup */
|
||||
afterCommand(c);
|
||||
|
||||
|
|
@ -6192,7 +6195,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
|
|||
* may happen that the instantaneous value is slightly bigger than
|
||||
* the peak value. This may confuse users, so we update the peak
|
||||
* if found smaller than the current memory usage. */
|
||||
updatePeakMemory(zmalloc_used);
|
||||
updatePeakMemory();
|
||||
|
||||
bytesToHuman(hmem,sizeof(hmem),zmalloc_used);
|
||||
bytesToHuman(peak_hmem,sizeof(peak_hmem),server.stat_peak_memory);
|
||||
|
|
|
|||
|
|
@ -3560,7 +3560,7 @@ int zslLexValueLteMax(sds value, zlexrangespec *spec);
|
|||
|
||||
/* Core functions */
|
||||
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level);
|
||||
void updatePeakMemory(size_t used_memory);
|
||||
void updatePeakMemory(void);
|
||||
size_t freeMemoryGetNotCountedMemory(void);
|
||||
int overMaxmemoryAfterAlloc(size_t moremem);
|
||||
uint64_t getCommandFlags(client *c);
|
||||
|
|
|
|||
|
|
@ -81,14 +81,18 @@ void je_free_with_usize(void *ptr, size_t *usize);
|
|||
|
||||
#define MAX_THREADS 16 /* Keep it a power of 2 so we can use '&' instead of '%'. */
|
||||
#define THREAD_MASK (MAX_THREADS - 1)
|
||||
#define PEAK_CHECK_THRESHOLD (1024 * 100) /* 100KB */
|
||||
|
||||
typedef struct used_memory_entry {
|
||||
redisAtomic long long used_memory;
|
||||
char padding[CACHE_LINE_SIZE - sizeof(long long)];
|
||||
redisAtomic long long last_peak_check;
|
||||
char padding[CACHE_LINE_SIZE - sizeof(long long) - sizeof(long long)];
|
||||
} used_memory_entry;
|
||||
|
||||
static __attribute__((aligned(CACHE_LINE_SIZE))) used_memory_entry used_memory[MAX_THREADS];
|
||||
static redisAtomic size_t num_active_threads = 0;
|
||||
static redisAtomic size_t zmalloc_peak = 0;
|
||||
static redisAtomic time_t zmalloc_peak_time = 0;
|
||||
static __thread long my_thread_index = -1;
|
||||
|
||||
static inline void init_my_thread_index(void) {
|
||||
|
|
@ -98,9 +102,46 @@ static inline void init_my_thread_index(void) {
|
|||
}
|
||||
}
|
||||
|
||||
static void update_zmalloc_stat_alloc(long long num) {
|
||||
static void update_zmalloc_stat_alloc(long long bytes_delta) {
|
||||
init_my_thread_index();
|
||||
atomicIncr(used_memory[my_thread_index].used_memory, num);
|
||||
|
||||
/* Per-thread allocation counter and the last counter value at which we ran a
|
||||
* global peak check (throttles how often we call zmalloc_used_memory()). */
|
||||
long long thread_used, thread_last_peak_check_used;
|
||||
atomicIncrGet(used_memory[my_thread_index].used_memory, thread_used, bytes_delta);
|
||||
atomicGet(used_memory[my_thread_index].last_peak_check, thread_last_peak_check_used);
|
||||
|
||||
/* Only run the (expensive) global used/peak check after this thread's
|
||||
* allocation counter has advanced enough since the last check. */
|
||||
if (unlikely(thread_used - thread_last_peak_check_used > PEAK_CHECK_THRESHOLD)) {
|
||||
/* Snapshot of global used memory across all threads. */
|
||||
size_t used_mem = zmalloc_used_memory();
|
||||
|
||||
/* Current published global peak. */
|
||||
size_t published_peak;
|
||||
atomicGet(zmalloc_peak, published_peak);
|
||||
|
||||
if (used_mem > published_peak) {
|
||||
/* Try to publish `used_mem` as the new global peak.
|
||||
*
|
||||
* Another thread may update `zmalloc_peak` concurrently. Use a CAS loop:
|
||||
* on failure, `old_peak` is refreshed with the latest peak value, and we
|
||||
* retry only while our snapshot still exceeds it. */
|
||||
size_t old_peak = published_peak;
|
||||
while (used_mem > old_peak && !atomicCompareExchange(size_t, zmalloc_peak, old_peak, used_mem)) {
|
||||
/* CAS failed: `old_peak` now holds the current `zmalloc_peak`. */
|
||||
}
|
||||
|
||||
/* If we raised the peak, record when it was reached. */
|
||||
if (used_mem > old_peak) {
|
||||
atomicSet(zmalloc_peak_time, time(NULL));
|
||||
}
|
||||
}
|
||||
|
||||
/* Record the thread counter value at which we last ran a global peak check,
|
||||
* to throttle future checks for this thread. */
|
||||
atomicSet(used_memory[my_thread_index].last_peak_check, thread_used);
|
||||
}
|
||||
}
|
||||
|
||||
static void update_zmalloc_stat_free(long long num) {
|
||||
|
|
@ -183,7 +224,7 @@ void *zmalloc_usable(size_t size, size_t *usable) {
|
|||
void *ptr = ztrymalloc_usable_internal(size, &usable_size);
|
||||
if (!ptr) zmalloc_oom_handler(size);
|
||||
#ifdef HAVE_MALLOC_SIZE
|
||||
ptr = extend_to_usable(ptr, usable_size);
|
||||
if (ptr) ptr = extend_to_usable(ptr, usable_size);
|
||||
#endif
|
||||
if (usable) *usable = usable_size;
|
||||
return ptr;
|
||||
|
|
@ -538,6 +579,18 @@ size_t zmalloc_used_memory(void) {
|
|||
return total_mem;
|
||||
}
|
||||
|
||||
/* Return the peak used-memory value published by zmalloc, read atomically
 * from 'zmalloc_peak'. The variable is initialised to 0 and is raised by
 * update_zmalloc_stat_alloc() when a threshold-triggered check observes a
 * total above the previously published peak. */
size_t zmalloc_get_peak_memory(void) {
|
||||
size_t peak;
|
||||
atomicGet(zmalloc_peak, peak);
|
||||
return peak;
|
||||
}
|
||||
|
||||
/* Return the timestamp stored alongside the zmalloc peak, read atomically
 * from 'zmalloc_peak_time'. The variable is initialised to 0 and is set to
 * time(NULL) by update_zmalloc_stat_alloc() when it raises the peak.
 * The peak and its timestamp are stored as two separate atomics, so they are
 * not read as a single consistent pair. */
time_t zmalloc_get_peak_memory_time(void) {
|
||||
time_t t;
|
||||
atomicGet(zmalloc_peak_time, t);
|
||||
return t;
|
||||
}
|
||||
|
||||
void zmalloc_set_oom_handler(void (*oom_handler)(size_t)) {
|
||||
zmalloc_oom_handler = oom_handler;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,6 +87,8 @@
|
|||
#define HAVE_ALLOC_WITH_USIZE
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
|
||||
/* 'noinline' attribute is intended to prevent the `-Wstringop-overread` warning
|
||||
* when using gcc-12 later with LTO enabled. It may be removed once the
|
||||
* bug[https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96503] is fixed. */
|
||||
|
|
@ -108,6 +110,8 @@ void zfree_usable(void *ptr, size_t *usable);
|
|||
__attribute__((malloc)) char *zstrdup(const char *s);
|
||||
__attribute__((malloc)) char *zstrdup_usable(const char *s, size_t *usable);
|
||||
size_t zmalloc_used_memory(void);
|
||||
size_t zmalloc_get_peak_memory(void);
|
||||
time_t zmalloc_get_peak_memory_time(void);
|
||||
void zmalloc_set_oom_handler(void (*oom_handler)(size_t));
|
||||
size_t zmalloc_get_rss(void);
|
||||
int zmalloc_get_allocator_info(int refresh_stats, size_t *allocated, size_t *active, size_t *resident,
|
||||
|
|
|
|||
|
|
@ -798,7 +798,7 @@ test {Replicas that was marked as CLIENT_CLOSE_ASAP should not keep the replicat
|
|||
# exceed the replica soft limit. Furthermore, as the replica release its reference to
|
||||
# replication backlog, it should be properly trimmed, the memory usage of replication
|
||||
# backlog should not significantly exceed repl-backlog-size (default 1MB). */
|
||||
assert_lessthan [getInfoProperty $res used_memory_peak] 10000000;# less than 10mb
|
||||
assert_lessthan [getInfoProperty $res used_memory_peak] 20000000;# less than 20mb
|
||||
assert_lessthan [getInfoProperty $res mem_replication_backlog] 2000000;# less than 2mb
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue