mirror of
https://github.com/haproxy/haproxy.git
synced 2026-02-11 06:43:42 -05:00
MINOR: activity: collect CPU time spent on memory allocations for each task
When task profiling is enabled, the pool alloc/free code will measure the
time it takes to perform memory allocation after a cache miss or memory
freeing to the shared cache or OS. The time taken with the thread-local
cache is never measured as measuring that time is very expensive compared
to the pool access time. Here doing so costs around 2% performance at 2M
req/s, only when task profiling is enabled, so this remains reasonable.
The scheduler takes care of collecting that time and updating the
sched_activity entry corresponding to the current task when task profiling
is enabled.
The goal clearly is to track places that are wasting CPU time allocating
and releasing too often, or causing large evictions. This appears like
this in "show profiling tasks aggr":
Tasks activity over 11.428 sec till 0.000 sec ago:
function calls cpu_tot cpu_avg lkw_avg lkd_avg mem_avg lat_avg
process_stream 44183891 16.47m 22.36us 491.0ns 1.154us 1.000ns 101.1us
h1_io_cb 57386064 4.011m 4.193us 20.00ns 16.00ns - 29.47us
sc_conn_io_cb 42088024 49.04s 1.165us - - - 54.67us
h1_timeout_task 438171 196.5ms 448.0ns - - - 100.1us
srv_cleanup_toremove_conns 65 1.468ms 22.58us 184.0ns 87.00ns - 101.3us
task_process_applet 3 508.0us 169.3us - 107.0us 1.847us 29.67us
srv_cleanup_idle_conns 6 225.3us 37.55us 15.74us 36.84us - 49.47us
accept_queue_process 2 45.62us 22.81us - - 4.949us 54.33us
This commit is contained in:
parent
195794eb59
commit
9d8c2a888b
3 changed files with 44 additions and 5 deletions
|
|
@@ -161,6 +161,7 @@ struct thread_ctx {
|
|||
uint32_t sched_wake_date; /* current task/tasklet's wake date in 32-bit ns or 0 if not supported */
|
||||
uint64_t sched_call_date; /* current task/tasklet's call date in ns */
|
||||
uint64_t lock_wait_total; /* total time in ns spent waiting for a lock (task prof) */
|
||||
uint64_t mem_wait_total; /* total time in ns spent allocating/freeing memory (task prof) */
|
||||
uint64_t lock_start_date; /* date when first locked was obtained (task prof) */
|
||||
uint64_t locked_total; /* total time in ns spent with at least one lock held (task prof) */
|
||||
|
||||
|
|
|
|||
45
src/pool.c
45
src/pool.c
|
|
@@ -20,6 +20,7 @@
|
|||
#include <haproxy/cfgparse.h>
|
||||
#include <haproxy/channel.h>
|
||||
#include <haproxy/cli.h>
|
||||
#include <haproxy/clock.h>
|
||||
#include <haproxy/errors.h>
|
||||
#include <haproxy/global.h>
|
||||
#include <haproxy/list.h>
|
||||
|
|
@@ -787,10 +788,18 @@ void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
|
|||
pool_cache_bytes += pool->size;
|
||||
|
||||
if (unlikely(pool_cache_bytes > global.tune.pool_cache_size * 3 / 4)) {
|
||||
uint64_t mem_wait_start = 0;
|
||||
|
||||
if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
|
||||
mem_wait_start = now_mono_time();
|
||||
|
||||
if (ph->count >= 16 + pool_cache_count / 8 + CONFIG_HAP_POOL_CLUSTER_SIZE)
|
||||
pool_evict_from_local_cache(pool, 0);
|
||||
if (pool_cache_bytes > global.tune.pool_cache_size)
|
||||
pool_evict_from_local_caches();
|
||||
|
||||
if (unlikely(mem_wait_start))
|
||||
th_ctx->mem_wait_total += now_mono_time() - mem_wait_start;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@@ -941,8 +950,12 @@ void pool_flush(struct pool_head *pool)
|
|||
void pool_gc(struct pool_head *pool_ctx)
|
||||
{
|
||||
struct pool_head *entry;
|
||||
uint64_t mem_wait_start = 0;
|
||||
int isolated = thread_isolated();
|
||||
|
||||
if (th_ctx->flags & TH_FL_TASK_PROFILING)
|
||||
mem_wait_start = now_mono_time();
|
||||
|
||||
if (!isolated)
|
||||
thread_isolate();
|
||||
|
||||
|
|
@@ -975,6 +988,9 @@ void pool_gc(struct pool_head *pool_ctx)
|
|||
|
||||
if (!isolated)
|
||||
thread_release();
|
||||
|
||||
if (mem_wait_start)
|
||||
th_ctx->mem_wait_total += now_mono_time() - mem_wait_start;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@@ -995,9 +1011,19 @@ void *__pool_alloc(struct pool_head *pool, unsigned int flags)
|
|||
if (likely(!(pool_debugging & POOL_DBG_NO_CACHE)) && !p)
|
||||
p = pool_get_from_cache(pool, caller);
|
||||
|
||||
if (unlikely(!p))
|
||||
if (unlikely(!p)) {
|
||||
/* count allocation time only for cache misses */
|
||||
uint64_t mem_wait_start = 0;
|
||||
|
||||
if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
|
||||
mem_wait_start = now_mono_time();
|
||||
|
||||
p = pool_alloc_nocache(pool, caller);
|
||||
|
||||
if (unlikely(mem_wait_start))
|
||||
th_ctx->mem_wait_total += now_mono_time() - mem_wait_start;
|
||||
}
|
||||
|
||||
if (likely(p)) {
|
||||
#ifdef USE_MEMORY_PROFILING
|
||||
if (unlikely(profiling & HA_PROF_MEMORY)) {
|
||||
|
|
@@ -1061,11 +1087,20 @@ void __pool_free(struct pool_head *pool, void *ptr)
|
|||
if (unlikely((pool_debugging & POOL_DBG_NO_CACHE) ||
|
||||
(pool->flags & MEM_F_UAF) ||
|
||||
global.tune.pool_cache_size < pool->size)) {
|
||||
pool_free_nocache(pool, ptr);
|
||||
return;
|
||||
}
|
||||
uint64_t mem_wait_start = 0;
|
||||
|
||||
pool_put_to_cache(pool, ptr, caller);
|
||||
if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
|
||||
mem_wait_start = now_mono_time();
|
||||
|
||||
pool_free_nocache(pool, ptr);
|
||||
|
||||
if (unlikely(mem_wait_start))
|
||||
th_ctx->mem_wait_total += now_mono_time() - mem_wait_start;
|
||||
}
|
||||
else {
|
||||
/* this one will count its own time itself */
|
||||
pool_put_to_cache(pool, ptr, caller);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@@ -570,6 +570,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
|
|||
t->calls++;
|
||||
|
||||
th_ctx->lock_wait_total = 0;
|
||||
th_ctx->mem_wait_total = 0;
|
||||
th_ctx->locked_total = 0;
|
||||
th_ctx->sched_wake_date = t->wake_date;
|
||||
if (th_ctx->sched_wake_date || (t->state & TASK_F_WANTS_TIME)) {
|
||||
|
|
@@ -684,6 +685,8 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
|
|||
HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
|
||||
if (th_ctx->lock_wait_total)
|
||||
HA_ATOMIC_ADD(&profile_entry->lkw_time, th_ctx->lock_wait_total);
|
||||
if (th_ctx->mem_wait_total)
|
||||
HA_ATOMIC_ADD(&profile_entry->mem_time, th_ctx->mem_wait_total);
|
||||
if (th_ctx->locked_total)
|
||||
HA_ATOMIC_ADD(&profile_entry->lkd_time, th_ctx->locked_total);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue