MEDIUM: activity: apply and use new finegrained task profiling settings
In continuity with the previous patch, this one makes use of the new
profiling flags. Based on the global "profiling" setting, when switching
profiling on, we set or clear two flags on the thread context,
TH_FL_TASK_PROFILING_L and TH_FL_TASK_PROFILING_M, to indicate whether
lock profiling and/or malloc profiling are desired when profiling is
enabled. These flags are checked along with TH_FL_TASK_PROFILING to
decide when to collect time around a lock or a malloc.

By default we're back to the behavior of 3.2, in that neither lock nor
malloc times are collected anymore. This alone is sufficient to make the
CPU usage spent in the VDSO drop from 22% to 2.2% on a highly loaded
system.

This should be backported to 3.3 along with the previous patch.
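As a hedged illustration of the gating pattern this patch applies (the
_L/_M bit values match the hunks below; the value of TH_FL_TASK_PROFILING
itself is assumed here for the example), time is only collected when the
generic profiling flag AND the per-feature flag are both set, which a
single masked compare can decide:

#include <stdint.h>
#include <stdio.h>

#define TH_FL_TASK_PROFILING    0x00000004u  /* assumed value, for illustration */
#define TH_FL_TASK_PROFILING_L  0x00000400u  /* lock profiling requested */
#define TH_FL_TASK_PROFILING_M  0x00000800u  /* malloc profiling requested */

/* true only when task profiling is on AND lock profiling is requested */
static inline int prof_locks_on(uint32_t fl)
{
	return (fl & (TH_FL_TASK_PROFILING | TH_FL_TASK_PROFILING_L)) ==
	       (TH_FL_TASK_PROFILING | TH_FL_TASK_PROFILING_L);
}

int main(void)
{
	uint32_t fl = TH_FL_TASK_PROFILING;  /* profiling on, lock profiling off */

	printf("%d\n", prof_locks_on(fl));                           /* 0: no clock read */
	printf("%d\n", prof_locks_on(fl | TH_FL_TASK_PROFILING_L));  /* 1: clock read */
	return 0;
}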
parent a7b2353cb3
commit e2631ee5f7

4 changed files with 32 additions and 9 deletions
include/haproxy/thread.h

@@ -362,15 +362,19 @@ static inline unsigned long thread_isolated()
 		extern uint64_t now_mono_time(void); \
 		if (_LK_ != _LK_UN) { \
 			th_ctx->lock_level += bal; \
-			if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) \
+			if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L)) == \
+			             (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L))) \
 				lock_start = now_mono_time(); \
 		} \
 		(void)(expr); \
 		if (_LK_ == _LK_UN) { \
 			th_ctx->lock_level += bal; \
-			if (th_ctx->lock_level == 0 && unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) \
+			if (th_ctx->lock_level == 0 && \
+			    unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L)) == \
+			             (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L))) \
 				th_ctx->locked_total += now_mono_time() - th_ctx->lock_start_date; \
-		} else if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) { \
+		} else if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L)) == \
+		                    (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L))) { \
 			uint64_t now = now_mono_time(); \
 			if (lock_start) \
 				th_ctx->lock_wait_total += now - lock_start; \
@@ -384,7 +388,8 @@ static inline unsigned long thread_isolated()
 		typeof(expr) _expr = (expr); \
 		if (_expr == 0) { \
 			th_ctx->lock_level += bal; \
-			if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) { \
+			if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L)) == \
+			             (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L))) { \
 				if (_LK_ == _LK_UN && th_ctx->lock_level == 0) \
 					th_ctx->locked_total += now_mono_time() - th_ctx->lock_start_date; \
 				else if (_LK_ != _LK_UN && th_ctx->lock_level == 1) \
include/haproxy/tinfo-t.h

@@ -69,6 +69,8 @@ enum {
 #define TH_FL_IN_DBG_HANDLER    0x00000100  /* thread currently in the debug signal handler */
 #define TH_FL_IN_WDT_HANDLER    0x00000200  /* thread currently in the wdt signal handler */
 #define TH_FL_IN_ANY_HANDLER    0x00000380  /* mask to test if the thread is in any signal handler */
+#define TH_FL_TASK_PROFILING_L  0x00000400  /* task profiling in locks (also requires TASK_PROFILING) */
+#define TH_FL_TASK_PROFILING_M  0x00000800  /* task profiling in mem alloc (also requires TASK_PROFILING) */
 
 /* we have 4 buffer-wait queues, in highest to lowest emergency order */
 #define DYNBUF_NBQ 4
src/activity.c

@@ -659,8 +659,20 @@ void activity_count_runtime(uint32_t run_time)
 	if (!(_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_TASK_PROFILING)) {
 		if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_ON ||
 		             ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AON &&
-		              swrate_avg(run_time, TIME_STATS_SAMPLES) >= up)))
+		              swrate_avg(run_time, TIME_STATS_SAMPLES) >= up))) {
+
+			if (profiling & HA_PROF_TASKS_LOCK)
+				_HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING_L);
+			else
+				_HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_TASK_PROFILING_L);
+
+			if (profiling & HA_PROF_TASKS_MEM)
+				_HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING_M);
+			else
+				_HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_TASK_PROFILING_M);
+
 			_HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING);
+		}
 	} else {
 		if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_OFF ||
 		             ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AOFF &&
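A hedged sketch of the caching idea in the hunk above (the
HA_PROF_TASKS_LOCK/HA_PROF_TASKS_MEM bit values are assumed for the
example; the globals stand in for haproxy's "profiling" word and
th_ctx->flags): the shared global is consulted only when profiling
toggles, and its per-feature bits are mirrored into thread-local flags so
that hot paths never re-read the global.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define HA_PROF_TASKS_LOCK      0x1u   /* assumed bit values, for illustration */
#define HA_PROF_TASKS_MEM       0x2u
#define TH_FL_TASK_PROFILING_L  0x00000400u
#define TH_FL_TASK_PROFILING_M  0x00000800u

static _Atomic uint32_t profiling;   /* stand-in for the global setting */
static _Atomic uint32_t th_flags;    /* stand-in for th_ctx->flags */

/* mirror the global per-feature bits into the thread-local flags, as the
 * hunk above does when profiling switches on */
static void refresh_profiling_flags(void)
{
	uint32_t prof = atomic_load(&profiling);

	if (prof & HA_PROF_TASKS_LOCK)
		atomic_fetch_or(&th_flags, TH_FL_TASK_PROFILING_L);
	else
		atomic_fetch_and(&th_flags, ~TH_FL_TASK_PROFILING_L);

	if (prof & HA_PROF_TASKS_MEM)
		atomic_fetch_or(&th_flags, TH_FL_TASK_PROFILING_M);
	else
		atomic_fetch_and(&th_flags, ~TH_FL_TASK_PROFILING_M);
}

int main(void)
{
	atomic_store(&profiling, HA_PROF_TASKS_LOCK);
	refresh_profiling_flags();
	printf("flags=%#x\n", atomic_load(&th_flags));  /* 0x400: only _L set */
	return 0;
}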
src/pool.c (12 changes)
@@ -806,7 +806,8 @@ void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
 	if (unlikely(pool_cache_bytes > global.tune.pool_cache_size * 3 / 4)) {
 		uint64_t mem_wait_start = 0;
 
-		if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+		if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)) ==
+		             (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)))
 			mem_wait_start = now_mono_time();
 
 		if (ph->count >= 16 + pool_cache_count / 8 + CONFIG_HAP_POOL_CLUSTER_SIZE)
@@ -969,7 +970,8 @@ void pool_gc(struct pool_head *pool_ctx)
 	uint64_t mem_wait_start = 0;
 	int isolated = thread_isolated();
 
-	if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+	if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)) ==
+	             (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)))
 		mem_wait_start = now_mono_time();
 
 	if (!isolated)
@@ -1031,7 +1033,8 @@ void *__pool_alloc(struct pool_head *pool, unsigned int flags)
 		/* count allocation time only for cache misses */
 		uint64_t mem_wait_start = 0;
 
-		if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+		if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)) ==
+		             (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)))
 			mem_wait_start = now_mono_time();
 
 		p = pool_alloc_nocache(pool, caller);
@@ -1109,7 +1112,8 @@ void __pool_free(struct pool_head *pool, void *ptr)
 	    global.tune.pool_cache_size < pool->size)) {
 		uint64_t mem_wait_start = 0;
 
-		if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+		if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)) ==
+		             (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)))
 			mem_wait_start = now_mono_time();
 
 		pool_free_nocache(pool, ptr);
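The displayed pool.c hunks are truncated before the point where the
elapsed time is accumulated. A hedged sketch completing the pattern (the
mem_wait_total accumulator and the work callback are illustrative, not
haproxy APIs; now_mono_time() is emulated with clock_gettime(), and the
TH_FL_TASK_PROFILING value is assumed): the clock is read only when both
bits are set, keeping the common path free of VDSO calls.

#include <stdint.h>
#include <time.h>

#define TH_FL_TASK_PROFILING    0x00000004u  /* assumed value */
#define TH_FL_TASK_PROFILING_M  0x00000800u  /* matches the patch */

static uint64_t mem_wait_total;  /* illustrative accumulator */

/* stand-in for haproxy's monotonic clock, in nanoseconds */
static uint64_t now_mono_time(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

static void timed_alloc_path(uint32_t th_flags, void (*work)(void))
{
	uint64_t mem_wait_start = 0;

	/* read the clock only when both flags are set, as in the patch */
	if ((th_flags & (TH_FL_TASK_PROFILING | TH_FL_TASK_PROFILING_M)) ==
	    (TH_FL_TASK_PROFILING | TH_FL_TASK_PROFILING_M))
		mem_wait_start = now_mono_time();

	work();

	if (mem_wait_start)
		mem_wait_total += now_mono_time() - mem_wait_start;
}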