mirror of
https://github.com/isc-projects/bind9.git
synced 2026-04-21 14:17:27 -04:00
Simplify histogram quantiles
The `isc_histosummary_t` functions were written in the early days of `hg64` and carried over when I brought `hg64` into BIND. They were intended to be useful for graphing cumulative frequency distributions and the like, but in practice whatever draws charts is better off with a raw histogram export. Especially because of the poor performance of the old functions. The replacement `isc_histo_quantiles()` function is intended for providing a few quantile values in BIND's stats channel, when the user does not want the full histogram. Unlike the old functions, the caller provides all the query fractions up-front, so that the values can be found in a single scan instead of a scan per value. The scan is from larger values to smaller, since larger quantiles are usually more interesting, so the scan can bail out early.
This commit is contained in:
parent
bc2389b828
commit
cd0e7f853a
3 changed files with 326 additions and 488 deletions
367
lib/isc/histo.c
367
lib/isc/histo.c
|
|
@ -28,12 +28,22 @@
|
|||
#include <isc/tid.h>
|
||||
|
||||
/*
|
||||
* XXXFANF to be added to isc/util.h by a commmit in a qp-trie
|
||||
* XXXFANF to be added to <isc/util.h> by a commmit in a qp-trie
|
||||
* feature branch
|
||||
*/
|
||||
#define STRUCT_FLEX_SIZE(pointer, member, count) \
|
||||
(sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count))
|
||||
|
||||
/*
|
||||
* XXXFANF this should probably be in <isc/util.h> too
|
||||
*/
|
||||
#define OUTARG(ptr, val) \
|
||||
({ \
|
||||
if ((ptr) != NULL) { \
|
||||
*(ptr) = (val); \
|
||||
} \
|
||||
})
|
||||
|
||||
#define HISTO_MAGIC ISC_MAGIC('H', 's', 't', 'o')
|
||||
#define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC)
|
||||
#define HISTOMULTI_MAGIC ISC_MAGIC('H', 'g', 'M', 't')
|
||||
|
|
@ -72,38 +82,13 @@
|
|||
typedef atomic_uint_fast64_t hg_bucket_t;
|
||||
typedef atomic_ptr(hg_bucket_t) hg_chunk_t;
|
||||
|
||||
#define ISC_HISTO_FIELDS \
|
||||
uint magic; \
|
||||
uint sigbits; \
|
||||
isc_mem_t *mctx
|
||||
|
||||
struct isc_histo {
|
||||
ISC_HISTO_FIELDS;
|
||||
/* chunk array must be first after common fields */
|
||||
uint magic;
|
||||
uint sigbits;
|
||||
isc_mem_t *mctx;
|
||||
hg_chunk_t chunk[CHUNKS];
|
||||
};
|
||||
|
||||
/*
|
||||
* To convert between ranks and values, we scan the histogram to find the
|
||||
* required rank. Each per-chunk total contains the sum of all the buckets
|
||||
* in that chunk, so we can scan a chunk at a time rather than a bucket at
|
||||
* a time.
|
||||
*
|
||||
* XXXFANF When `sigbits` is large, the chunks get large and slow to scan.
|
||||
* If this turns out to be a problem, we could store ranks as well as
|
||||
* values in the summary, and use a binary search.
|
||||
*/
|
||||
struct isc_histosummary {
|
||||
ISC_HISTO_FIELDS;
|
||||
/* chunk array must be first after common fields */
|
||||
uint64_t *chunk[CHUNKS];
|
||||
uint64_t total[CHUNKS];
|
||||
uint64_t population;
|
||||
uint64_t maximum;
|
||||
size_t size;
|
||||
uint64_t buckets[];
|
||||
};
|
||||
|
||||
struct isc_histomulti {
|
||||
uint magic;
|
||||
uint size;
|
||||
|
|
@ -112,21 +97,6 @@ struct isc_histomulti {
|
|||
|
||||
/**********************************************************************/
|
||||
|
||||
#define OUTARG(ptr, val) \
|
||||
({ \
|
||||
if ((ptr) != NULL) { \
|
||||
*(ptr) = (val); \
|
||||
} \
|
||||
})
|
||||
|
||||
static inline uint64_t
|
||||
interpolate(uint64_t span, uint64_t mul, uint64_t div) {
|
||||
double frac = div > 0 ? (double)mul / (double)div : mul > 0 ? 1 : 0;
|
||||
return ((uint64_t)round(span * frac));
|
||||
}
|
||||
|
||||
/**********************************************************************/
|
||||
|
||||
void
|
||||
isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp) {
|
||||
REQUIRE(sigbits >= ISC_HISTO_MINBITS);
|
||||
|
|
@ -163,9 +133,9 @@ isc_histo_destroy(isc_histo_t **hgp) {
|
|||
/**********************************************************************/
|
||||
|
||||
uint
|
||||
isc_histo_sigbits(isc_historead_t hr) {
|
||||
REQUIRE(HISTO_VALID(hr.hg));
|
||||
return (hr.hg->sigbits);
|
||||
isc_histo_sigbits(isc_histo_t *hg) {
|
||||
REQUIRE(HISTO_VALID(hg));
|
||||
return (hg->sigbits);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -221,11 +191,11 @@ isc_histo_digits_to_bits(uint digits) {
|
|||
*
|
||||
* This branchless conversion is due to Paul Khuong: see bin_down_of() in
|
||||
* https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/
|
||||
*
|
||||
* This function is in the `isc_histo_inc()` fast path.
|
||||
*/
|
||||
static inline uint
|
||||
value_to_key(isc_historead_t hr, uint64_t value) {
|
||||
/* fast path */
|
||||
const isc_histo_t *hg = hr.hg;
|
||||
value_to_key(const isc_histo_t *hg, uint64_t value) {
|
||||
/* ensure that denormal numbers are all in chunk zero */
|
||||
uint64_t chunked = value | CHUNKSIZE(hg);
|
||||
int clz = __builtin_clzll((unsigned long long)(chunked));
|
||||
|
|
@ -259,16 +229,16 @@ value_to_key(isc_historead_t hr, uint64_t value) {
|
|||
*/
|
||||
|
||||
static inline uint64_t
|
||||
key_to_minval(isc_historead_t hr, uint key) {
|
||||
uint chunksize = CHUNKSIZE(hr.hg);
|
||||
key_to_minval(const isc_histo_t *hg, uint key) {
|
||||
uint chunksize = CHUNKSIZE(hg);
|
||||
uint exponent = (key / chunksize) - 1;
|
||||
uint64_t mantissa = (key % chunksize) + chunksize;
|
||||
return (key < chunksize ? key : mantissa << exponent);
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
key_to_maxval(isc_historead_t hr, uint key) {
|
||||
return (key_to_minval(hr, key + 1) - 1);
|
||||
key_to_maxval(const isc_histo_t *hg, uint key) {
|
||||
return (key_to_minval(hg, key + 1) - 1);
|
||||
}
|
||||
|
||||
/**********************************************************************/
|
||||
|
|
@ -293,29 +263,33 @@ key_to_new_bucket(isc_histo_t *hg, uint key) {
|
|||
}
|
||||
|
||||
static hg_bucket_t *
|
||||
get_chunk(isc_historead_t hr, uint chunk) {
|
||||
const hg_chunk_t *cpp = &hr.hg->chunk[chunk];
|
||||
return (atomic_load_acquire(cpp));
|
||||
get_chunk(const isc_histo_t *hg, uint chunk) {
|
||||
return (atomic_load_acquire(&hg->chunk[chunk]));
|
||||
}
|
||||
|
||||
static inline hg_bucket_t *
|
||||
key_to_bucket(isc_historead_t hr, uint key) {
|
||||
key_to_bucket(const isc_histo_t *hg, uint key) {
|
||||
/* fast path */
|
||||
uint chunksize = CHUNKSIZE(hr.hg);
|
||||
uint chunksize = CHUNKSIZE(hg);
|
||||
uint chunk = key / chunksize;
|
||||
uint bucket = key % chunksize;
|
||||
hg_bucket_t *cp = get_chunk(hr, chunk);
|
||||
hg_bucket_t *cp = get_chunk(hg, chunk);
|
||||
return (cp == NULL ? NULL : &cp[bucket]);
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
get_key_count(isc_historead_t hr, uint key) {
|
||||
hg_bucket_t *bp = key_to_bucket(hr, key);
|
||||
bucket_count(const hg_bucket_t *bp) {
|
||||
return (bp == NULL ? 0 : atomic_load_relaxed(bp));
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
get_key_count(const isc_histo_t *hg, uint key) {
|
||||
return (bucket_count(key_to_bucket(hg, key)));
|
||||
}
|
||||
|
||||
static inline void
|
||||
add_key_count(isc_histo_t *hg, uint key, uint64_t inc) {
|
||||
/* fast path */
|
||||
if (inc > 0) {
|
||||
hg_bucket_t *bp = key_to_bucket(hg, key);
|
||||
bp = bp != NULL ? bp : key_to_new_bucket(hg, key);
|
||||
|
|
@ -355,14 +329,14 @@ isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count) {
|
|||
}
|
||||
|
||||
isc_result_t
|
||||
isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp,
|
||||
isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp,
|
||||
uint64_t *countp) {
|
||||
REQUIRE(HISTO_VALID(hr.hg));
|
||||
REQUIRE(HISTO_VALID(hg));
|
||||
|
||||
if (key < BUCKETS(hr.hg)) {
|
||||
OUTARG(minp, key_to_minval(hr, key));
|
||||
OUTARG(maxp, key_to_maxval(hr, key));
|
||||
OUTARG(countp, get_key_count(hr, key));
|
||||
if (key < BUCKETS(hg)) {
|
||||
OUTARG(minp, key_to_minval(hg, key));
|
||||
OUTARG(maxp, key_to_maxval(hg, key));
|
||||
OUTARG(countp, get_key_count(hg, key));
|
||||
return (ISC_R_SUCCESS);
|
||||
} else {
|
||||
return (ISC_R_RANGE);
|
||||
|
|
@ -370,9 +344,7 @@ isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp,
|
|||
}
|
||||
|
||||
void
|
||||
isc_histo_next(isc_historead_t hr, uint *keyp) {
|
||||
const isc_histo_t *hg = hr.hg;
|
||||
|
||||
isc_histo_next(const isc_histo_t *hg, uint *keyp) {
|
||||
REQUIRE(HISTO_VALID(hg));
|
||||
REQUIRE(keyp != NULL);
|
||||
|
||||
|
|
@ -382,7 +354,7 @@ isc_histo_next(isc_historead_t hr, uint *keyp) {
|
|||
|
||||
key++;
|
||||
while (key < buckets && key % chunksize == 0 &&
|
||||
key_to_bucket(hr, key) == NULL)
|
||||
key_to_bucket(hg, key) == NULL)
|
||||
{
|
||||
key += chunksize;
|
||||
}
|
||||
|
|
@ -390,14 +362,14 @@ isc_histo_next(isc_historead_t hr, uint *keyp) {
|
|||
}
|
||||
|
||||
void
|
||||
isc_histo_merge(isc_histo_t **targetp, isc_historead_t source) {
|
||||
REQUIRE(HISTO_VALID(source.hg));
|
||||
isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source) {
|
||||
REQUIRE(HISTO_VALID(source));
|
||||
REQUIRE(targetp != NULL);
|
||||
|
||||
if (*targetp != NULL) {
|
||||
REQUIRE(HISTO_VALID(*targetp));
|
||||
} else {
|
||||
isc_histo_create(source.hg->mctx, source.hg->sigbits, targetp);
|
||||
isc_histo_create(source->mctx, source->sigbits, targetp);
|
||||
}
|
||||
|
||||
uint64_t min, max, count;
|
||||
|
|
@ -450,7 +422,7 @@ isc_histomulti_destroy(isc_histomulti_t **hmp) {
|
|||
}
|
||||
|
||||
void
|
||||
isc_histomulti_merge(isc_histo_t **hgp, isc_histomulti_t *hm) {
|
||||
isc_histomulti_merge(isc_histo_t **hgp, const isc_histomulti_t *hm) {
|
||||
REQUIRE(HISTOMULTI_VALID(hm));
|
||||
|
||||
for (uint i = 0; i < hm->size; i++) {
|
||||
|
|
@ -477,17 +449,18 @@ isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) {
|
|||
* equation 4 (incremental mean) and equation 44 (incremental variance)
|
||||
*/
|
||||
void
|
||||
isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2) {
|
||||
REQUIRE(HISTO_VALID(hr.hg));
|
||||
isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1,
|
||||
double *pm2) {
|
||||
REQUIRE(HISTO_VALID(hg));
|
||||
|
||||
double pop = 0.0;
|
||||
uint64_t pop = 0;
|
||||
double mean = 0.0;
|
||||
double sigma = 0.0;
|
||||
|
||||
uint64_t min, max, count;
|
||||
for (uint key = 0;
|
||||
isc_histo_get(hr, key, &min, &max, &count) == ISC_R_SUCCESS;
|
||||
isc_histo_next(hr, &key))
|
||||
isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS;
|
||||
isc_histo_next(hg, &key))
|
||||
{
|
||||
if (count == 0) { /* avoid division by zero */
|
||||
continue;
|
||||
|
|
@ -504,166 +477,112 @@ isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2) {
|
|||
OUTARG(pm2, sqrt(sigma / pop));
|
||||
}
|
||||
|
||||
/**********************************************************************/
|
||||
/*
|
||||
* Clamped linear interpolation
|
||||
*
|
||||
* `outrange` should be `((1 << n) - 1)` for some `n`; when `n` is larger
|
||||
* than 53, `outrange` can get rounded up to a power of 2, so we clamp the
|
||||
* result to keep within bounds (extra important when `max == UINT64_MAX`)
|
||||
*/
|
||||
static inline uint64_t
|
||||
lerp(uint64_t min, uint64_t max, uint64_t lo, uint64_t in, uint64_t hi) {
|
||||
double inrange = (double)(hi - lo);
|
||||
double inpart = (double)(in - lo);
|
||||
double outrange = (double)(max - min);
|
||||
double outpart = round(outrange * inpart / inrange);
|
||||
return (min + ISC_MIN((uint64_t)outpart, max - min));
|
||||
}
|
||||
|
||||
void
|
||||
isc_histosummary_create(isc_historead_t hr, isc_histosummary_t **hsp) {
|
||||
const isc_histo_t *hg = hr.hg;
|
||||
/*
|
||||
* There is non-zero space for the inner value, and it is inside the bounds
|
||||
*/
|
||||
static inline bool
|
||||
inside(uint64_t lo, uint64_t in, uint64_t hi) {
|
||||
return (lo < hi && lo <= in && in <= hi);
|
||||
}
|
||||
|
||||
isc_result_t
|
||||
isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction,
|
||||
uint64_t *value) {
|
||||
hg_bucket_t *chunk[CHUNKS];
|
||||
uint64_t total[CHUNKS];
|
||||
uint64_t rank[ISC_HISTO_MAXQUANTILES];
|
||||
|
||||
REQUIRE(HISTO_VALID(hg));
|
||||
REQUIRE(hsp != NULL);
|
||||
REQUIRE(*hsp == NULL);
|
||||
REQUIRE(0 < size && size <= ISC_HISTO_MAXQUANTILES);
|
||||
REQUIRE(fraction != NULL);
|
||||
REQUIRE(value != NULL);
|
||||
|
||||
uint chunksize = CHUNKSIZE(hg);
|
||||
hg_bucket_t *chunk[CHUNKS] = { NULL };
|
||||
const uint maxchunk = MAXCHUNK(hg);
|
||||
const uint chunksize = CHUNKSIZE(hg);
|
||||
|
||||
/*
|
||||
* First, find out which chunks we will copy across and how much
|
||||
* space they need. We take a copy of the chunk pointers because
|
||||
* concurrent threads may add new chunks before we have finished.
|
||||
* Find out which chunks exist and what their totals are. We take a
|
||||
* copy of the chunk pointers to reduce the need for atomic ops
|
||||
* later on. Scan from low to high so that higher buckets are more
|
||||
* likely to be in the CPU cache when we scan from high to low.
|
||||
*/
|
||||
uint size = 0;
|
||||
for (uint c = 0; c < CHUNKS; c++) {
|
||||
uint64_t population = 0;
|
||||
for (uint c = 0; c < maxchunk; c++) {
|
||||
chunk[c] = get_chunk(hg, c);
|
||||
total[c] = 0;
|
||||
if (chunk[c] != NULL) {
|
||||
size += chunksize;
|
||||
for (uint b = chunksize; b-- > 0;) {
|
||||
total[c] += bucket_count(&chunk[c][b]);
|
||||
}
|
||||
population += total[c];
|
||||
}
|
||||
}
|
||||
|
||||
isc_histosummary_t *hs =
|
||||
isc_mem_get(hg->mctx, STRUCT_FLEX_SIZE(hs, buckets, size));
|
||||
*hs = (isc_histosummary_t){
|
||||
.magic = HISTO_MAGIC,
|
||||
.sigbits = hg->sigbits,
|
||||
.size = size,
|
||||
};
|
||||
isc_mem_attach(hg->mctx, &hs->mctx);
|
||||
|
||||
/*
|
||||
* Second, copy the contents of the buckets. The copied pointers
|
||||
* are faster than get_key_count() because get_chunk()'s atomics
|
||||
* would require re-fetching the chunk pointer for every bucket.
|
||||
* Now we know the population, we can convert fractions to ranks.
|
||||
* Also ensure they are within bounds and in decreasing order.
|
||||
*/
|
||||
uint maxkey = 0;
|
||||
uint chunkbase = 0;
|
||||
for (uint c = 0; c < CHUNKS; c++) {
|
||||
if (chunk[c] == NULL) {
|
||||
continue;
|
||||
}
|
||||
hs->chunk[c] = &hs->buckets[chunkbase];
|
||||
chunkbase += chunksize;
|
||||
for (uint b = 0; b < chunksize; b++) {
|
||||
uint64_t count = atomic_load_relaxed(&chunk[c][b]);
|
||||
hs->chunk[c][b] = count;
|
||||
hs->total[c] += count;
|
||||
hs->population += count;
|
||||
maxkey = (count == 0) ? maxkey : chunksize * c + b;
|
||||
for (uint i = 0; i < size; i++) {
|
||||
REQUIRE(0.0 <= fraction[i] && fraction[i] <= 1.0);
|
||||
REQUIRE(i == 0 || fraction[i - 1] > fraction[i]);
|
||||
rank[i] = round(fraction[i] * population);
|
||||
}
|
||||
|
||||
/*
|
||||
* Scan chunks from high to low, keeping track of the bounds on
|
||||
* each chunk's ranks. Each time we match `rank[i]`, move on to the
|
||||
* next rank and continue the scan from the same place.
|
||||
*/
|
||||
uint i = 0;
|
||||
uint64_t chunk_lo = population;
|
||||
for (uint c = maxchunk; c-- > 0;) {
|
||||
uint64_t chunk_hi = chunk_lo;
|
||||
chunk_lo = chunk_hi - total[c];
|
||||
|
||||
/*
|
||||
* Scan buckets backwards within this chunk, in a similar
|
||||
* manner to the chunk scan. Skip all or part of the loop
|
||||
* if the current rank is not in the chunk.
|
||||
*/
|
||||
uint64_t bucket_lo = chunk_hi;
|
||||
for (uint b = chunksize;
|
||||
b-- > 0 && inside(chunk_lo, rank[i], chunk_hi);)
|
||||
{
|
||||
uint64_t bucket_hi = bucket_lo;
|
||||
bucket_lo = bucket_hi - bucket_count(&chunk[c][b]);
|
||||
|
||||
/*
|
||||
* Convert all ranks that fall in this bucket.
|
||||
*/
|
||||
while (inside(bucket_lo, rank[i], bucket_hi)) {
|
||||
uint key = chunksize * c + b;
|
||||
value[i] = lerp(key_to_minval(hg, key),
|
||||
key_to_maxval(hg, key),
|
||||
bucket_lo, rank[i], bucket_hi);
|
||||
if (++i == size) {
|
||||
return (ISC_R_SUCCESS);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
hs->maximum = key_to_maxval(hs, maxkey);
|
||||
|
||||
*hsp = hs;
|
||||
}
|
||||
|
||||
void
|
||||
isc_histosummary_destroy(isc_histosummary_t **hsp) {
|
||||
REQUIRE(hsp != NULL);
|
||||
REQUIRE(HISTO_VALID(*hsp));
|
||||
|
||||
isc_histosummary_t *hs = *hsp;
|
||||
*hsp = NULL;
|
||||
|
||||
isc_mem_putanddetach(&hs->mctx, hs,
|
||||
STRUCT_FLEX_SIZE(hs, buckets, hs->size));
|
||||
}
|
||||
|
||||
/**********************************************************************/
|
||||
|
||||
isc_result_t
|
||||
isc_histo_value_at_rank(const isc_histosummary_t *hs, uint64_t rank,
|
||||
uint64_t *valuep) {
|
||||
REQUIRE(HISTO_VALID(hs));
|
||||
REQUIRE(valuep != NULL);
|
||||
|
||||
uint maxchunk = MAXCHUNK(hs);
|
||||
uint chunksize = CHUNKSIZE(hs);
|
||||
uint64_t count = 0;
|
||||
uint b, c;
|
||||
|
||||
if (rank > hs->population) {
|
||||
return (ISC_R_RANGE);
|
||||
}
|
||||
if (rank == hs->population) {
|
||||
*valuep = hs->maximum;
|
||||
return (ISC_R_SUCCESS);
|
||||
}
|
||||
|
||||
for (c = 0; c < maxchunk; c++) {
|
||||
count = hs->total[c];
|
||||
if (rank < count) {
|
||||
break;
|
||||
}
|
||||
rank -= count;
|
||||
}
|
||||
INSIST(c < maxchunk);
|
||||
|
||||
for (b = 0; b < chunksize; b++) {
|
||||
count = hs->chunk[c][b];
|
||||
if (rank < count) {
|
||||
break;
|
||||
}
|
||||
rank -= count;
|
||||
}
|
||||
INSIST(b < chunksize);
|
||||
|
||||
uint key = chunksize * c + b;
|
||||
uint64_t min = key_to_minval(hs, key);
|
||||
uint64_t max = key_to_maxval(hs, key);
|
||||
*valuep = min + interpolate(max - min, rank, count);
|
||||
|
||||
return (ISC_R_SUCCESS);
|
||||
}
|
||||
|
||||
void
|
||||
isc_histo_rank_of_value(const isc_histosummary_t *hs, uint64_t value,
|
||||
uint64_t *rankp) {
|
||||
REQUIRE(HISTO_VALID(hs));
|
||||
REQUIRE(rankp != NULL);
|
||||
|
||||
uint key = value_to_key(hs, value);
|
||||
uint chunksize = CHUNKSIZE(hs);
|
||||
uint kc = key / chunksize;
|
||||
uint kb = key % chunksize;
|
||||
uint64_t rank = 0;
|
||||
|
||||
for (uint c = 0; c < kc; c++) {
|
||||
rank += hs->total[c];
|
||||
}
|
||||
for (uint b = 0; b < kb; b++) {
|
||||
rank += hs->chunk[kc][b];
|
||||
}
|
||||
|
||||
uint64_t count = hs->chunk[kc][kb];
|
||||
uint64_t min = key_to_minval(hs, key);
|
||||
uint64_t max = key_to_maxval(hs, key);
|
||||
|
||||
*rankp = rank + interpolate(count, value - min, max - min);
|
||||
}
|
||||
|
||||
isc_result_t
|
||||
isc_histo_quantile(const isc_histosummary_t *hs, double p, uint64_t *valuep) {
|
||||
if (p < 0.0 || p > 1.0) {
|
||||
return (ISC_R_RANGE);
|
||||
}
|
||||
double rank = round(hs->population * p);
|
||||
return (isc_histo_value_at_rank(hs, (uint64_t)rank, valuep));
|
||||
}
|
||||
|
||||
void
|
||||
isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value, double *pp) {
|
||||
uint64_t rank;
|
||||
isc_histo_rank_of_value(hs, value, &rank);
|
||||
*pp = (double)rank / (double)hs->population;
|
||||
return (ISC_R_UNSET);
|
||||
}
|
||||
|
||||
/**********************************************************************/
|
||||
|
|
|
|||
|
|
@ -28,17 +28,16 @@
|
|||
* The bits <-> digits functions convert betwen decimal significant
|
||||
* digits (as in scientific notation) and binary significant bits.
|
||||
*
|
||||
* At the low end (near zero) there is one value per bucket, then two
|
||||
* values, four, etc.
|
||||
*
|
||||
* You can use the `isc_histo_get()` function to export data from the
|
||||
* histogram. The range of a bucket is returned as its minimum and
|
||||
* maximum values, inclusive, i.e. a closed interval. Closed intervals
|
||||
* are more inconvenient than half-open intervals, and half-open
|
||||
* intervals are more common in C. We use closed intervals so we are
|
||||
* able to express the maximum of the last bucket, UINT64_MAX, and
|
||||
* because OpenMetrics histograms describe buckets as
|
||||
* less-than-or-equal to a particular value.
|
||||
* maximum values, inclusive, i.e. a closed interval. We use closed
|
||||
* intervals so we are able to express the maximum of the last bucket,
|
||||
* UINT64_MAX, although half-open intervals are more common in C.
|
||||
*
|
||||
* You can calculate some basic statistics directly from a histogram.
|
||||
* The `isc_histo_quantiles()` function can get a histogram's median,
|
||||
* 99th percentile, etc. The `isc_histo_moments()` function gets a
|
||||
* histogram's population, mean, and standard deviation.
|
||||
*
|
||||
* The size of a histogram depends on the range of values in the
|
||||
* stream of samples, not the number of samples. Bucket counters are
|
||||
|
|
@ -47,45 +46,32 @@
|
|||
* most 64 chunks, one for each bit of a 64 bit value. Histograms with
|
||||
* greater precision have larger chunks.
|
||||
*
|
||||
* The number of values that map to a bucket (1, 2, 4, 8, ...) is the
|
||||
* same in each chunk. Chunks 0 and 1 have one value per bucket, (see
|
||||
* `ISC_HISTO_UNITBUCKETS()` below), chunk 2 has 2 values per bucket,
|
||||
* and they increase by a factor of 2 in each successive bucket.
|
||||
* At the low end (values near zero) there is one value per bucket,
|
||||
* then two values, four, eight, etc. The number of values that map to
|
||||
* a bucket is the same in each chunk. Chunks 0 and 1 have one value
|
||||
* per bucket, (see `ISC_HISTO_UNITBUCKETS()` below), chunk 2 has 2
|
||||
* values per bucket, chunk 3 has 4, etc.
|
||||
*
|
||||
* The update cost is roughly constant and very small (not much more
|
||||
* than an atomic increment). It mostly depends on cache locality and
|
||||
* thread contention.
|
||||
*
|
||||
* To get statistical properties of a histogram (population, mean,
|
||||
* standard deviation, CDF, quantiles, ranks) you must first construct
|
||||
* an `isc_histosummary_t`. A summary is a read-only snapshot of a
|
||||
* histogram augmented with information for calculating statistics
|
||||
* more efficiently.
|
||||
*
|
||||
* There is no overflow checking for bucket counters. It takes a few
|
||||
* nanoseconds to add a sample to the histogram, so it would take at
|
||||
* least a few CPU-centuries to cause an overflow. Aggregate
|
||||
* statistics from a quarter of a million CPUs might overflow in a
|
||||
* day. (Provided that in both examples the CPUs are doing nothing
|
||||
* apart from repeatedly adding 1 to histogram buckets.)
|
||||
* There is no overflow checking for the 64 bit bucket counters. It
|
||||
* takes a few nanoseconds to add a sample to the histogram, so it
|
||||
* would take at least a few CPU-centuries to cause an overflow.
|
||||
* Aggregate statistics from a quarter of a million CPUs might
|
||||
* overflow in a day. (Provided that in both examples the CPUs are
|
||||
* doing nothing apart from repeatedly adding 1 to histogram buckets.)
|
||||
*/
|
||||
|
||||
typedef struct isc_histo isc_histo_t;
|
||||
typedef struct isc_histosummary isc_histosummary_t;
|
||||
typedef struct isc_histomulti isc_histomulti_t;
|
||||
typedef struct isc_histo isc_histo_t;
|
||||
typedef struct isc_histomulti isc_histomulti_t;
|
||||
|
||||
/*
|
||||
* For functions that can take either type.
|
||||
*/
|
||||
typedef union isc_historead {
|
||||
const isc_histo_t *hg;
|
||||
const isc_histosummary_t *hs;
|
||||
} isc_historead_t __attribute__((__transparent_union__));
|
||||
|
||||
#define ISC_HISTO_MINBITS 1
|
||||
#define ISC_HISTO_MAXBITS 18
|
||||
#define ISC_HISTO_MINDIGITS 1
|
||||
#define ISC_HISTO_MAXDIGITS 6
|
||||
#define ISC_HISTO_MINBITS 1
|
||||
#define ISC_HISTO_MAXBITS 18
|
||||
#define ISC_HISTO_MINDIGITS 1
|
||||
#define ISC_HISTO_MAXDIGITS 6
|
||||
#define ISC_HISTO_MAXQUANTILES 101 /* enough for all the percentiles */
|
||||
|
||||
/*
|
||||
* How many values map 1:1 to buckets for a given number of sigbits?
|
||||
|
|
@ -126,7 +112,7 @@ isc_histo_destroy(isc_histo_t **hgp);
|
|||
*/
|
||||
|
||||
uint
|
||||
isc_histo_sigbits(isc_historead_t hr);
|
||||
isc_histo_sigbits(isc_histo_t *hg);
|
||||
/*%<
|
||||
* Get the histogram's `sigbits` setting
|
||||
*
|
||||
|
|
@ -195,7 +181,7 @@ isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count);
|
|||
*/
|
||||
|
||||
isc_result_t
|
||||
isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp,
|
||||
isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp,
|
||||
uint64_t *countp);
|
||||
/*%<
|
||||
* Export information about a bucket.
|
||||
|
|
@ -218,7 +204,7 @@ isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp,
|
|||
* which can be zero.
|
||||
*
|
||||
* Requires:
|
||||
*\li `hr` is a pointer to a valid histogram or summary
|
||||
*\li `hg` is a pointer to a valid histogram
|
||||
*
|
||||
* Returns:
|
||||
*\li ISC_R_SUCCESS, if `key` is valid
|
||||
|
|
@ -226,7 +212,7 @@ isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp,
|
|||
*/
|
||||
|
||||
void
|
||||
isc_histo_next(isc_historead_t hr, uint *keyp);
|
||||
isc_histo_next(const isc_histo_t *hg, uint *keyp);
|
||||
/*%<
|
||||
* Skip to the next key, omitting chunks of unallocated buckets.
|
||||
*
|
||||
|
|
@ -245,12 +231,12 @@ isc_histo_next(isc_historead_t hr, uint *keyp);
|
|||
* }
|
||||
*
|
||||
* Requires:
|
||||
*\li `hr` is a pointer to a valid histogram or summary
|
||||
*\li `hg` is a pointer to a valid histogram
|
||||
*\li `keyp != NULL`
|
||||
*/
|
||||
|
||||
void
|
||||
isc_histo_merge(isc_histo_t **targetp, isc_historead_t source);
|
||||
isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source);
|
||||
/*%<
|
||||
* Increase the counts in `*ptarget` by the counts recorded in `source`
|
||||
*
|
||||
|
|
@ -264,7 +250,7 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source);
|
|||
* Requires:
|
||||
*\li `targetp != NULL`
|
||||
*\li `*targetp` is NULL or a pointer to a valid histogram
|
||||
*\li `source` is a pointer to a valid histogram or summary
|
||||
*\li `source` is a pointer to a valid histogram
|
||||
*
|
||||
* Ensures:
|
||||
*\li `*targetp` is a pointer to a valid histogram
|
||||
|
|
@ -307,7 +293,7 @@ isc_histomulti_destroy(isc_histomulti_t **hmp);
|
|||
*/
|
||||
|
||||
void
|
||||
isc_histomulti_merge(isc_histo_t **targetp, isc_histomulti_t *source);
|
||||
isc_histomulti_merge(isc_histo_t **targetp, const isc_histomulti_t *source);
|
||||
/*%<
|
||||
* Increase the counts in `*targetp` by the counts recorded in `source`
|
||||
*
|
||||
|
|
@ -343,37 +329,9 @@ isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc);
|
|||
/**********************************************************************/
|
||||
|
||||
void
|
||||
isc_histosummary_create(const isc_histo_t *hg, isc_histosummary_t **hsp);
|
||||
isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1, double *pm2);
|
||||
/*%<
|
||||
* Summarize a histogram for rank and quantile calculations.
|
||||
*
|
||||
* Requires:
|
||||
*\li `hg` is a pointer to a valid histogram
|
||||
*\li `hsp != NULL`
|
||||
*\li `*hsp == NULL`
|
||||
*
|
||||
* Ensures:
|
||||
*\li `*hsp` is a pointer to a histogram summary
|
||||
*/
|
||||
|
||||
void
|
||||
isc_histosummary_destroy(isc_histosummary_t **hsp);
|
||||
/*%<
|
||||
* Destroy a histogram summary
|
||||
*
|
||||
* Requires:
|
||||
*\li `hsp != NULL`
|
||||
*\li `*hsp` is a pointer to a valid histogram summary
|
||||
*
|
||||
* Ensures:
|
||||
*\li all memory allocated by the summary has been released
|
||||
*\li `*hsp == NULL`
|
||||
*/
|
||||
|
||||
void
|
||||
isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2);
|
||||
/*%<
|
||||
* Get the population, mean, and standard deviation of a histogram
|
||||
* Get the population, mean, and standard deviation of a histogram.
|
||||
*
|
||||
* If `pm0` is non-NULL it is set to the population of the histogram.
|
||||
* (Strictly speaking, the zeroth moment is `pop / pop == 1`.)
|
||||
|
|
@ -385,99 +343,49 @@ isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2);
|
|||
* recorded data. The standard deviation is the square root of the
|
||||
* variance, which is the second moment about the mean.
|
||||
*
|
||||
* It is safe if the histogram is concurrently modified.
|
||||
*
|
||||
* Requires:
|
||||
*\li `hr` is a pointer to a valid histogram or summary
|
||||
*\li `hg` is a pointer to a valid histogram
|
||||
*/
|
||||
|
||||
isc_result_t
|
||||
isc_histo_value_at_rank(const isc_histosummary_t *hs, uint64_t rank,
|
||||
uint64_t *valuep);
|
||||
/*%<
|
||||
* Get the approximate value at a given rank in the recorded data.
|
||||
*
|
||||
* The value at rank 0 is the minimum of the bucket containing the
|
||||
* smallest value added to the histogram.
|
||||
*
|
||||
* The value at rank equal to the population is the maximum of the
|
||||
* bucket containing the largest value added to the histogram.
|
||||
*
|
||||
* Greater ranks return a range error.
|
||||
*
|
||||
* Note: this function is slow for high-precision histograms
|
||||
* (more than 3 significant digits).
|
||||
*
|
||||
* Requires:
|
||||
*\li `hs` is a pointer to a valid histogram summary
|
||||
*\li `valuep != NULL`
|
||||
*
|
||||
* Returns:
|
||||
*\li ISC_R_SUCCESS, if `rank` is within bounds
|
||||
*\li ISC_R_RANGE, otherwise
|
||||
*/
|
||||
|
||||
void
|
||||
isc_histo_rank_of_value(const isc_histosummary_t *hs, uint64_t value,
|
||||
uint64_t *rankp);
|
||||
/*%<
|
||||
* Get the approximate rank of a value in the recorded data.
|
||||
*
|
||||
* You can query the rank of any value.
|
||||
*
|
||||
* Note: this function is slow for high-precision histograms
|
||||
* (more than 3 significant digits).
|
||||
*
|
||||
* Requires:
|
||||
*\li `hs` is a pointer to a valid histogram summary
|
||||
*\li `rankp != NULL`
|
||||
*/
|
||||
|
||||
isc_result_t
|
||||
isc_histo_quantile(const isc_histosummary_t *hs, double proportion,
|
||||
uint64_t *valuep);
|
||||
isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction,
|
||||
uint64_t *value);
|
||||
/*%<
|
||||
* The quantile function (aka inverse cumulative distribution function)
|
||||
* of the histogram. What value is greater than the given proportion of
|
||||
* of the histogram. What value is greater than the given fraction of
|
||||
* the population?
|
||||
*
|
||||
* A proportion of 0.5 gets the median value: it is greater than half
|
||||
* A fraction of 0.5 gets the median value: it is greater than half
|
||||
* the population. 0.75 gets the third quartile value, and 0.99 gets
|
||||
* the 99th percentile value. The proportion should be between 0.0 and
|
||||
* 1.0 inclusive.
|
||||
* the 99th percentile value. The fraction must be between 0.0 and 1.0
|
||||
* inclusive.
|
||||
*
|
||||
* https://enwp.org/Quantile_function
|
||||
*
|
||||
* Note: this function is slow for high-precision histograms
|
||||
* (more than 3 significant digits).
|
||||
* This implementation allows you to query quantile values for
|
||||
* multiple fractions in one function call. Internally, it makes one
|
||||
* linear scan over the histogram's buckets to find all the fractions.
|
||||
* Buckets are scanned from high to low, so that querying large
|
||||
* quantiles is more efficient. The `fraction` array must be sorted in
|
||||
* decreasing order. The results are stored in the `value` array. Both
|
||||
* arrays have `size` elements.
|
||||
*
|
||||
* The results may be nonsense if the histogram is concurrently
|
||||
* modified. To get a stable copy you can call `isc_histo_merge()`.
|
||||
*
|
||||
* Requires:
|
||||
*\li `hs` is a pointer to a valid histogram summary
|
||||
*\li `valuep != NULL`
|
||||
*\li `hg` is a pointer to a valid histogram
|
||||
*\li `0 < size && size <= ISC_HISTO_MAXQUANTILES`
|
||||
*\li `fraction != NULL`
|
||||
*\li `value != NULL`
|
||||
*\li `0.0 <= fraction[i] && fraction[i] <= 1.0` for every element
|
||||
*\li `fraction[i - 1] > fraction[i]` for every pair of elements
|
||||
*
|
||||
* Returns:
|
||||
*\li ISC_R_SUCCESS, if the proportion is in bounds
|
||||
*\li ISC_R_RANGE, otherwise
|
||||
*/
|
||||
|
||||
void
|
||||
isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value,
|
||||
double *proportionp);
|
||||
/*%<
|
||||
* The cumulative distribution function of the histogram. Given a
|
||||
* value, what proportion of the population have smaller values?
|
||||
* You can query any value.
|
||||
*
|
||||
* If the value is the median, the proportion is 0.5. The proportion
|
||||
* of the third quartile value is 0.75, and the proportion of the 99th
|
||||
* percentile value is 0.99.
|
||||
*
|
||||
* https://enwp.org/Cumulative_distribution_function
|
||||
*
|
||||
* Note: this function is slow for high-precision histograms
|
||||
* (more than 3 significant digits).
|
||||
*
|
||||
* Requires:
|
||||
*\li `hs` is a pointer to a valid histogram summary
|
||||
*\li `proportionp != NULL`
|
||||
*\li ISC_R_SUCCESS, if results were stored in the `value` array
|
||||
*\li ISC_R_UNSET, if the histogram is empty
|
||||
*/
|
||||
|
||||
/**********************************************************************/
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@
|
|||
|
||||
#define TIME_LIMIT (123 * NS_PER_MS)
|
||||
|
||||
#define SUBRANGE 69
|
||||
|
||||
#if VERBOSE
|
||||
|
||||
#define TRACE(fmt, ...) \
|
||||
|
|
@ -117,14 +119,22 @@ ISC_RUN_TEST_IMPL(basics) {
|
|||
prev_max = max;
|
||||
key++;
|
||||
result = isc_histo_get(hg, key, &min, &max, &count);
|
||||
|
||||
/* these tests can be slow */
|
||||
if (isc_time_monotonic() > start + TIME_LIMIT) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* last bucket goes up to last possible value */
|
||||
assert_int_equal(max, UINT64_MAX);
|
||||
/* if we did not stop early */
|
||||
if (result != ISC_R_SUCCESS) {
|
||||
/* last bucket goes up to last possible value */
|
||||
assert_int_equal(max, UINT64_MAX);
|
||||
|
||||
double pop;
|
||||
isc_histo_moments(hg, &pop, NULL, NULL);
|
||||
assert_int_equal((uint64_t)pop, key * 8);
|
||||
double pop;
|
||||
isc_histo_moments(hg, &pop, NULL, NULL);
|
||||
assert_int_equal((uint64_t)pop, key * 8);
|
||||
}
|
||||
|
||||
isc_histo_destroy(&hg);
|
||||
|
||||
|
|
@ -132,6 +142,86 @@ ISC_RUN_TEST_IMPL(basics) {
|
|||
}
|
||||
}
|
||||
|
||||
ISC_RUN_TEST_IMPL(quantiles) {
|
||||
for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) {
|
||||
isc_result_t result;
|
||||
uint64_t min, max, count;
|
||||
double pop;
|
||||
uint key;
|
||||
|
||||
isc_nanosecs_t start = isc_time_monotonic();
|
||||
|
||||
isc_histo_t *hg = NULL;
|
||||
isc_histo_create(mctx, bits, &hg);
|
||||
|
||||
for (key = 0; isc_histo_get(hg, key, &min, &max, &count) ==
|
||||
ISC_R_SUCCESS;
|
||||
key++)
|
||||
{
|
||||
/* inc twice so we can check bucket's midpoint */
|
||||
assert_int_equal(count, 0);
|
||||
isc_histo_inc(hg, min);
|
||||
isc_histo_inc(hg, max);
|
||||
}
|
||||
|
||||
const uint buckets = key;
|
||||
|
||||
/* no incs were lost */
|
||||
isc_histo_moments(hg, &pop, NULL, NULL);
|
||||
assert_float_equal(pop, buckets * 2, 0.5);
|
||||
|
||||
/* two ranks per bucket */
|
||||
const uint quantum = ISC_HISTO_MAXQUANTILES / 2 - 1;
|
||||
uint64_t value[ISC_HISTO_MAXQUANTILES];
|
||||
double frac[ISC_HISTO_MAXQUANTILES];
|
||||
uint base = 0;
|
||||
|
||||
for (key = 0; key < buckets; key++) {
|
||||
/* fill in the values one quantum at a time */
|
||||
if (key == 0 || key % quantum == buckets % quantum) {
|
||||
base = key;
|
||||
for (uint k = 0; k < quantum; k++) {
|
||||
double rank = (base + k) * 2;
|
||||
uint i = (quantum - k) * 2;
|
||||
frac[i - 1] = (rank + 1.0) / pop;
|
||||
frac[i - 0] = rank / pop;
|
||||
}
|
||||
frac[0] = (base + quantum) * 2 / pop;
|
||||
result = isc_histo_quantiles(
|
||||
hg, quantum * 2 + 1, frac, value);
|
||||
assert_int_equal(result, ISC_R_SUCCESS);
|
||||
}
|
||||
|
||||
result = isc_histo_get(hg, key, &min, &max, &count);
|
||||
assert_int_equal(result, ISC_R_SUCCESS);
|
||||
assert_int_equal(count, 2);
|
||||
|
||||
uint64_t lomin = min == 0 ? min : min - 1;
|
||||
uint64_t himin = min;
|
||||
uint64_t lomid = floor(min / 2.0 + max / 2.0);
|
||||
uint64_t himid = ceil(min / 2.0 + max / 2.0);
|
||||
uint64_t lomax = max;
|
||||
uint64_t himax = max == UINT64_MAX ? max : max + 1;
|
||||
|
||||
uint i = (quantum + base - key) * 2;
|
||||
|
||||
/* check fenceposts */
|
||||
assert_in_range(value[i - 0], lomin, himin);
|
||||
assert_in_range(value[i - 1], lomid, himid);
|
||||
assert_in_range(value[i - 2], lomax, himax);
|
||||
|
||||
/* these tests can be slow */
|
||||
if (isc_time_monotonic() > start + TIME_LIMIT) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
isc_histo_destroy(&hg);
|
||||
|
||||
TRACETIME("");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* ensure relative error is as expected
|
||||
*/
|
||||
|
|
@ -183,106 +273,26 @@ ISC_RUN_TEST_IMPL(sigfigs) {
|
|||
}
|
||||
}
|
||||
|
||||
ISC_RUN_TEST_IMPL(summary) {
|
||||
for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) {
|
||||
isc_result_t result;
|
||||
uint64_t min, max, count, value, rank, lorank, hirank;
|
||||
double pop;
|
||||
uint key;
|
||||
|
||||
isc_nanosecs_t start = isc_time_monotonic();
|
||||
|
||||
isc_histo_t *hg = NULL;
|
||||
isc_histo_create(mctx, bits, &hg);
|
||||
|
||||
for (key = 0; isc_histo_get(hg, key, &min, &max, &count) ==
|
||||
ISC_R_SUCCESS;
|
||||
key++)
|
||||
{
|
||||
/* inc twice so we can check bucket's midpoint */
|
||||
assert_int_equal(count, 0);
|
||||
isc_histo_inc(hg, min);
|
||||
isc_histo_inc(hg, max);
|
||||
}
|
||||
|
||||
isc_histosummary_t *hs = NULL;
|
||||
isc_histosummary_create(hg, &hs);
|
||||
|
||||
/* no incs were lost */
|
||||
isc_histo_moments(hg, &pop, NULL, NULL);
|
||||
assert_float_equal(pop, 2 * key, 0.5);
|
||||
|
||||
isc_histo_destroy(&hg);
|
||||
|
||||
for (key = 0; isc_histo_get(hs, key, &min, &max, &count) ==
|
||||
ISC_R_SUCCESS;
|
||||
isc_histo_next(hs, &key))
|
||||
{
|
||||
uint64_t lomin = min == 0 ? min : min - 1;
|
||||
uint64_t himin = min;
|
||||
uint64_t lomid = floor(min / 2.0 + max / 2.0);
|
||||
uint64_t himid = ceil(min / 2.0 + max / 2.0);
|
||||
uint64_t lomax = max;
|
||||
uint64_t himax = max == UINT64_MAX ? max : max + 1;
|
||||
|
||||
assert_int_equal(count, 2);
|
||||
|
||||
rank = key * 2;
|
||||
|
||||
/* check fenceposts */
|
||||
result = isc_histo_value_at_rank(hs, rank, &value);
|
||||
assert_int_equal(result, ISC_R_SUCCESS);
|
||||
assert_in_range(value, lomin, himin);
|
||||
result = isc_histo_value_at_rank(hs, rank + 1, &value);
|
||||
assert_int_equal(result, ISC_R_SUCCESS);
|
||||
assert_in_range(value, lomid, himid);
|
||||
result = isc_histo_value_at_rank(hs, rank + 2, &value);
|
||||
assert_int_equal(result, ISC_R_SUCCESS);
|
||||
assert_in_range(value, lomax, himax);
|
||||
|
||||
isc_histo_rank_of_value(hs, min, &rank);
|
||||
assert_int_equal(rank, key * 2);
|
||||
|
||||
/* only if the bucket covers enough distinct values */
|
||||
|
||||
if (min < lomid) {
|
||||
rank = key * 2 + 1;
|
||||
isc_histo_rank_of_value(hs, lomid, &lorank);
|
||||
isc_histo_rank_of_value(hs, himid, &hirank);
|
||||
assert_in_range(rank, lorank, hirank);
|
||||
}
|
||||
|
||||
if (himid < max) {
|
||||
rank = key * 2 + 2;
|
||||
isc_histo_rank_of_value(hs, lomax, &lorank);
|
||||
isc_histo_rank_of_value(hs, himax, &hirank);
|
||||
assert_in_range(rank, lorank, hirank);
|
||||
}
|
||||
|
||||
/* these tests can be slow */
|
||||
if (isc_time_monotonic() > start + TIME_LIMIT) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
isc_histosummary_destroy(&hs);
|
||||
|
||||
TRACETIME("");
|
||||
}
|
||||
}
|
||||
|
||||
ISC_RUN_TEST_IMPL(subrange) {
|
||||
for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) {
|
||||
isc_result_t result;
|
||||
uint64_t min, max, count, value;
|
||||
uint64_t min, max, count;
|
||||
|
||||
isc_nanosecs_t start = isc_time_monotonic();
|
||||
|
||||
isc_histo_t *hg = NULL;
|
||||
isc_histo_create(mctx, bits, &hg);
|
||||
|
||||
uint buckets = 64;
|
||||
for (uint key = 0, top = buckets - 1;; key++, top++) {
|
||||
uint64_t value[SUBRANGE + 1];
|
||||
double frac[SUBRANGE + 1];
|
||||
for (uint i = 0; i <= SUBRANGE; i++) {
|
||||
frac[i] = (double)(SUBRANGE - i) / (double)(SUBRANGE);
|
||||
}
|
||||
|
||||
result = isc_histo_quantiles(hg, ARRAY_SIZE(frac), frac, value);
|
||||
assert_int_equal(result, ISC_R_UNSET);
|
||||
|
||||
for (uint key = 0, top = SUBRANGE - 1;; key++, top++) {
|
||||
if (isc_histo_get(hg, key, &min, NULL, NULL) !=
|
||||
ISC_R_SUCCESS)
|
||||
{
|
||||
|
|
@ -293,27 +303,28 @@ ISC_RUN_TEST_IMPL(subrange) {
|
|||
{
|
||||
break;
|
||||
}
|
||||
isc_histo_put(hg, min, max, buckets);
|
||||
/*
|
||||
* If we try adding more than one sample per bucket
|
||||
* here, the test fails when buckets have different
|
||||
* sizes because [min,max] spans multiple chunks.
|
||||
*/
|
||||
isc_histo_put(hg, min, max, SUBRANGE);
|
||||
|
||||
isc_histosummary_t *hs = NULL;
|
||||
isc_histosummary_create(hg, &hs);
|
||||
result = isc_histo_quantiles(hg, ARRAY_SIZE(frac), frac,
|
||||
value);
|
||||
assert_int_equal(result, ISC_R_SUCCESS);
|
||||
|
||||
for (uint bucket = 0; bucket < buckets; bucket++) {
|
||||
for (uint bucket = 0; bucket < SUBRANGE; bucket++) {
|
||||
result = isc_histo_get(hg, key + bucket, &min,
|
||||
&max, &count);
|
||||
assert_int_equal(result, ISC_R_SUCCESS);
|
||||
/* did isc_histo_put() spread evenly? */
|
||||
assert_int_equal(count, 1);
|
||||
result = isc_histo_value_at_rank(hs, bucket,
|
||||
&value);
|
||||
assert_int_equal(result, ISC_R_SUCCESS);
|
||||
assert_int_equal(value, min);
|
||||
/* do the quantile values match? */
|
||||
assert_int_equal(value[SUBRANGE - bucket], min);
|
||||
}
|
||||
result = isc_histo_value_at_rank(hs, buckets, &value);
|
||||
assert_int_equal(result, ISC_R_SUCCESS);
|
||||
assert_int_equal(value, max);
|
||||
assert_int_equal(value[0], max);
|
||||
|
||||
isc_histosummary_destroy(&hs);
|
||||
isc_histo_destroy(&hg);
|
||||
isc_histo_create(mctx, bits, &hg);
|
||||
|
||||
|
|
@ -331,8 +342,8 @@ ISC_RUN_TEST_IMPL(subrange) {
|
|||
ISC_TEST_LIST_START
|
||||
|
||||
ISC_TEST_ENTRY(basics)
|
||||
ISC_TEST_ENTRY(quantiles)
|
||||
ISC_TEST_ENTRY(sigfigs)
|
||||
ISC_TEST_ENTRY(summary)
|
||||
ISC_TEST_ENTRY(subrange)
|
||||
|
||||
ISC_TEST_LIST_END
|
||||
|
|
|
|||
Loading…
Reference in a new issue