Simplify histogram quantiles

The `isc_histosummary_t` functions were written in the early days of
`hg64` and carried over when I brought `hg64` into BIND. They were
intended to be useful for graphing cumulative frequency distributions
and the like, but in practice whatever draws charts is better off with
a raw histogram export. Especially because of the poor performance of
the old functions.

The replacement `isc_histo_quantiles()` function is intended for
providing a few quantile values in BIND's stats channel, when the user
does not want the full histogram. Unlike the old functions, the caller
provides all the query fractions up-front, so that the values can be
found in a single scan instead of a scan per value. The scan is from
larger values to smaller, since larger quantiles are usually more
interesting, so the scan can bail out early.
This commit is contained in:
Tony Finch 2023-03-20 15:05:36 +00:00 committed by Tony Finch
parent bc2389b828
commit cd0e7f853a
3 changed files with 326 additions and 488 deletions

View file

@ -28,12 +28,22 @@
#include <isc/tid.h>
/*
* XXXFANF to be added to isc/util.h by a commit in a qp-trie
* XXXFANF to be added to <isc/util.h> by a commit in a qp-trie
* feature branch
*/
#define STRUCT_FLEX_SIZE(pointer, member, count) \
(sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count))
/*
* XXXFANF this should probably be in <isc/util.h> too
*/
#define OUTARG(ptr, val) \
({ \
if ((ptr) != NULL) { \
*(ptr) = (val); \
} \
})
#define HISTO_MAGIC ISC_MAGIC('H', 's', 't', 'o')
#define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC)
#define HISTOMULTI_MAGIC ISC_MAGIC('H', 'g', 'M', 't')
@ -72,38 +82,13 @@
typedef atomic_uint_fast64_t hg_bucket_t;
typedef atomic_ptr(hg_bucket_t) hg_chunk_t;
#define ISC_HISTO_FIELDS \
uint magic; \
uint sigbits; \
isc_mem_t *mctx
struct isc_histo {
ISC_HISTO_FIELDS;
/* chunk array must be first after common fields */
uint magic;
uint sigbits;
isc_mem_t *mctx;
hg_chunk_t chunk[CHUNKS];
};
/*
* To convert between ranks and values, we scan the histogram to find the
* required rank. Each per-chunk total contains the sum of all the buckets
* in that chunk, so we can scan a chunk at a time rather than a bucket at
* a time.
*
* XXXFANF When `sigbits` is large, the chunks get large and slow to scan.
* If this turns out to be a problem, we could store ranks as well as
* values in the summary, and use a binary search.
*/
struct isc_histosummary {
	ISC_HISTO_FIELDS;
	/* chunk array must be first after common fields */
	uint64_t *chunk[CHUNKS];  /* per-chunk pointers into buckets[], NULL when absent */
	uint64_t total[CHUNKS];   /* sum of all bucket counts within each chunk */
	uint64_t population;      /* sum of every bucket count in the summary */
	uint64_t maximum;         /* maximum value of the highest nonempty bucket */
	size_t size;              /* number of elements in buckets[] */
	uint64_t buckets[];       /* flexible array holding the copied bucket counts */
};
struct isc_histomulti {
uint magic;
uint size;
@ -112,21 +97,6 @@ struct isc_histomulti {
/**********************************************************************/
#define OUTARG(ptr, val) \
({ \
if ((ptr) != NULL) { \
*(ptr) = (val); \
} \
})
static inline uint64_t
interpolate(uint64_t span, uint64_t mul, uint64_t div) {
double frac = div > 0 ? (double)mul / (double)div : mul > 0 ? 1 : 0;
return ((uint64_t)round(span * frac));
}
/**********************************************************************/
void
isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp) {
REQUIRE(sigbits >= ISC_HISTO_MINBITS);
@ -163,9 +133,9 @@ isc_histo_destroy(isc_histo_t **hgp) {
/**********************************************************************/
uint
isc_histo_sigbits(isc_historead_t hr) {
REQUIRE(HISTO_VALID(hr.hg));
return (hr.hg->sigbits);
isc_histo_sigbits(isc_histo_t *hg) {
REQUIRE(HISTO_VALID(hg));
return (hg->sigbits);
}
/*
@ -221,11 +191,11 @@ isc_histo_digits_to_bits(uint digits) {
*
* This branchless conversion is due to Paul Khuong: see bin_down_of() in
* https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/
*
* This function is in the `isc_histo_inc()` fast path.
*/
static inline uint
value_to_key(isc_historead_t hr, uint64_t value) {
/* fast path */
const isc_histo_t *hg = hr.hg;
value_to_key(const isc_histo_t *hg, uint64_t value) {
/* ensure that denormal numbers are all in chunk zero */
uint64_t chunked = value | CHUNKSIZE(hg);
int clz = __builtin_clzll((unsigned long long)(chunked));
@ -259,16 +229,16 @@ value_to_key(isc_historead_t hr, uint64_t value) {
*/
static inline uint64_t
key_to_minval(isc_historead_t hr, uint key) {
uint chunksize = CHUNKSIZE(hr.hg);
key_to_minval(const isc_histo_t *hg, uint key) {
uint chunksize = CHUNKSIZE(hg);
uint exponent = (key / chunksize) - 1;
uint64_t mantissa = (key % chunksize) + chunksize;
return (key < chunksize ? key : mantissa << exponent);
}
static inline uint64_t
key_to_maxval(isc_historead_t hr, uint key) {
return (key_to_minval(hr, key + 1) - 1);
key_to_maxval(const isc_histo_t *hg, uint key) {
return (key_to_minval(hg, key + 1) - 1);
}
/**********************************************************************/
@ -293,29 +263,33 @@ key_to_new_bucket(isc_histo_t *hg, uint key) {
}
static hg_bucket_t *
get_chunk(isc_historead_t hr, uint chunk) {
const hg_chunk_t *cpp = &hr.hg->chunk[chunk];
return (atomic_load_acquire(cpp));
get_chunk(const isc_histo_t *hg, uint chunk) {
return (atomic_load_acquire(&hg->chunk[chunk]));
}
static inline hg_bucket_t *
key_to_bucket(isc_historead_t hr, uint key) {
key_to_bucket(const isc_histo_t *hg, uint key) {
/* fast path */
uint chunksize = CHUNKSIZE(hr.hg);
uint chunksize = CHUNKSIZE(hg);
uint chunk = key / chunksize;
uint bucket = key % chunksize;
hg_bucket_t *cp = get_chunk(hr, chunk);
hg_bucket_t *cp = get_chunk(hg, chunk);
return (cp == NULL ? NULL : &cp[bucket]);
}
static inline uint64_t
get_key_count(isc_historead_t hr, uint key) {
hg_bucket_t *bp = key_to_bucket(hr, key);
bucket_count(const hg_bucket_t *bp) {
return (bp == NULL ? 0 : atomic_load_relaxed(bp));
}
static inline uint64_t
get_key_count(const isc_histo_t *hg, uint key) {
return (bucket_count(key_to_bucket(hg, key)));
}
static inline void
add_key_count(isc_histo_t *hg, uint key, uint64_t inc) {
/* fast path */
if (inc > 0) {
hg_bucket_t *bp = key_to_bucket(hg, key);
bp = bp != NULL ? bp : key_to_new_bucket(hg, key);
@ -355,14 +329,14 @@ isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count) {
}
isc_result_t
isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp,
isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp,
uint64_t *countp) {
REQUIRE(HISTO_VALID(hr.hg));
REQUIRE(HISTO_VALID(hg));
if (key < BUCKETS(hr.hg)) {
OUTARG(minp, key_to_minval(hr, key));
OUTARG(maxp, key_to_maxval(hr, key));
OUTARG(countp, get_key_count(hr, key));
if (key < BUCKETS(hg)) {
OUTARG(minp, key_to_minval(hg, key));
OUTARG(maxp, key_to_maxval(hg, key));
OUTARG(countp, get_key_count(hg, key));
return (ISC_R_SUCCESS);
} else {
return (ISC_R_RANGE);
@ -370,9 +344,7 @@ isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp,
}
void
isc_histo_next(isc_historead_t hr, uint *keyp) {
const isc_histo_t *hg = hr.hg;
isc_histo_next(const isc_histo_t *hg, uint *keyp) {
REQUIRE(HISTO_VALID(hg));
REQUIRE(keyp != NULL);
@ -382,7 +354,7 @@ isc_histo_next(isc_historead_t hr, uint *keyp) {
key++;
while (key < buckets && key % chunksize == 0 &&
key_to_bucket(hr, key) == NULL)
key_to_bucket(hg, key) == NULL)
{
key += chunksize;
}
@ -390,14 +362,14 @@ isc_histo_next(isc_historead_t hr, uint *keyp) {
}
void
isc_histo_merge(isc_histo_t **targetp, isc_historead_t source) {
REQUIRE(HISTO_VALID(source.hg));
isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source) {
REQUIRE(HISTO_VALID(source));
REQUIRE(targetp != NULL);
if (*targetp != NULL) {
REQUIRE(HISTO_VALID(*targetp));
} else {
isc_histo_create(source.hg->mctx, source.hg->sigbits, targetp);
isc_histo_create(source->mctx, source->sigbits, targetp);
}
uint64_t min, max, count;
@ -450,7 +422,7 @@ isc_histomulti_destroy(isc_histomulti_t **hmp) {
}
void
isc_histomulti_merge(isc_histo_t **hgp, isc_histomulti_t *hm) {
isc_histomulti_merge(isc_histo_t **hgp, const isc_histomulti_t *hm) {
REQUIRE(HISTOMULTI_VALID(hm));
for (uint i = 0; i < hm->size; i++) {
@ -477,17 +449,18 @@ isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) {
* equation 4 (incremental mean) and equation 44 (incremental variance)
*/
void
isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2) {
REQUIRE(HISTO_VALID(hr.hg));
isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1,
double *pm2) {
REQUIRE(HISTO_VALID(hg));
double pop = 0.0;
uint64_t pop = 0;
double mean = 0.0;
double sigma = 0.0;
uint64_t min, max, count;
for (uint key = 0;
isc_histo_get(hr, key, &min, &max, &count) == ISC_R_SUCCESS;
isc_histo_next(hr, &key))
isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS;
isc_histo_next(hg, &key))
{
if (count == 0) { /* avoid division by zero */
continue;
@ -504,166 +477,112 @@ isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2) {
OUTARG(pm2, sqrt(sigma / pop));
}
/**********************************************************************/
/*
* Clamped linear interpolation
*
* `outrange` should be `((1 << n) - 1)` for some `n`; when `n` is larger
* than 53, `outrange` can get rounded up to a power of 2, so we clamp the
* result to keep within bounds (extra important when `max == UINT64_MAX`)
*/
static inline uint64_t
lerp(uint64_t min, uint64_t max, uint64_t lo, uint64_t in, uint64_t hi) {
double inrange = (double)(hi - lo);
double inpart = (double)(in - lo);
double outrange = (double)(max - min);
double outpart = round(outrange * inpart / inrange);
return (min + ISC_MIN((uint64_t)outpart, max - min));
}
void
isc_histosummary_create(isc_historead_t hr, isc_histosummary_t **hsp) {
const isc_histo_t *hg = hr.hg;
/*
* There is non-zero space for the inner value, and it is inside the bounds
*/
/*
 * True when the interval [lo,hi] is nonempty and contains `in`.
 */
static inline bool
inside(uint64_t lo, uint64_t in, uint64_t hi) {
	if (lo >= hi) {
		return (false);
	}
	return (lo <= in && in <= hi);
}
isc_result_t
isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction,
uint64_t *value) {
hg_bucket_t *chunk[CHUNKS];
uint64_t total[CHUNKS];
uint64_t rank[ISC_HISTO_MAXQUANTILES];
REQUIRE(HISTO_VALID(hg));
REQUIRE(hsp != NULL);
REQUIRE(*hsp == NULL);
REQUIRE(0 < size && size <= ISC_HISTO_MAXQUANTILES);
REQUIRE(fraction != NULL);
REQUIRE(value != NULL);
uint chunksize = CHUNKSIZE(hg);
hg_bucket_t *chunk[CHUNKS] = { NULL };
const uint maxchunk = MAXCHUNK(hg);
const uint chunksize = CHUNKSIZE(hg);
/*
* First, find out which chunks we will copy across and how much
* space they need. We take a copy of the chunk pointers because
* concurrent threads may add new chunks before we have finished.
* Find out which chunks exist and what their totals are. We take a
* copy of the chunk pointers to reduce the need for atomic ops
* later on. Scan from low to high so that higher buckets are more
* likely to be in the CPU cache when we scan from high to low.
*/
uint size = 0;
for (uint c = 0; c < CHUNKS; c++) {
uint64_t population = 0;
for (uint c = 0; c < maxchunk; c++) {
chunk[c] = get_chunk(hg, c);
total[c] = 0;
if (chunk[c] != NULL) {
size += chunksize;
for (uint b = chunksize; b-- > 0;) {
total[c] += bucket_count(&chunk[c][b]);
}
population += total[c];
}
}
isc_histosummary_t *hs =
isc_mem_get(hg->mctx, STRUCT_FLEX_SIZE(hs, buckets, size));
*hs = (isc_histosummary_t){
.magic = HISTO_MAGIC,
.sigbits = hg->sigbits,
.size = size,
};
isc_mem_attach(hg->mctx, &hs->mctx);
/*
* Second, copy the contents of the buckets. The copied pointers
* are faster than get_key_count() because get_chunk()'s atomics
* would require re-fetching the chunk pointer for every bucket.
* Now we know the population, we can convert fractions to ranks.
* Also ensure they are within bounds and in decreasing order.
*/
uint maxkey = 0;
uint chunkbase = 0;
for (uint c = 0; c < CHUNKS; c++) {
if (chunk[c] == NULL) {
continue;
}
hs->chunk[c] = &hs->buckets[chunkbase];
chunkbase += chunksize;
for (uint b = 0; b < chunksize; b++) {
uint64_t count = atomic_load_relaxed(&chunk[c][b]);
hs->chunk[c][b] = count;
hs->total[c] += count;
hs->population += count;
maxkey = (count == 0) ? maxkey : chunksize * c + b;
for (uint i = 0; i < size; i++) {
REQUIRE(0.0 <= fraction[i] && fraction[i] <= 1.0);
REQUIRE(i == 0 || fraction[i - 1] > fraction[i]);
rank[i] = round(fraction[i] * population);
}
/*
* Scan chunks from high to low, keeping track of the bounds on
* each chunk's ranks. Each time we match `rank[i]`, move on to the
* next rank and continue the scan from the same place.
*/
uint i = 0;
uint64_t chunk_lo = population;
for (uint c = maxchunk; c-- > 0;) {
uint64_t chunk_hi = chunk_lo;
chunk_lo = chunk_hi - total[c];
/*
* Scan buckets backwards within this chunk, in a similar
* manner to the chunk scan. Skip all or part of the loop
* if the current rank is not in the chunk.
*/
uint64_t bucket_lo = chunk_hi;
for (uint b = chunksize;
b-- > 0 && inside(chunk_lo, rank[i], chunk_hi);)
{
uint64_t bucket_hi = bucket_lo;
bucket_lo = bucket_hi - bucket_count(&chunk[c][b]);
/*
* Convert all ranks that fall in this bucket.
*/
while (inside(bucket_lo, rank[i], bucket_hi)) {
uint key = chunksize * c + b;
value[i] = lerp(key_to_minval(hg, key),
key_to_maxval(hg, key),
bucket_lo, rank[i], bucket_hi);
if (++i == size) {
return (ISC_R_SUCCESS);
}
}
}
}
hs->maximum = key_to_maxval(hs, maxkey);
*hsp = hs;
}
void
isc_histosummary_destroy(isc_histosummary_t **hsp) {
REQUIRE(hsp != NULL);
REQUIRE(HISTO_VALID(*hsp));
isc_histosummary_t *hs = *hsp;
*hsp = NULL;
isc_mem_putanddetach(&hs->mctx, hs,
STRUCT_FLEX_SIZE(hs, buckets, hs->size));
}
/**********************************************************************/
/*
 * Get the approximate value at a given rank by scanning the summary's
 * per-chunk totals, then the buckets of the matching chunk, and
 * interpolating within the matching bucket.
 */
isc_result_t
isc_histo_value_at_rank(const isc_histosummary_t *hs, uint64_t rank,
			uint64_t *valuep) {
	REQUIRE(HISTO_VALID(hs));
	REQUIRE(valuep != NULL);
	uint maxchunk = MAXCHUNK(hs);
	uint chunksize = CHUNKSIZE(hs);
	uint64_t count = 0;
	uint b, c;
	/* ranks beyond the population are out of range */
	if (rank > hs->population) {
		return (ISC_R_RANGE);
	}
	/* rank == population maps to the maximum of the highest bucket */
	if (rank == hs->population) {
		*valuep = hs->maximum;
		return (ISC_R_SUCCESS);
	}
	/* find the chunk containing this rank, reducing rank as we skip chunks */
	for (c = 0; c < maxchunk; c++) {
		count = hs->total[c];
		if (rank < count) {
			break;
		}
		rank -= count;
	}
	INSIST(c < maxchunk);
	/* then find the bucket within that chunk, the same way */
	for (b = 0; b < chunksize; b++) {
		count = hs->chunk[c][b];
		if (rank < count) {
			break;
		}
		rank -= count;
	}
	INSIST(b < chunksize);
	/* interpolate within the bucket's value range using the residual rank */
	uint key = chunksize * c + b;
	uint64_t min = key_to_minval(hs, key);
	uint64_t max = key_to_maxval(hs, key);
	*valuep = min + interpolate(max - min, rank, count);
	return (ISC_R_SUCCESS);
}
/*
 * Get the approximate rank of `value`: sum the totals of all chunks
 * below the value's chunk, then the buckets below its bucket, then
 * interpolate within the value's own bucket.
 */
void
isc_histo_rank_of_value(const isc_histosummary_t *hs, uint64_t value,
			uint64_t *rankp) {
	REQUIRE(HISTO_VALID(hs));
	REQUIRE(rankp != NULL);
	uint key = value_to_key(hs, value);
	uint chunksize = CHUNKSIZE(hs);
	uint kc = key / chunksize;
	uint kb = key % chunksize;
	uint64_t rank = 0;
	/* counts in all chunks entirely below the value's chunk */
	for (uint c = 0; c < kc; c++) {
		rank += hs->total[c];
	}
	/* counts in the earlier buckets of the value's own chunk */
	for (uint b = 0; b < kb; b++) {
		rank += hs->chunk[kc][b];
	}
	/* pro-rata share of the value's own bucket */
	uint64_t count = hs->chunk[kc][kb];
	uint64_t min = key_to_minval(hs, key);
	uint64_t max = key_to_maxval(hs, key);
	*rankp = rank + interpolate(count, value - min, max - min);
}
/*
 * Quantile function: get the value that is greater than proportion
 * `p` of the recorded population, by delegating to
 * isc_histo_value_at_rank() with the rounded rank.
 *
 * Returns ISC_R_RANGE when `p` is outside [0.0, 1.0].
 */
isc_result_t
isc_histo_quantile(const isc_histosummary_t *hs, double p, uint64_t *valuep) {
	/*
	 * Written so that NaN also fails the bounds check: the old
	 * form `p < 0.0 || p > 1.0` let NaN through (every ordered
	 * comparison with NaN is false), and converting the resulting
	 * NaN rank to uint64_t is undefined behavior.
	 */
	if (!(p >= 0.0 && p <= 1.0)) {
		return (ISC_R_RANGE);
	}
	double rank = round(hs->population * p);
	return (isc_histo_value_at_rank(hs, (uint64_t)rank, valuep));
}
/*
 * Cumulative distribution function: set `*pp` to the proportion of
 * the recorded population whose values are smaller than `value`.
 *
 * NOTE(review): when the summary is empty (population == 0) the
 * division yields NaN — confirm callers never pass an empty summary.
 */
void
isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value, double *pp) {
	uint64_t rank;
	isc_histo_rank_of_value(hs, value, &rank);
	/*
	 * The stray `return (ISC_R_UNSET);` was removed: this function
	 * is void, so returning a value is a constraint violation.
	 */
	*pp = (double)rank / (double)hs->population;
}
/**********************************************************************/

View file

@ -28,17 +28,16 @@
* The bits <-> digits functions convert between decimal significant
* digits (as in scientific notation) and binary significant bits.
*
* At the low end (near zero) there is one value per bucket, then two
* values, four, etc.
*
* You can use the `isc_histo_get()` function to export data from the
* histogram. The range of a bucket is returned as its minimum and
* maximum values, inclusive, i.e. a closed interval. Closed intervals
* are more inconvenient than half-open intervals, and half-open
* intervals are more common in C. We use closed intervals so we are
* able to express the maximum of the last bucket, UINT64_MAX, and
* because OpenMetrics histograms describe buckets as
* less-than-or-equal to a particular value.
* maximum values, inclusive, i.e. a closed interval. We use closed
* intervals so we are able to express the maximum of the last bucket,
* UINT64_MAX, although half-open intervals are more common in C.
*
* You can calculate some basic statistics directly from a histogram.
* The `isc_histo_quantiles()` function can get a histogram's median,
* 99th percentile, etc. The `isc_histo_moments()` function gets a
* histogram's population, mean, and standard deviation.
*
* The size of a histogram depends on the range of values in the
* stream of samples, not the number of samples. Bucket counters are
@ -47,45 +46,32 @@
* most 64 chunks, one for each bit of a 64 bit value. Histograms with
* greater precision have larger chunks.
*
* The number of values that map to a bucket (1, 2, 4, 8, ...) is the
* same in each chunk. Chunks 0 and 1 have one value per bucket, (see
* `ISC_HISTO_UNITBUCKETS()` below), chunk 2 has 2 values per bucket,
* and they increase by a factor of 2 in each successive bucket.
* At the low end (values near zero) there is one value per bucket,
* then two values, four, eight, etc. The number of values that map to
* a bucket is the same in each chunk. Chunks 0 and 1 have one value
* per bucket, (see `ISC_HISTO_UNITBUCKETS()` below), chunk 2 has 2
* values per bucket, chunk 3 has 4, etc.
*
* The update cost is roughly constant and very small (not much more
* than an atomic increment). It mostly depends on cache locality and
* thread contention.
*
* To get statistical properties of a histogram (population, mean,
* standard deviation, CDF, quantiles, ranks) you must first construct
* an `isc_histosummary_t`. A summary is a read-only snapshot of a
* histogram augmented with information for calculating statistics
* more efficiently.
*
* There is no overflow checking for bucket counters. It takes a few
* nanoseconds to add a sample to the histogram, so it would take at
* least a few CPU-centuries to cause an overflow. Aggregate
* statistics from a quarter of a million CPUs might overflow in a
* day. (Provided that in both examples the CPUs are doing nothing
* apart from repeatedly adding 1 to histogram buckets.)
* There is no overflow checking for the 64 bit bucket counters. It
* takes a few nanoseconds to add a sample to the histogram, so it
* would take at least a few CPU-centuries to cause an overflow.
* Aggregate statistics from a quarter of a million CPUs might
* overflow in a day. (Provided that in both examples the CPUs are
* doing nothing apart from repeatedly adding 1 to histogram buckets.)
*/
typedef struct isc_histo isc_histo_t;
typedef struct isc_histosummary isc_histosummary_t;
typedef struct isc_histomulti isc_histomulti_t;
typedef struct isc_histo isc_histo_t;
typedef struct isc_histomulti isc_histomulti_t;
/*
* For functions that can take either type.
*/
typedef union isc_historead {
const isc_histo_t *hg;
const isc_histosummary_t *hs;
} isc_historead_t __attribute__((__transparent_union__));
#define ISC_HISTO_MINBITS 1
#define ISC_HISTO_MAXBITS 18
#define ISC_HISTO_MINDIGITS 1
#define ISC_HISTO_MAXDIGITS 6
#define ISC_HISTO_MINBITS 1
#define ISC_HISTO_MAXBITS 18
#define ISC_HISTO_MINDIGITS 1
#define ISC_HISTO_MAXDIGITS 6
#define ISC_HISTO_MAXQUANTILES 101 /* enough for all the percentiles */
/*
* How many values map 1:1 to buckets for a given number of sigbits?
@ -126,7 +112,7 @@ isc_histo_destroy(isc_histo_t **hgp);
*/
uint
isc_histo_sigbits(isc_historead_t hr);
isc_histo_sigbits(isc_histo_t *hg);
/*%<
* Get the histogram's `sigbits` setting
*
@ -195,7 +181,7 @@ isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count);
*/
isc_result_t
isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp,
isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp,
uint64_t *countp);
/*%<
* Export information about a bucket.
@ -218,7 +204,7 @@ isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp,
* which can be zero.
*
* Requires:
*\li `hr` is a pointer to a valid histogram or summary
*\li `hg` is a pointer to a valid histogram
*
* Returns:
*\li ISC_R_SUCCESS, if `key` is valid
@ -226,7 +212,7 @@ isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp,
*/
void
isc_histo_next(isc_historead_t hr, uint *keyp);
isc_histo_next(const isc_histo_t *hg, uint *keyp);
/*%<
* Skip to the next key, omitting chunks of unallocated buckets.
*
@ -245,12 +231,12 @@ isc_histo_next(isc_historead_t hr, uint *keyp);
* }
*
* Requires:
*\li `hr` is a pointer to a valid histogram or summary
*\li `hg` is a pointer to a valid histogram
*\li `keyp != NULL`
*/
void
isc_histo_merge(isc_histo_t **targetp, isc_historead_t source);
isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source);
/*%<
* Increase the counts in `*ptarget` by the counts recorded in `source`
*
@ -264,7 +250,7 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source);
* Requires:
*\li `targetp != NULL`
*\li `*targetp` is NULL or a pointer to a valid histogram
*\li `source` is a pointer to a valid histogram or summary
*\li `source` is a pointer to a valid histogram
*
* Ensures:
*\li `*targetp` is a pointer to a valid histogram
@ -307,7 +293,7 @@ isc_histomulti_destroy(isc_histomulti_t **hmp);
*/
void
isc_histomulti_merge(isc_histo_t **targetp, isc_histomulti_t *source);
isc_histomulti_merge(isc_histo_t **targetp, const isc_histomulti_t *source);
/*%<
* Increase the counts in `*targetp` by the counts recorded in `source`
*
@ -343,37 +329,9 @@ isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc);
/**********************************************************************/
void
isc_histosummary_create(const isc_histo_t *hg, isc_histosummary_t **hsp);
isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1, double *pm2);
/*%<
* Summarize a histogram for rank and quantile calculations.
*
* Requires:
*\li `hg` is a pointer to a valid histogram
*\li `hsp != NULL`
*\li `*hsp == NULL`
*
* Ensures:
*\li `*hsp` is a pointer to a histogram summary
*/
void
isc_histosummary_destroy(isc_histosummary_t **hsp);
/*%<
* Destroy a histogram summary
*
* Requires:
*\li `hsp != NULL`
*\li `*hsp` is a pointer to a valid histogram summary
*
* Ensures:
*\li all memory allocated by the summary has been released
*\li `*hsp == NULL`
*/
void
isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2);
/*%<
* Get the population, mean, and standard deviation of a histogram
* Get the population, mean, and standard deviation of a histogram.
*
* If `pm0` is non-NULL it is set to the population of the histogram.
* (Strictly speaking, the zeroth moment is `pop / pop == 1`.)
@ -385,99 +343,49 @@ isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2);
* recorded data. The standard deviation is the square root of the
* variance, which is the second moment about the mean.
*
* It is safe if the histogram is concurrently modified.
*
* Requires:
*\li `hr` is a pointer to a valid histogram or summary
*\li `hg` is a pointer to a valid histogram
*/
isc_result_t
isc_histo_value_at_rank(const isc_histosummary_t *hs, uint64_t rank,
uint64_t *valuep);
/*%<
* Get the approximate value at a given rank in the recorded data.
*
* The value at rank 0 is the minimum of the bucket containing the
* smallest value added to the histogram.
*
* The value at rank equal to the population is the maximum of the
* bucket containing the largest value added to the histogram.
*
* Greater ranks return a range error.
*
* Note: this function is slow for high-precision histograms
* (more than 3 significant digits).
*
* Requires:
*\li `hs` is a pointer to a valid histogram summary
*\li `valuep != NULL`
*
* Returns:
*\li ISC_R_SUCCESS, if `rank` is within bounds
*\li ISC_R_RANGE, otherwise
*/
void
isc_histo_rank_of_value(const isc_histosummary_t *hs, uint64_t value,
uint64_t *rankp);
/*%<
* Get the approximate rank of a value in the recorded data.
*
* You can query the rank of any value.
*
* Note: this function is slow for high-precision histograms
* (more than 3 significant digits).
*
* Requires:
*\li `hs` is a pointer to a valid histogram summary
*\li `rankp != NULL`
*/
isc_result_t
isc_histo_quantile(const isc_histosummary_t *hs, double proportion,
uint64_t *valuep);
isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction,
uint64_t *value);
/*%<
* The quantile function (aka inverse cumulative distribution function)
* of the histogram. What value is greater than the given proportion of
* of the histogram. What value is greater than the given fraction of
* the population?
*
* A proportion of 0.5 gets the median value: it is greater than half
* A fraction of 0.5 gets the median value: it is greater than half
* the population. 0.75 gets the third quartile value, and 0.99 gets
* the 99th percentile value. The proportion should be between 0.0 and
* 1.0 inclusive.
* the 99th percentile value. The fraction must be between 0.0 and 1.0
* inclusive.
*
* https://enwp.org/Quantile_function
*
* Note: this function is slow for high-precision histograms
* (more than 3 significant digits).
* This implementation allows you to query quantile values for
* multiple fractions in one function call. Internally, it makes one
* linear scan over the histogram's buckets to find all the fractions.
* Buckets are scanned from high to low, so that querying large
* quantiles is more efficient. The `fraction` array must be sorted in
* decreasing order. The results are stored in the `value` array. Both
* arrays have `size` elements.
*
* The results may be nonsense if the histogram is concurrently
* modified. To get a stable copy you can call `isc_histo_merge()`.
*
* Requires:
*\li `hs` is a pointer to a valid histogram summary
*\li `valuep != NULL`
*\li `hg` is a pointer to a valid histogram
*\li `0 < size && size <= ISC_HISTO_MAXQUANTILES`
*\li `fraction != NULL`
*\li `value != NULL`
*\li `0.0 <= fraction[i] && fraction[i] <= 1.0` for every element
*\li `fraction[i - 1] > fraction[i]` for every pair of elements
*
* Returns:
*\li ISC_R_SUCCESS, if the proportion is in bounds
*\li ISC_R_RANGE, otherwise
*/
void
isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value,
double *proportionp);
/*%<
* The cumulative distribution function of the histogram. Given a
* value, what proportion of the population have smaller values?
* You can query any value.
*
* If the value is the median, the proportion is 0.5. The proportion
* of the third quartile value is 0.75, and the proportion of the 99th
* percentile value is 0.99.
*
* https://enwp.org/Cumulative_distribution_function
*
* Note: this function is slow for high-precision histograms
* (more than 3 significant digits).
*
* Requires:
*\li `hs` is a pointer to a valid histogram summary
*\li `proportionp != NULL`
*\li ISC_R_SUCCESS, if results were stored in the `value` array
*\li ISC_R_UNSET, if the histogram is empty
*/
/**********************************************************************/

View file

@ -32,6 +32,8 @@
#define TIME_LIMIT (123 * NS_PER_MS)
#define SUBRANGE 69
#if VERBOSE
#define TRACE(fmt, ...) \
@ -117,14 +119,22 @@ ISC_RUN_TEST_IMPL(basics) {
prev_max = max;
key++;
result = isc_histo_get(hg, key, &min, &max, &count);
/* these tests can be slow */
if (isc_time_monotonic() > start + TIME_LIMIT) {
break;
}
}
/* last bucket goes up to last possible value */
assert_int_equal(max, UINT64_MAX);
/* if we did not stop early */
if (result != ISC_R_SUCCESS) {
/* last bucket goes up to last possible value */
assert_int_equal(max, UINT64_MAX);
double pop;
isc_histo_moments(hg, &pop, NULL, NULL);
assert_int_equal((uint64_t)pop, key * 8);
double pop;
isc_histo_moments(hg, &pop, NULL, NULL);
assert_int_equal((uint64_t)pop, key * 8);
}
isc_histo_destroy(&hg);
@ -132,6 +142,86 @@ ISC_RUN_TEST_IMPL(basics) {
}
}
ISC_RUN_TEST_IMPL(quantiles) {
for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) {
isc_result_t result;
uint64_t min, max, count;
double pop;
uint key;
isc_nanosecs_t start = isc_time_monotonic();
isc_histo_t *hg = NULL;
isc_histo_create(mctx, bits, &hg);
for (key = 0; isc_histo_get(hg, key, &min, &max, &count) ==
ISC_R_SUCCESS;
key++)
{
/* inc twice so we can check bucket's midpoint */
assert_int_equal(count, 0);
isc_histo_inc(hg, min);
isc_histo_inc(hg, max);
}
const uint buckets = key;
/* no incs were lost */
isc_histo_moments(hg, &pop, NULL, NULL);
assert_float_equal(pop, buckets * 2, 0.5);
/* two ranks per bucket */
const uint quantum = ISC_HISTO_MAXQUANTILES / 2 - 1;
uint64_t value[ISC_HISTO_MAXQUANTILES];
double frac[ISC_HISTO_MAXQUANTILES];
uint base = 0;
for (key = 0; key < buckets; key++) {
/* fill in the values one quantum at a time */
if (key == 0 || key % quantum == buckets % quantum) {
base = key;
for (uint k = 0; k < quantum; k++) {
double rank = (base + k) * 2;
uint i = (quantum - k) * 2;
frac[i - 1] = (rank + 1.0) / pop;
frac[i - 0] = rank / pop;
}
frac[0] = (base + quantum) * 2 / pop;
result = isc_histo_quantiles(
hg, quantum * 2 + 1, frac, value);
assert_int_equal(result, ISC_R_SUCCESS);
}
result = isc_histo_get(hg, key, &min, &max, &count);
assert_int_equal(result, ISC_R_SUCCESS);
assert_int_equal(count, 2);
uint64_t lomin = min == 0 ? min : min - 1;
uint64_t himin = min;
uint64_t lomid = floor(min / 2.0 + max / 2.0);
uint64_t himid = ceil(min / 2.0 + max / 2.0);
uint64_t lomax = max;
uint64_t himax = max == UINT64_MAX ? max : max + 1;
uint i = (quantum + base - key) * 2;
/* check fenceposts */
assert_in_range(value[i - 0], lomin, himin);
assert_in_range(value[i - 1], lomid, himid);
assert_in_range(value[i - 2], lomax, himax);
/* these tests can be slow */
if (isc_time_monotonic() > start + TIME_LIMIT) {
break;
}
}
isc_histo_destroy(&hg);
TRACETIME("");
}
}
/*
* ensure relative error is as expected
*/
@ -183,106 +273,26 @@ ISC_RUN_TEST_IMPL(sigfigs) {
}
}
ISC_RUN_TEST_IMPL(summary) {
for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) {
isc_result_t result;
uint64_t min, max, count, value, rank, lorank, hirank;
double pop;
uint key;
isc_nanosecs_t start = isc_time_monotonic();
isc_histo_t *hg = NULL;
isc_histo_create(mctx, bits, &hg);
for (key = 0; isc_histo_get(hg, key, &min, &max, &count) ==
ISC_R_SUCCESS;
key++)
{
/* inc twice so we can check bucket's midpoint */
assert_int_equal(count, 0);
isc_histo_inc(hg, min);
isc_histo_inc(hg, max);
}
isc_histosummary_t *hs = NULL;
isc_histosummary_create(hg, &hs);
/* no incs were lost */
isc_histo_moments(hg, &pop, NULL, NULL);
assert_float_equal(pop, 2 * key, 0.5);
isc_histo_destroy(&hg);
for (key = 0; isc_histo_get(hs, key, &min, &max, &count) ==
ISC_R_SUCCESS;
isc_histo_next(hs, &key))
{
uint64_t lomin = min == 0 ? min : min - 1;
uint64_t himin = min;
uint64_t lomid = floor(min / 2.0 + max / 2.0);
uint64_t himid = ceil(min / 2.0 + max / 2.0);
uint64_t lomax = max;
uint64_t himax = max == UINT64_MAX ? max : max + 1;
assert_int_equal(count, 2);
rank = key * 2;
/* check fenceposts */
result = isc_histo_value_at_rank(hs, rank, &value);
assert_int_equal(result, ISC_R_SUCCESS);
assert_in_range(value, lomin, himin);
result = isc_histo_value_at_rank(hs, rank + 1, &value);
assert_int_equal(result, ISC_R_SUCCESS);
assert_in_range(value, lomid, himid);
result = isc_histo_value_at_rank(hs, rank + 2, &value);
assert_int_equal(result, ISC_R_SUCCESS);
assert_in_range(value, lomax, himax);
isc_histo_rank_of_value(hs, min, &rank);
assert_int_equal(rank, key * 2);
/* only if the bucket covers enough distinct values */
if (min < lomid) {
rank = key * 2 + 1;
isc_histo_rank_of_value(hs, lomid, &lorank);
isc_histo_rank_of_value(hs, himid, &hirank);
assert_in_range(rank, lorank, hirank);
}
if (himid < max) {
rank = key * 2 + 2;
isc_histo_rank_of_value(hs, lomax, &lorank);
isc_histo_rank_of_value(hs, himax, &hirank);
assert_in_range(rank, lorank, hirank);
}
/* these tests can be slow */
if (isc_time_monotonic() > start + TIME_LIMIT) {
break;
}
}
isc_histosummary_destroy(&hs);
TRACETIME("");
}
}
ISC_RUN_TEST_IMPL(subrange) {
for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) {
isc_result_t result;
uint64_t min, max, count, value;
uint64_t min, max, count;
isc_nanosecs_t start = isc_time_monotonic();
isc_histo_t *hg = NULL;
isc_histo_create(mctx, bits, &hg);
uint buckets = 64;
for (uint key = 0, top = buckets - 1;; key++, top++) {
uint64_t value[SUBRANGE + 1];
double frac[SUBRANGE + 1];
for (uint i = 0; i <= SUBRANGE; i++) {
frac[i] = (double)(SUBRANGE - i) / (double)(SUBRANGE);
}
result = isc_histo_quantiles(hg, ARRAY_SIZE(frac), frac, value);
assert_int_equal(result, ISC_R_UNSET);
for (uint key = 0, top = SUBRANGE - 1;; key++, top++) {
if (isc_histo_get(hg, key, &min, NULL, NULL) !=
ISC_R_SUCCESS)
{
@ -293,27 +303,28 @@ ISC_RUN_TEST_IMPL(subrange) {
{
break;
}
isc_histo_put(hg, min, max, buckets);
/*
* If we try adding more than one sample per bucket
* here, the test fails when buckets have different
* sizes because [min,max] spans multiple chunks.
*/
isc_histo_put(hg, min, max, SUBRANGE);
isc_histosummary_t *hs = NULL;
isc_histosummary_create(hg, &hs);
result = isc_histo_quantiles(hg, ARRAY_SIZE(frac), frac,
value);
assert_int_equal(result, ISC_R_SUCCESS);
for (uint bucket = 0; bucket < buckets; bucket++) {
for (uint bucket = 0; bucket < SUBRANGE; bucket++) {
result = isc_histo_get(hg, key + bucket, &min,
&max, &count);
assert_int_equal(result, ISC_R_SUCCESS);
/* did isc_histo_put() spread evenly? */
assert_int_equal(count, 1);
result = isc_histo_value_at_rank(hs, bucket,
&value);
assert_int_equal(result, ISC_R_SUCCESS);
assert_int_equal(value, min);
/* do the quantile values match? */
assert_int_equal(value[SUBRANGE - bucket], min);
}
result = isc_histo_value_at_rank(hs, buckets, &value);
assert_int_equal(result, ISC_R_SUCCESS);
assert_int_equal(value, max);
assert_int_equal(value[0], max);
isc_histosummary_destroy(&hs);
isc_histo_destroy(&hg);
isc_histo_create(mctx, bits, &hg);
@ -331,8 +342,8 @@ ISC_RUN_TEST_IMPL(subrange) {
ISC_TEST_LIST_START
ISC_TEST_ENTRY(basics)
ISC_TEST_ENTRY(quantiles)
ISC_TEST_ENTRY(sigfigs)
ISC_TEST_ENTRY(summary)
ISC_TEST_ENTRY(subrange)
ISC_TEST_LIST_END