promql: Make it possible to add custom details in annotations and summarise multiple instances of the same annotation (#15577)

Signed-off-by: Jeanette Tan <jeanette.tan@grafana.com>
Signed-off-by: zenador <zenador@users.noreply.github.com>
Co-authored-by: Björn Rabenstein <github@rabenste.in>
2026-02-03 20:39:32 -05:00 · 2026-02-03 22:12:02 +08:00 · 2026-02-03 22:12:02 +08:00 · c0ad853750
commit c0ad853750
parent 02c68154bc
2 changed files with 176 additions and 30 deletions
--- a/util/annotations/annotations.go
+++ b/util/annotations/annotations.go
@ -16,7 +16,6 @@ package annotations
 import (
 	"errors"
 	"fmt"
-	"maps"

 	"github.com/prometheus/common/model"

@ -43,12 +42,18 @@ func (a *Annotations) Add(err error) Annotations {
 	if *a == nil {
 		*a = Annotations{}
 	}
+	if prevErr, exists := (*a)[err.Error()]; exists {
+		var anErr annoError
+		if errors.As(err, &anErr) {
+			err = anErr.Merge(prevErr)
+		}
+	}
 	(*a)[err.Error()] = err
 	return *a
 }

-// Merge adds the contents of the second annotation to the first, modifying
-// the first in-place, and returns the merged first Annotation for convenience.
+// Merge adds the contents of the second set of Annotations to the first, modifying
+// the first in-place, and returns the merged first Annotations for convenience.
 func (a *Annotations) Merge(aa Annotations) Annotations {
 	if *a == nil {
 		if aa == nil {
@ -56,7 +61,15 @@ func (a *Annotations) Merge(aa Annotations) Annotations {
 		}
 		*a = Annotations{}
 	}
-	maps.Copy((*a), aa)
+	for key, val := range aa {
+		if prevVal, exists := (*a)[key]; exists {
+			var anErr annoError
+			if errors.As(val, &anErr) {
+				val = anErr.Merge(prevVal)
+			}
+		}
+		(*a)[key] = val
+	}
 	return *a
 }

@ -81,10 +94,9 @@ func (a Annotations) AsStrings(query string, maxWarnings, maxInfos int) (warning
 	warnSkipped := 0
 	infoSkipped := 0
 	for _, err := range a {
-		var anErr annoErr
+		var anErr annoError
 		if errors.As(err, &anErr) {
-			anErr.Query = query
-			err = anErr
+			anErr.SetQuery(query)
 		}
 		switch {
 		case errors.Is(err, PromQLInfo):
@ -157,23 +169,48 @@ var (
 	MismatchedCustomBucketsHistogramsInfo   = fmt.Errorf("%w: mismatched custom buckets were reconciled during", PromQLInfo)
 )

+// annoError extends the standard error interface with the extra methods PromQL annotations
+// need: unwrapping, setting the query text, and merging with other similar errors.
+type annoError interface {
+	error
+	// Unwrap is necessary so we can use errors.Is() to disambiguate between warning and info.
+	Unwrap() error
+	// SetQuery is necessary when we want to show position info. It is only called at the end, from
+	// AsStrings(), so before that we deduplicate based on the raw error string (query is empty), and
+	// the full error string with details is only shown at the end, once the query has been set.
+	SetQuery(string)
+	// Merge is called on an incoming annotation with the already stored annotation that has the same
+	// raw error string. Implementations define how the two are combined and return the one to keep.
+	Merge(error) error
+}
+
 type annoErr struct {
 	PositionRange posrange.PositionRange
 	Err           error
 	Query         string
 }

-func (e annoErr) Error() string {
+func (e *annoErr) Error() string {
 	if e.Query == "" {
 		return e.Err.Error()
 	}
 	return fmt.Sprintf("%s (%s)", e.Err, e.PositionRange.StartPosInput(e.Query, 0))
 }

-func (e annoErr) Unwrap() error {
+func (e *annoErr) Unwrap() error {
 	return e.Err
 }

+func (e *annoErr) SetQuery(query string) {
+	e.Query = query
+}
+
+// Merge does not combine generic annotations: it ignores the provided error
+// and returns the receiver unchanged.
+func (e *annoErr) Merge(_ error) error {
+	return e
+}
+
 func maybeAddMetricName(anno error, metricName string) error {
 	if metricName == "" {
 		return anno
@ -184,7 +221,7 @@ func maybeAddMetricName(anno error, metricName string) error {
 // NewInvalidQuantileWarning is used when the user specifies an invalid quantile
 // value, i.e. a float that is outside the range [0, 1] or NaN.
 func NewInvalidQuantileWarning(q float64, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w, got %g", InvalidQuantileWarning, q),
 	}
@ -193,7 +230,7 @@ func NewInvalidQuantileWarning(q float64, pos posrange.PositionRange) error {
 // NewInvalidRatioWarning is used when the user specifies an invalid ratio
 // value, i.e. a float that is outside the range [-1, 1] or NaN.
 func NewInvalidRatioWarning(q, to float64, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w, got %g, capping to %g", InvalidRatioWarning, q, to),
 	}
@ -203,7 +240,7 @@ func NewInvalidRatioWarning(q, to float64, pos posrange.PositionRange) error {
 // of a classic histogram.
 func NewBadBucketLabelWarning(metricName, label string, pos posrange.PositionRange) error {
 	anno := maybeAddMetricName(fmt.Errorf("%w of %q", BadBucketLabelWarning, label), metricName)
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           anno,
 	}
@ -213,7 +250,7 @@ func NewBadBucketLabelWarning(metricName, label string, pos posrange.PositionRan
 // float samples and histogram samples for functions that do not support mixed
 // samples.
 func NewMixedFloatsHistogramsWarning(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w metric name %q", MixedFloatsHistogramsWarning, metricName),
 	}
@ -222,7 +259,7 @@ func NewMixedFloatsHistogramsWarning(metricName string, pos posrange.PositionRan
 // NewMixedFloatsHistogramsAggWarning is used when the queried series includes both
 // float samples and histogram samples in an aggregation.
 func NewMixedFloatsHistogramsAggWarning(pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w aggregation", MixedFloatsHistogramsWarning),
 	}
@ -231,7 +268,7 @@ func NewMixedFloatsHistogramsAggWarning(pos posrange.PositionRange) error {
 // NewMixedClassicNativeHistogramsWarning is used when the queried series includes
 // both classic and native histograms.
 func NewMixedClassicNativeHistogramsWarning(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           maybeAddMetricName(MixedClassicNativeHistogramsWarning, metricName),
 	}
@ -240,7 +277,7 @@ func NewMixedClassicNativeHistogramsWarning(metricName string, pos posrange.Posi
 // NewNativeHistogramNotCounterWarning is used when histogramRate is called
 // with isCounter set to true on a gauge histogram.
 func NewNativeHistogramNotCounterWarning(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w %q", NativeHistogramNotCounterWarning, metricName),
 	}
@ -249,7 +286,7 @@ func NewNativeHistogramNotCounterWarning(metricName string, pos posrange.Positio
 // NewNativeHistogramNotGaugeWarning is used when histogramRate is called
 // with isCounter set to false on a counter histogram.
 func NewNativeHistogramNotGaugeWarning(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w %q", NativeHistogramNotGaugeWarning, metricName),
 	}
@ -258,7 +295,7 @@ func NewNativeHistogramNotGaugeWarning(metricName string, pos posrange.PositionR
 // NewMixedExponentialCustomHistogramsWarning is used when the queried series includes
 // histograms with both exponential and custom buckets schemas.
 func NewMixedExponentialCustomHistogramsWarning(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w %q", MixedExponentialCustomHistogramsWarning, metricName),
 	}
@ -267,7 +304,7 @@ func NewMixedExponentialCustomHistogramsWarning(metricName string, pos posrange.
 // NewPossibleNonCounterInfo is used when a named counter metric with only float samples does not
 // have the suffixes _total, _sum, _count, or _bucket.
 func NewPossibleNonCounterInfo(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w %q", PossibleNonCounterInfo, metricName),
 	}
@ -276,7 +313,7 @@ func NewPossibleNonCounterInfo(metricName string, pos posrange.PositionRange) er
 // NewPossibleNonCounterLabelInfo is used when a named counter metric with only float samples does not
 // have the __type__ label set to "counter".
 func NewPossibleNonCounterLabelInfo(metricName, typeLabel string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w, got %q: %q", PossibleNonCounterLabelInfo, typeLabel, metricName),
 	}
@ -285,7 +322,7 @@ func NewPossibleNonCounterLabelInfo(metricName, typeLabel string, pos posrange.P
 // NewHistogramQuantileForcedMonotonicityInfo is used when the input (classic histograms) to
 // histogram_quantile needs to be forced to be monotonic.
 func NewHistogramQuantileForcedMonotonicityInfo(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           maybeAddMetricName(HistogramQuantileForcedMonotonicityInfo, metricName),
 	}
@ -294,7 +331,7 @@ func NewHistogramQuantileForcedMonotonicityInfo(metricName string, pos posrange.
 // NewIncompatibleTypesInBinOpInfo is used if binary operators act on a
 // combination of types that doesn't work and therefore returns no result.
 func NewIncompatibleTypesInBinOpInfo(lhsType, operator, rhsType string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w %q: %s %s %s", IncompatibleTypesInBinOpInfo, operator, lhsType, operator, rhsType),
 	}
@ -303,7 +340,7 @@ func NewIncompatibleTypesInBinOpInfo(lhsType, operator, rhsType string, pos posr
 // NewHistogramIgnoredInAggregationInfo is used when a histogram is ignored by
 // an aggregation operator that cannot handle histograms.
 func NewHistogramIgnoredInAggregationInfo(aggregation string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w %s aggregation", HistogramIgnoredInAggregationInfo, aggregation),
 	}
@ -312,7 +349,7 @@ func NewHistogramIgnoredInAggregationInfo(aggregation string, pos posrange.Posit
 // NewHistogramIgnoredInMixedRangeInfo is used when a histogram is ignored
 // in a range vector which contains mix of floats and histograms.
 func NewHistogramIgnoredInMixedRangeInfo(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w %q", HistogramIgnoredInMixedRangeInfo, metricName),
 	}
@ -321,28 +358,28 @@ func NewHistogramIgnoredInMixedRangeInfo(metricName string, pos posrange.Positio
 // NewIncompatibleBucketLayoutInBinOpWarning is used if binary operators act on a
 // combination of two incompatible histograms.
 func NewIncompatibleBucketLayoutInBinOpWarning(operator string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w %s", IncompatibleBucketLayoutInBinOpWarning, operator),
 	}
 }

 func NewNativeHistogramQuantileNaNResultInfo(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           maybeAddMetricName(NativeHistogramQuantileNaNResultInfo, metricName),
 	}
 }

 func NewNativeHistogramQuantileNaNSkewInfo(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           maybeAddMetricName(NativeHistogramQuantileNaNSkewInfo, metricName),
 	}
 }

 func NewNativeHistogramFractionNaNsInfo(metricName string, pos posrange.PositionRange) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           maybeAddMetricName(NativeHistogramFractionNaNsInfo, metricName),
 	}
@ -368,7 +405,7 @@ func (op HistogramOperation) String() string {
 // NewHistogramCounterResetCollisionWarning is used when two counter histograms are added or subtracted where one has
 // a CounterReset hint and the other has NotCounterReset.
 func NewHistogramCounterResetCollisionWarning(pos posrange.PositionRange, operation HistogramOperation) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w %s", HistogramCounterResetCollisionWarning, operation.String()),
 	}
@ -377,7 +414,7 @@ func NewHistogramCounterResetCollisionWarning(pos posrange.PositionRange, operat
 // NewMismatchedCustomBucketsHistogramsInfo is used when the queried series includes
 // custom buckets histograms with mismatched custom bounds that cause reconciling.
 func NewMismatchedCustomBucketsHistogramsInfo(pos posrange.PositionRange, operation HistogramOperation) error {
-	return annoErr{
+	return &annoErr{
 		PositionRange: pos,
 		Err:           fmt.Errorf("%w %s", MismatchedCustomBucketsHistogramsInfo, operation.String()),
 	}
--- a/util/annotations/annotations_test.go
+++ b/util/annotations/annotations_test.go
@ -0,0 +1,109 @@
+// Copyright 2024 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package annotations
+
+import (
+	"errors"
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/prometheus/prometheus/promql/parser/posrange"
+)
+
+func TestAnnotations_AsStrings(t *testing.T) {
+	var annos Annotations
+	pos := posrange.PositionRange{Start: 3, End: 8}
+
+	annos.Add(errors.New("this is a non-annotation error")) // plain error: expected among the warnings below
+
+	annos.Add(NewInvalidRatioWarning(1.1, 100, pos))
+	annos.Add(NewInvalidRatioWarning(1.2, 123, pos)) // raw string differs from the previous one, so both are expected
+
+	annos.Add(newTestCustomWarning(1.5, pos, 12, 14))
+	annos.Add(newTestCustomWarning(1.5, pos, 10, 20))
+	annos.Add(newTestCustomWarning(1.5, pos, 5, 15))
+	annos.Add(newTestCustomWarning(1.5, pos, 12, 14)) // same raw string four times: expected as one summarised entry
+
+	annos.Add(NewHistogramIgnoredInAggregationInfo("sum", pos))
+
+	warnings, infos := annos.AsStrings("lorem ipsum dolor sit amet", 0, 0)
+	require.ElementsMatch(t, warnings, []string{
+		"this is a non-annotation error",
+		"PromQL warning: ratio value should be between -1 and 1, got 1.1, capping to 100 (1:4)",
+		"PromQL warning: ratio value should be between -1 and 1, got 1.2, capping to 123 (1:4)",
+		"PromQL warning: custom value set to 1.5, 4 instances with smallest 5 and biggest 20 (1:4)",
+	})
+	require.ElementsMatch(t, infos, []string{
+		"PromQL info: ignored histogram in sum aggregation (1:4)",
+	})
+}
+
+type testCustomError struct { // test-only error with Error, Unwrap, SetQuery and a summarising Merge
+	PositionRange posrange.PositionRange // passed to StartPosInput by Error() once Query is set
+	Err           error                  // raw error; Error() returns its string while Query is empty
+	Query         string                 // empty until SetQuery is called
+	Min           []float64              // smallest value per index across merged instances
+	Max           []float64              // biggest value per index across merged instances
+	Count         int                    // merged instances beyond the first; Error() prints Count+1
+}
+
+func (e *testCustomError) Error() string {
+	if e.Query == "" { // query not set yet: return only the raw string, without summary details
+		return e.Err.Error()
+	}
+	return fmt.Sprintf("%s, %d instances with smallest %g and biggest %g (%s)", e.Err, e.Count+1, e.Min[0], e.Max[0], e.PositionRange.StartPosInput(e.Query, 0))
+}
+
+func (e *testCustomError) Unwrap() error {
+	return e.Err
+}
+
+func (e *testCustomError) SetQuery(query string) {
+	e.Query = query
+}
+
+func (e *testCustomError) Merge(other error) error { // callers pass the already stored error as other
+	o := &testCustomError{}
+	ok := errors.As(other, &o) // on success o points at the *testCustomError found in other's chain
+	if !ok {
+		return e // other is not a testCustomError: nothing to summarise, keep e
+	}
+	if e.Err.Error() != o.Err.Error() || len(e.Min) != len(o.Min) || len(e.Max) != len(o.Max) {
+		return e // raw errors or slice lengths differ: not mergeable, keep e
+	}
+	for i, aMin := range e.Min {
+		if aMin < o.Min[i] {
+			o.Min[i] = aMin // fold e's smaller value into o in place
+		}
+	}
+	for i, aMax := range e.Max {
+		if aMax > o.Max[i] {
+			o.Max[i] = aMax // fold e's bigger value into o in place
+		}
+	}
+	o.Count += e.Count + 1 // +1 counts e itself; Count holds instances beyond the first
+	return o               // the mutated o, not e, is the merged result
+}
+
+func newTestCustomWarning(q float64, pos posrange.PositionRange, smallest, largest float64) error {
+	testCustomWarning := fmt.Errorf("%w: custom value set to", PromQLWarning) // wraps PromQLWarning
+	return &testCustomError{
+		PositionRange: pos,
+		Err:           fmt.Errorf("%w %g", testCustomWarning, q), // raw string depends only on q, not on smallest/largest
+		Min:           []float64{smallest},
+		Max:           []float64{largest},
+	}
+}