mirror of
https://github.com/prometheus/prometheus.git
synced 2026-02-03 20:39:32 -05:00
tsdb: check for out-of-order labels instead of duplicates during WAL replay
Replace HasAnyDuplicateLabelNames with HasOutOfOrderLabel for detecting corrupted series during WAL and chunk snapshot replay.
Since labels are always expected to be sorted, checking for out-of-order labels is more correct and catches a broader class of corruption:
- Duplicate labels (adjacent labels with the same name trigger name <= prevName)
- Unsorted labels (any ordering violation)
This is also more efficient: O(1) space instead of O(n) for the map that was previously used to track seen label names.
Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
This commit is contained in:
parent
76d4a7cb2b
commit
b1ddfa92a6
5 changed files with 60 additions and 59 deletions
|
|
@ -382,21 +382,17 @@ func (ls Labels) HasDuplicateLabelNames() (string, bool) {
|
|||
return "", false
|
||||
}
|
||||
|
||||
// HasAnyDuplicateLabelNames returns whether ls has any duplicate label names,
|
||||
// even if they are not consecutive. This is useful for detecting corrupted
|
||||
// label sets where the labels may not be properly sorted.
|
||||
// Note: the passed map is ignored; this implementation uses an internal map[int]struct{}
|
||||
// for efficiency since it compares symbol IDs rather than strings.
|
||||
func (ls Labels) HasAnyDuplicateLabelNames(map[string]struct{}) (string, bool) {
|
||||
seen := make(map[int]struct{}, 32)
|
||||
// HasOutOfOrderLabel checks if labels are not sorted by name (including duplicates).
|
||||
// Since labels are expected to be sorted, out-of-order labels indicate corruption.
|
||||
func (ls Labels) HasOutOfOrderLabel() (string, bool) {
|
||||
var prev string
|
||||
for i := 0; i < len(ls.data); {
|
||||
var lNum int
|
||||
lNum, i = decodeVarint(ls.data, i)
|
||||
_, i = decodeVarint(ls.data, i)
|
||||
if _, exists := seen[lNum]; exists {
|
||||
return ls.syms.ToName(lNum), true
|
||||
lName, newI := decodeString(ls.syms, ls.data, i)
|
||||
_, i = decodeVarint(ls.data, newI)
|
||||
if prev != "" && lName <= prev {
|
||||
return lName, true
|
||||
}
|
||||
seen[lNum] = struct{}{}
|
||||
prev = lName
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -233,17 +233,13 @@ func (ls Labels) HasDuplicateLabelNames() (string, bool) {
|
|||
return "", false
|
||||
}
|
||||
|
||||
// HasAnyDuplicateLabelNames returns whether ls has any duplicate label names,
|
||||
// even if they are not consecutive. This is useful for detecting corrupted
|
||||
// label sets where the labels may not be properly sorted.
|
||||
// The seen map is cleared and reused to reduce allocations when called in a loop.
|
||||
func (ls Labels) HasAnyDuplicateLabelNames(seen map[string]struct{}) (string, bool) {
|
||||
clear(seen)
|
||||
for _, l := range ls {
|
||||
if _, exists := seen[l.Name]; exists {
|
||||
return l.Name, true
|
||||
// HasOutOfOrderLabel checks if labels are not sorted by name (including duplicates).
|
||||
// Since labels are expected to be sorted, out-of-order labels indicate corruption.
|
||||
func (ls Labels) HasOutOfOrderLabel() (string, bool) {
|
||||
for i := 1; i < len(ls); i++ {
|
||||
if ls[i].Name <= ls[i-1].Name {
|
||||
return ls[i].Name, true
|
||||
}
|
||||
seen[l.Name] = struct{}{}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -256,19 +256,17 @@ func (ls Labels) HasDuplicateLabelNames() (string, bool) {
|
|||
return "", false
|
||||
}
|
||||
|
||||
// HasAnyDuplicateLabelNames returns whether ls has any duplicate label names,
|
||||
// even if they are not consecutive. This is useful for detecting corrupted
|
||||
// label sets where the labels may not be properly sorted.
|
||||
// The seen map is cleared and reused to reduce allocations when called in a loop.
|
||||
func (ls Labels) HasAnyDuplicateLabelNames(seen map[string]struct{}) (string, bool) {
|
||||
clear(seen)
|
||||
// HasOutOfOrderLabel checks if labels are not sorted by name (including duplicates).
|
||||
// Since labels are expected to be sorted, out-of-order labels indicate corruption.
|
||||
func (ls Labels) HasOutOfOrderLabel() (string, bool) {
|
||||
var prev string
|
||||
for i := 0; i < len(ls.data); {
|
||||
lName, newI := decodeString(ls.data, i)
|
||||
_, i = decodeString(ls.data, newI)
|
||||
if _, exists := seen[lName]; exists {
|
||||
if prev != "" && lName <= prev {
|
||||
return lName, true
|
||||
}
|
||||
seen[lName] = struct{}{}
|
||||
prev = lName
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
|
|
|||
|
|
@ -201,42 +201,55 @@ func TestLabels_HasDuplicateLabelNames(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestLabels_HasAnyDuplicateLabelNames(t *testing.T) {
|
||||
func TestLabels_HasOutOfOrderLabel(t *testing.T) {
|
||||
// Helper to create unsorted labels using ScratchBuilder without Sort()
|
||||
unsortedLabels := func(ss ...string) Labels {
|
||||
b := NewScratchBuilder(len(ss) / 2)
|
||||
for i := 0; i < len(ss); i += 2 {
|
||||
b.Add(ss[i], ss[i+1])
|
||||
}
|
||||
return b.Labels()
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
input Labels
|
||||
duplicate bool
|
||||
labelName string
|
||||
name string
|
||||
input Labels
|
||||
outOfOrder bool
|
||||
labelName string
|
||||
}{
|
||||
{
|
||||
name: "no duplicates",
|
||||
input: FromMap(map[string]string{"__name__": "up", "hostname": "localhost"}),
|
||||
duplicate: false,
|
||||
name: "sorted labels",
|
||||
input: FromMap(map[string]string{"__name__": "up", "hostname": "localhost"}),
|
||||
outOfOrder: false,
|
||||
},
|
||||
{
|
||||
name: "consecutive duplicates",
|
||||
input: FromStrings("__name__", "up", "hostname", "localhost", "hostname", "127.0.0.1"),
|
||||
duplicate: true,
|
||||
labelName: "hostname",
|
||||
name: "duplicate labels",
|
||||
input: unsortedLabels("__name__", "up", "hostname", "localhost", "hostname", "127.0.0.1"),
|
||||
outOfOrder: true,
|
||||
labelName: "hostname",
|
||||
},
|
||||
{
|
||||
name: "non-consecutive duplicates",
|
||||
input: FromStrings("a", "1", "b", "2", "a", "3"),
|
||||
duplicate: true,
|
||||
labelName: "a",
|
||||
name: "out of order labels",
|
||||
input: unsortedLabels("b", "1", "a", "2"),
|
||||
outOfOrder: true,
|
||||
labelName: "a",
|
||||
},
|
||||
{
|
||||
name: "empty labels",
|
||||
input: EmptyLabels(),
|
||||
duplicate: false,
|
||||
name: "empty labels",
|
||||
input: EmptyLabels(),
|
||||
outOfOrder: false,
|
||||
},
|
||||
{
|
||||
name: "single label",
|
||||
input: FromStrings("a", "1"),
|
||||
outOfOrder: false,
|
||||
},
|
||||
}
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
l, d := c.input.HasAnyDuplicateLabelNames(seen)
|
||||
require.Equal(t, c.duplicate, d, "incorrect duplicate bool")
|
||||
l, ooo := c.input.HasOutOfOrderLabel()
|
||||
require.Equal(t, c.outOfOrder, ooo, "incorrect out of order bool")
|
||||
require.Equal(t, c.labelName, l, "incorrect label name")
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -250,14 +250,13 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
|
|||
|
||||
// The records are always replayed from the oldest to the newest.
|
||||
missingSeries := make(map[chunks.HeadSeriesRef]struct{})
|
||||
dupLabelsSeen := make(map[string]struct{}, 32)
|
||||
Outer:
|
||||
for d := range decoded {
|
||||
switch v := d.(type) {
|
||||
case []record.RefSeries:
|
||||
for _, walSeries := range v {
|
||||
if dupName, hasDup := walSeries.Labels.HasAnyDuplicateLabelNames(dupLabelsSeen); hasDup {
|
||||
h.logger.Warn("skipping series with corrupted labels during WAL replay", "ref", walSeries.Ref, "duplicate_label", dupName)
|
||||
if badName, outOfOrder := walSeries.Labels.HasOutOfOrderLabel(); outOfOrder {
|
||||
h.logger.Warn("skipping series with out-of-order labels during WAL replay", "ref", walSeries.Ref, "out_of_order_label", badName)
|
||||
h.metrics.walReplayCorruptedSeriesTotal.Inc()
|
||||
continue
|
||||
}
|
||||
|
|
@ -1621,11 +1620,10 @@ func (h *Head) loadChunkSnapshot() (int, int, map[chunks.HeadSeriesRef]*memSerie
|
|||
|
||||
shardedRefSeries[idx] = make(map[chunks.HeadSeriesRef]*memSeries)
|
||||
localRefSeries := shardedRefSeries[idx]
|
||||
dupLabelsSeen := make(map[string]struct{}, 32)
|
||||
|
||||
for csr := range rc {
|
||||
if dupName, hasDup := csr.lset.HasAnyDuplicateLabelNames(dupLabelsSeen); hasDup {
|
||||
h.logger.Warn("skipping series with corrupted labels during chunk snapshot replay", "ref", csr.ref, "duplicate_label", dupName)
|
||||
if badName, outOfOrder := csr.lset.HasOutOfOrderLabel(); outOfOrder {
|
||||
h.logger.Warn("skipping series with out-of-order labels during chunk snapshot replay", "ref", csr.ref, "out_of_order_label", badName)
|
||||
h.metrics.walReplayCorruptedSeriesTotal.Inc()
|
||||
continue
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue