tsdb: check for out-of-order labels instead of duplicates during WAL replay

Replace HasAnyDuplicateLabelNames with HasOutOfOrderLabel for detecting
corrupted series during WAL and chunk snapshot replay.

Since labels are always expected to be sorted by name, checking for
out-of-order labels is more correct and catches a broader class of corruption:
- Duplicate labels (in a sorted set duplicates are adjacent, so the name <= prevName comparison fires on equality)
- Unsorted labels (any ordering violation; non-adjacent duplicates necessarily imply one, so they are caught as well)

This is also more efficient: O(1) space instead of O(n) for the map that
was previously used to track seen label names.

Signed-off-by: Arve Knudsen <arve.knudsen@gmail.com>
This commit is contained in:
Arve Knudsen 2026-01-21 08:30:10 +01:00
parent 76d4a7cb2b
commit b1ddfa92a6
5 changed files with 60 additions and 59 deletions

View file

@ -382,21 +382,17 @@ func (ls Labels) HasDuplicateLabelNames() (string, bool) {
return "", false
}
// HasAnyDuplicateLabelNames returns whether ls has any duplicate label names,
// even if they are not consecutive. This is useful for detecting corrupted
// label sets where the labels may not be properly sorted.
// Note: the passed map is ignored; this implementation uses an internal map[int]struct{}
// for efficiency since it compares symbol IDs rather than strings.
func (ls Labels) HasAnyDuplicateLabelNames(map[string]struct{}) (string, bool) {
seen := make(map[int]struct{}, 32)
// HasOutOfOrderLabel reports whether any label name in ls compares less than or
// equal to the name preceding it, i.e. the names are unsorted or duplicated.
// Since labels are expected to be sorted, out-of-order labels indicate corruption.
// It returns the first offending label name and true, or "" and false when no
// violation is found.
func (ls Labels) HasOutOfOrderLabel() (string, bool) {
var prev string
for i := 0; i < len(ls.data); {
var lNum int
lNum, i = decodeVarint(ls.data, i)
_, i = decodeVarint(ls.data, i)
if _, exists := seen[lNum]; exists {
return ls.syms.ToName(lNum), true
lName, newI := decodeString(ls.syms, ls.data, i)
_, i = decodeVarint(ls.data, newI)
// prev is "" before the first label, so the guard skips the first comparison.
// NOTE(review): it also skips the comparison after a label whose name is empty,
// so two consecutive empty names are not reported here — confirm that is acceptable.
if prev != "" && lName <= prev {
return lName, true
}
seen[lNum] = struct{}{}
prev = lName
}
return "", false
}

View file

@ -233,17 +233,13 @@ func (ls Labels) HasDuplicateLabelNames() (string, bool) {
return "", false
}
// HasAnyDuplicateLabelNames returns whether ls has any duplicate label names,
// even if they are not consecutive. This is useful for detecting corrupted
// label sets where the labels may not be properly sorted.
// The seen map is cleared and reused to reduce allocations when called in a loop.
func (ls Labels) HasAnyDuplicateLabelNames(seen map[string]struct{}) (string, bool) {
clear(seen)
for _, l := range ls {
if _, exists := seen[l.Name]; exists {
return l.Name, true
// HasOutOfOrderLabel reports whether any label name in ls compares less than or
// equal to the name preceding it, i.e. the names are unsorted or duplicated.
// Since labels are expected to be sorted, out-of-order labels indicate corruption.
// It returns the first offending label name and true, or "" and false when no
// violation is found.
func (ls Labels) HasOutOfOrderLabel() (string, bool) {
// Start at 1: each name is compared with its predecessor, so empty and
// single-label sets never report a violation.
for i := 1; i < len(ls); i++ {
if ls[i].Name <= ls[i-1].Name {
return ls[i].Name, true
}
seen[l.Name] = struct{}{}
}
return "", false
}

View file

@ -256,19 +256,17 @@ func (ls Labels) HasDuplicateLabelNames() (string, bool) {
return "", false
}
// HasAnyDuplicateLabelNames returns whether ls has any duplicate label names,
// even if they are not consecutive. This is useful for detecting corrupted
// label sets where the labels may not be properly sorted.
// The seen map is cleared and reused to reduce allocations when called in a loop.
func (ls Labels) HasAnyDuplicateLabelNames(seen map[string]struct{}) (string, bool) {
clear(seen)
// HasOutOfOrderLabel reports whether any label name in ls compares less than or
// equal to the name preceding it, i.e. the names are unsorted or duplicated.
// Since labels are expected to be sorted, out-of-order labels indicate corruption.
// It returns the first offending label name and true, or "" and false when no
// violation is found.
func (ls Labels) HasOutOfOrderLabel() (string, bool) {
var prev string
for i := 0; i < len(ls.data); {
lName, newI := decodeString(ls.data, i)
_, i = decodeString(ls.data, newI)
if _, exists := seen[lName]; exists {
// prev is "" before the first label, so the guard skips the first comparison.
// NOTE(review): it also skips the comparison after a label whose name is empty,
// so two consecutive empty names are not reported here — confirm that is acceptable.
if prev != "" && lName <= prev {
return lName, true
}
seen[lName] = struct{}{}
prev = lName
}
return "", false
}

View file

@ -201,42 +201,55 @@ func TestLabels_HasDuplicateLabelNames(t *testing.T) {
}
}
func TestLabels_HasAnyDuplicateLabelNames(t *testing.T) {
func TestLabels_HasOutOfOrderLabel(t *testing.T) {
// Helper to create unsorted labels using ScratchBuilder without Sort()
unsortedLabels := func(ss ...string) Labels {
b := NewScratchBuilder(len(ss) / 2)
for i := 0; i < len(ss); i += 2 {
b.Add(ss[i], ss[i+1])
}
return b.Labels()
}
cases := []struct {
name string
input Labels
duplicate bool
labelName string
name string
input Labels
outOfOrder bool
labelName string
}{
{
name: "no duplicates",
input: FromMap(map[string]string{"__name__": "up", "hostname": "localhost"}),
duplicate: false,
name: "sorted labels",
input: FromMap(map[string]string{"__name__": "up", "hostname": "localhost"}),
outOfOrder: false,
},
{
name: "consecutive duplicates",
input: FromStrings("__name__", "up", "hostname", "localhost", "hostname", "127.0.0.1"),
duplicate: true,
labelName: "hostname",
name: "duplicate labels",
input: unsortedLabels("__name__", "up", "hostname", "localhost", "hostname", "127.0.0.1"),
outOfOrder: true,
labelName: "hostname",
},
{
name: "non-consecutive duplicates",
input: FromStrings("a", "1", "b", "2", "a", "3"),
duplicate: true,
labelName: "a",
name: "out of order labels",
input: unsortedLabels("b", "1", "a", "2"),
outOfOrder: true,
labelName: "a",
},
{
name: "empty labels",
input: EmptyLabels(),
duplicate: false,
name: "empty labels",
input: EmptyLabels(),
outOfOrder: false,
},
{
name: "single label",
input: FromStrings("a", "1"),
outOfOrder: false,
},
}
seen := make(map[string]struct{})
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
l, d := c.input.HasAnyDuplicateLabelNames(seen)
require.Equal(t, c.duplicate, d, "incorrect duplicate bool")
l, ooo := c.input.HasOutOfOrderLabel()
require.Equal(t, c.outOfOrder, ooo, "incorrect out of order bool")
require.Equal(t, c.labelName, l, "incorrect label name")
})
}

View file

@ -250,14 +250,13 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch
// The records are always replayed from the oldest to the newest.
missingSeries := make(map[chunks.HeadSeriesRef]struct{})
dupLabelsSeen := make(map[string]struct{}, 32)
Outer:
for d := range decoded {
switch v := d.(type) {
case []record.RefSeries:
for _, walSeries := range v {
if dupName, hasDup := walSeries.Labels.HasAnyDuplicateLabelNames(dupLabelsSeen); hasDup {
h.logger.Warn("skipping series with corrupted labels during WAL replay", "ref", walSeries.Ref, "duplicate_label", dupName)
if badName, outOfOrder := walSeries.Labels.HasOutOfOrderLabel(); outOfOrder {
h.logger.Warn("skipping series with out-of-order labels during WAL replay", "ref", walSeries.Ref, "out_of_order_label", badName)
h.metrics.walReplayCorruptedSeriesTotal.Inc()
continue
}
@ -1621,11 +1620,10 @@ func (h *Head) loadChunkSnapshot() (int, int, map[chunks.HeadSeriesRef]*memSerie
shardedRefSeries[idx] = make(map[chunks.HeadSeriesRef]*memSeries)
localRefSeries := shardedRefSeries[idx]
dupLabelsSeen := make(map[string]struct{}, 32)
for csr := range rc {
if dupName, hasDup := csr.lset.HasAnyDuplicateLabelNames(dupLabelsSeen); hasDup {
h.logger.Warn("skipping series with corrupted labels during chunk snapshot replay", "ref", csr.ref, "duplicate_label", dupName)
if badName, outOfOrder := csr.lset.HasOutOfOrderLabel(); outOfOrder {
h.logger.Warn("skipping series with out-of-order labels during chunk snapshot replay", "ref", csr.ref, "out_of_order_label", badName)
h.metrics.walReplayCorruptedSeriesTotal.Inc()
continue
}