From c65aa507bf5832cc9a239d04a2f5e91ef562b1d7 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 28 Jan 2026 09:13:50 +0100 Subject: [PATCH 1/2] tsdb: fix missing hash shard lock in deleteSeriesByID The deleteSeriesByID function accessed hashes[hashShard] while only holding locks[refShard]. When hashShard != refShard, this is a data race because hashes[hashShard] is protected by locks[hashShard]. This fix conditionally acquires locks[hashShard] when it differs from refShard, following the same pattern used in gc(). Fixes #17950 Signed-off-by: Arve Knudsen --- tsdb/head.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tsdb/head.go b/tsdb/head.go index 3d700944d9..14f30f1ac5 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -2183,8 +2183,16 @@ func (h *Head) deleteSeriesByID(refs []chunks.HeadSeriesRef) { deleted[storage.SeriesRef(series.ref)] = struct{}{} series.lset.Range(func(l labels.Label) { affected[l] = struct{}{} }) + + // Acquire hashShard lock if it differs from refShard to safely access hashes[hashShard]. + if hashShard != refShard { + h.series.locks[hashShard].Lock() + } h.series.hashes[hashShard].del(hash, series.ref) delete(h.series.series[refShard], series.ref) + if hashShard != refShard { + h.series.locks[hashShard].Unlock() + } h.series.locks[refShard].Unlock() } From 8a024775930d1a2fedc9bae0f448421daf3abf0e Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 28 Jan 2026 11:24:12 +0100 Subject: [PATCH 2/2] Add test Signed-off-by: Arve Knudsen --- tsdb/head_test.go | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 493f938860..00674cad62 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -7179,3 +7179,50 @@ func TestHistogramStalenessConversionMetrics(t *testing.T) { }) } } + +// TestHead_RaceDeleteSeriesByID tests for a data race in deleteSeriesByID +// when hashShard != refShard. The bug is that deleteSeriesByID accesses +// hashes[hashShard] while only holding locks[refShard]. When these shards +// differ, this is a data race with concurrent operations on hashes[hashShard]. +// This test should be run with -race to detect the issue. +func TestHead_RaceDeleteSeriesByID(t *testing.T) { + // Use a small stripe size to increase the probability that hashShard != refShard. + opts := newTestHeadDefaultOptions(1000, false) + opts.StripeSize = 2 + + head, _ := newTestHeadWithOptions(t, compression.None, opts) + require.NoError(t, head.Init(0)) + + const numSeries = 1000 + + // Pre-create series to delete. + refs := make([]chunks.HeadSeriesRef, 0, numSeries) + app := head.Appender(t.Context()) + for i := range numSeries { + lset := labels.FromStrings("a", strconv.Itoa(i)) + ref, err := app.Append(0, lset, 100, float64(i)) + require.NoError(t, err) + refs = append(refs, chunks.HeadSeriesRef(ref)) + } + require.NoError(t, app.Commit()) + + // Goroutine creates new series concurrently (accesses hashes[] via getOrCreate). + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + app := head.Appender(t.Context()) + for i := numSeries; i < numSeries*3; i++ { + lset := labels.FromStrings("b", strconv.Itoa(i)) + _, _ = app.Append(0, lset, 100, float64(i)) + } + _ = app.Commit() + }() + + // Delete pre-created series concurrently (accesses hashes[] via del). + for _, ref := range refs { + head.deleteSeriesByID([]chunks.HeadSeriesRef{ref}) + } + + wg.Wait() +}