diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index a4b05d387a..1386a64aec 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -200,7 +200,6 @@ func TestBasicContentNegotiation(t *testing.T) { } func TestSampleDelivery(t *testing.T) { - t.Parallel() // Let's create an even number of send batches, so we don't run into the // batch timeout case. n := 3 @@ -409,7 +408,6 @@ func TestWALMetadataDelivery(t *testing.T) { } func TestSampleDeliveryTimeout(t *testing.T) { - t.Parallel() for _, protoMsg := range []remoteapi.WriteMessageType{remoteapi.WriteV1MessageType, remoteapi.WriteV2MessageType} { t.Run(fmt.Sprint(protoMsg), func(t *testing.T) { // Let's send one less sample than batch size, and wait the timeout duration @@ -2038,7 +2036,6 @@ func TestIsSampleOld(t *testing.T) { // Simulates scenario in which remote write endpoint is down and a subset of samples is dropped due to age limit while backoffing. func TestSendSamplesWithBackoffWithSampleAgeLimit(t *testing.T) { - t.Parallel() for _, protoMsg := range []remoteapi.WriteMessageType{remoteapi.WriteV1MessageType, remoteapi.WriteV2MessageType} { t.Run(fmt.Sprint(protoMsg), func(t *testing.T) { maxSamplesPerSend := 10 diff --git a/tsdb/agent/db.go b/tsdb/agent/db.go index 460ceb7c04..3f79d9176a 100644 --- a/tsdb/agent/db.go +++ b/tsdb/agent/db.go @@ -490,7 +490,7 @@ func (db *DB) loadWAL(r *wlog.Reader, multiRef map[chunks.HeadSeriesRef]chunks.H return } decoded <- series - case record.Samples: + case record.Samples, record.SamplesV2: samples := db.walReplaySamplesPool.Get()[:0] samples, err = dec.Samples(rec, samples) if err != nil { @@ -710,7 +710,7 @@ func (db *DB) truncate(mint int64) error { db.metrics.checkpointCreationTotal.Inc() - if _, err = wlog.Checkpoint(db.logger, db.wal, first, last, db.keepSeriesInWALCheckpointFn(last), mint); err != nil { + if _, err = wlog.Checkpoint(db.logger, db.wal, first, last, db.keepSeriesInWALCheckpointFn(last), mint, db.opts.EnableSTStorage); err != nil { db.metrics.checkpointCreationFail.Inc() var cerr *wlog.CorruptionErr if errors.As(err, &cerr) { @@ -1156,7 +1156,7 @@ func (a *appenderBase) log() error { a.mtx.RLock() defer a.mtx.RUnlock() - var encoder record.Encoder + encoder := record.Encoder{EnableSTStorage: a.opts.EnableSTStorage} buf := a.bufPool.Get().([]byte) defer func() { a.bufPool.Put(buf) //nolint:staticcheck @@ -1280,7 +1280,7 @@ func (a *appenderBase) logSeries() error { a.bufPool.Put(buf) //nolint:staticcheck }() - var encoder record.Encoder + encoder := record.Encoder{EnableSTStorage: a.opts.EnableSTStorage} buf = encoder.Series(a.pendingSeries, buf) if err := a.wal.Log(buf); err != nil { return err diff --git a/tsdb/agent/db_append_v2_test.go b/tsdb/agent/db_append_v2_test.go index 3e10a1163b..4192e7d98d 100644 --- a/tsdb/agent/db_append_v2_test.go +++ b/tsdb/agent/db_append_v2_test.go @@ -18,6 +18,7 @@ import ( "fmt" "math" "path/filepath" + "strconv" "testing" "time" @@ -89,278 +90,269 @@ func TestDB_InvalidSeries_AppendV2(t *testing.T) { }) } -func TestCommit_AppendV2(t *testing.T) { +func TestCommitAppendV2(t *testing.T) { const ( numDatapoints = 1000 numHistograms = 100 numSeries = 8 ) + for _, enableStStorage := range []bool{false, true} { + t.Run("enableStStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + opts := DefaultOptions() + opts.EnableSTStorage = enableStStorage + s := createTestAgentDB(t, nil, opts) - s := createTestAgentDB(t, nil, DefaultOptions()) - app := 
s.AppenderV2(context.TODO()) + app := s.AppenderV2(context.TODO()) - lbls := labelsForTest(t.Name(), numSeries) - for _, l := range lbls { - lset := labels.New(l...) + lbls := labelsForTest(t.Name(), numSeries) + for _, l := range lbls { + lset := labels.New(l...) - for i := range numDatapoints { - sample := chunks.GenerateSamples(0, 1) - _, err := app.Append(0, lset, 0, sample[0].T(), sample[0].F(), nil, nil, storage.AOptions{ - Exemplars: []exemplar.Exemplar{{ - Labels: lset, - Ts: sample[0].T() + int64(i), - Value: sample[0].F(), - HasTs: true, - }}, - }) + for i := range numDatapoints { + sample := chunks.GenerateSamples(0, 1) + _, err := app.Append(0, lset, int64(i), sample[0].T()+2000, sample[0].F(), nil, nil, storage.AOptions{ + Exemplars: []exemplar.Exemplar{{ + Labels: lset, + Ts: sample[0].T() + int64(i) + 2000, + Value: sample[0].F(), + HasTs: true, + }}, + }) + require.NoError(t, err) + } + } + + lbls = labelsForTest(t.Name()+"_histogram", numSeries) + for _, l := range lbls { + lset := labels.New(l...) + + histograms := tsdbutil.GenerateTestHistograms(numHistograms) + + for i := range numHistograms { + _, err := app.Append(0, lset, int64(i), int64(i+2000), 0, histograms[i], nil, storage.AOptions{}) + require.NoError(t, err) + } + } + + lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries) + for _, l := range lbls { + lset := labels.New(l...) + + customBucketHistograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms) + + for i := range numHistograms { + _, err := app.Append(0, lset, int64(i), int64(i+2000), 0, customBucketHistograms[i], nil, storage.AOptions{}) + require.NoError(t, err) + } + } + + lbls = labelsForTest(t.Name()+"_float_histogram", numSeries) + for _, l := range lbls { + lset := labels.New(l...) + + floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms) + + for i := range numHistograms { + _, err := app.Append(0, lset, int64(i), int64(i+2000), 0, nil, floatHistograms[i], storage.AOptions{}) + require.NoError(t, err) + } + } + + lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries) + for _, l := range lbls { + lset := labels.New(l...) + + customBucketFloatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms) + + for i := range numHistograms { + _, err := app.Append(0, lset, int64(i), int64(i+2000), 0, nil, customBucketFloatHistograms[i], storage.AOptions{}) + require.NoError(t, err) + } + } + + require.NoError(t, app.Commit()) + require.NoError(t, s.Close()) + + sr, err := wlog.NewSegmentsReader(s.wal.Dir()) require.NoError(t, err) - } + defer func() { + require.NoError(t, sr.Close()) + }() + + // Read records from WAL and check for expected count of series, samples, and exemplars. 
+ var ( + r = wlog.NewReader(sr) + dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger()) + + walSeriesCount, walSamplesCount, walExemplarsCount, walHistogramCount, walFloatHistogramCount int + ) + for r.Next() { + rec := r.Record() + switch dec.Type(rec) { + case record.Series: + var series []record.RefSeries + series, err = dec.Series(rec, series) + require.NoError(t, err) + walSeriesCount += len(series) + + case record.Samples, record.SamplesV2: + var samples []record.RefSample + samples, err = dec.Samples(rec, samples) + require.NoError(t, err) + walSamplesCount += len(samples) + + case record.HistogramSamples, record.CustomBucketsHistogramSamples: + var histograms []record.RefHistogramSample + histograms, err = dec.HistogramSamples(rec, histograms) + require.NoError(t, err) + walHistogramCount += len(histograms) + + case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples: + var floatHistograms []record.RefFloatHistogramSample + floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms) + require.NoError(t, err) + walFloatHistogramCount += len(floatHistograms) + + case record.Exemplars: + var exemplars []record.RefExemplar + exemplars, err = dec.Exemplars(rec, exemplars) + require.NoError(t, err) + walExemplarsCount += len(exemplars) + + default: + } + } + + // Check that the WAL contained the same number of committed series/samples/exemplars. + require.Equal(t, numSeries*5, walSeriesCount, "unexpected number of series") + require.Equal(t, numSeries*numDatapoints, walSamplesCount, "unexpected number of samples") + require.Equal(t, numSeries*numDatapoints, walExemplarsCount, "unexpected number of exemplars") + require.Equal(t, numSeries*numHistograms*2, walHistogramCount, "unexpected number of histograms") + require.Equal(t, numSeries*numHistograms*2, walFloatHistogramCount, "unexpected number of float histograms") + + // Check that we can still create both kinds of Appender - see https://github.com/prometheus/prometheus/issues/17800. + _ = s.Appender(context.TODO()) + _ = s.AppenderV2(context.TODO()) + }) } - - lbls = labelsForTest(t.Name()+"_histogram", numSeries) - for _, l := range lbls { - lset := labels.New(l...) - - histograms := tsdbutil.GenerateTestHistograms(numHistograms) - - for i := range numHistograms { - _, err := app.Append(0, lset, 0, int64(i), 0, histograms[i], nil, storage.AOptions{}) - require.NoError(t, err) - } - } - - lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries) - for _, l := range lbls { - lset := labels.New(l...) - - customBucketHistograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms) - - for i := range numHistograms { - _, err := app.Append(0, lset, 0, int64(i), 0, customBucketHistograms[i], nil, storage.AOptions{}) - require.NoError(t, err) - } - } - - lbls = labelsForTest(t.Name()+"_float_histogram", numSeries) - for _, l := range lbls { - lset := labels.New(l...) - - floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms) - - for i := range numHistograms { - _, err := app.Append(0, lset, 0, int64(i), 0, nil, floatHistograms[i], storage.AOptions{}) - require.NoError(t, err) - } - } - - lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries) - for _, l := range lbls { - lset := labels.New(l...) 
- - customBucketFloatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms) - - for i := range numHistograms { - _, err := app.Append(0, lset, 0, int64(i), 0, nil, customBucketFloatHistograms[i], storage.AOptions{}) - require.NoError(t, err) - } - } - - require.NoError(t, app.Commit()) - require.NoError(t, s.Close()) - - sr, err := wlog.NewSegmentsReader(s.wal.Dir()) - require.NoError(t, err) - defer func() { - require.NoError(t, sr.Close()) - }() - - // Read records from WAL and check for expected count of series, samples, and exemplars. - var ( - r = wlog.NewReader(sr) - dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger()) - - walSeriesCount, walSamplesCount, walExemplarsCount, walHistogramCount, walFloatHistogramCount int - ) - for r.Next() { - rec := r.Record() - switch dec.Type(rec) { - case record.Series: - var series []record.RefSeries - series, err = dec.Series(rec, series) - require.NoError(t, err) - walSeriesCount += len(series) - - case record.Samples: - var samples []record.RefSample - samples, err = dec.Samples(rec, samples) - require.NoError(t, err) - walSamplesCount += len(samples) - - case record.HistogramSamples, record.CustomBucketsHistogramSamples: - var histograms []record.RefHistogramSample - histograms, err = dec.HistogramSamples(rec, histograms) - require.NoError(t, err) - walHistogramCount += len(histograms) - - case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples: - var floatHistograms []record.RefFloatHistogramSample - floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms) - require.NoError(t, err) - walFloatHistogramCount += len(floatHistograms) - - case record.Exemplars: - var exemplars []record.RefExemplar - exemplars, err = dec.Exemplars(rec, exemplars) - require.NoError(t, err) - walExemplarsCount += len(exemplars) - - default: - } - } - - // Check that the WAL contained the same number of committed series/samples/exemplars. - require.Equal(t, numSeries*5, walSeriesCount, "unexpected number of series") - require.Equal(t, numSeries*numDatapoints, walSamplesCount, "unexpected number of samples") - require.Equal(t, numSeries*numDatapoints, walExemplarsCount, "unexpected number of exemplars") - require.Equal(t, numSeries*numHistograms*2, walHistogramCount, "unexpected number of histograms") - require.Equal(t, numSeries*numHistograms*2, walFloatHistogramCount, "unexpected number of float histograms") - - // Check that we can still create both kinds of Appender - see https://github.com/prometheus/prometheus/issues/17800. - _ = s.Appender(context.TODO()) - _ = s.AppenderV2(context.TODO()) } -func TestRollback_AppendV2(t *testing.T) { +func TestRollbackAppendV2(t *testing.T) { const ( numDatapoints = 1000 numHistograms = 100 numSeries = 8 ) - s := createTestAgentDB(t, nil, DefaultOptions()) - app := s.AppenderV2(context.TODO()) + for _, enableStStorage := range []bool{false, true} { + opts := DefaultOptions() + opts.EnableSTStorage = enableStStorage + s := createTestAgentDB(t, nil, opts) + app := s.AppenderV2(context.TODO()) - lbls := labelsForTest(t.Name(), numSeries) - for _, l := range lbls { - lset := labels.New(l...) + lbls := labelsForTest(t.Name(), numSeries) + for _, l := range lbls { + lset := labels.New(l...) 
- for range numDatapoints { - sample := chunks.GenerateSamples(0, 1) - _, err := app.Append(0, lset, 0, sample[0].T(), sample[0].F(), nil, nil, storage.AOptions{}) - require.NoError(t, err) + for i := range numDatapoints { + sample := chunks.GenerateSamples(0, 1) + _, err := app.Append(0, lset, int64(i), sample[0].T()+2000, sample[0].F(), nil, nil, storage.AOptions{}) + require.NoError(t, err) + } } - } - lbls = labelsForTest(t.Name()+"_histogram", numSeries) - for _, l := range lbls { - lset := labels.New(l...) + lbls = labelsForTest(t.Name()+"_histogram", numSeries) + for _, l := range lbls { + lset := labels.New(l...) - histograms := tsdbutil.GenerateTestHistograms(numHistograms) + histograms := tsdbutil.GenerateTestHistograms(numHistograms) - for i := range numHistograms { - _, err := app.Append(0, lset, 0, int64(i), 0, histograms[i], nil, storage.AOptions{}) - require.NoError(t, err) + for i := range numHistograms { + _, err := app.Append(0, lset, int64(i), int64(i+2000), 0, histograms[i], nil, storage.AOptions{}) + require.NoError(t, err) + } } - } - lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries) - for _, l := range lbls { - lset := labels.New(l...) + lbls = labelsForTest(t.Name()+"_custom_buckets_histogram", numSeries) + for _, l := range lbls { + lset := labels.New(l...) - histograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms) + histograms := tsdbutil.GenerateTestCustomBucketsHistograms(numHistograms) - for i := range numHistograms { - _, err := app.Append(0, lset, 0, int64(i), 0, histograms[i], nil, storage.AOptions{}) - require.NoError(t, err) + for i := range numHistograms { + _, err := app.Append(0, lset, int64(i), int64(i+2000), 0, histograms[i], nil, storage.AOptions{}) + require.NoError(t, err) + } } - } - lbls = labelsForTest(t.Name()+"_float_histogram", numSeries) - for _, l := range lbls { - lset := labels.New(l...) + lbls = labelsForTest(t.Name()+"_float_histogram", numSeries) + for _, l := range lbls { + lset := labels.New(l...) - floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms) + floatHistograms := tsdbutil.GenerateTestFloatHistograms(numHistograms) - for i := range numHistograms { - _, err := app.Append(0, lset, 0, int64(i), 0, nil, floatHistograms[i], storage.AOptions{}) - require.NoError(t, err) + for i := range numHistograms { + _, err := app.Append(0, lset, int64(i), int64(i+2000), 0, nil, floatHistograms[i], storage.AOptions{}) + require.NoError(t, err) + } } - } - lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries) - for _, l := range lbls { - lset := labels.New(l...) + lbls = labelsForTest(t.Name()+"_custom_buckets_float_histogram", numSeries) + for _, l := range lbls { + lset := labels.New(l...) - floatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms) + floatHistograms := tsdbutil.GenerateTestCustomBucketsFloatHistograms(numHistograms) - for i := range numHistograms { - _, err := app.Append(0, lset, 0, int64(i), 0, nil, floatHistograms[i], storage.AOptions{}) - require.NoError(t, err) + for i := range numHistograms { + _, err := app.Append(0, lset, int64(i), int64(i+2000), 0, nil, floatHistograms[i], storage.AOptions{}) + require.NoError(t, err) + } } - } - // Do a rollback, which should clear uncommitted data. A followup call to - // commit should persist nothing to the WAL. - require.NoError(t, app.Rollback()) - require.NoError(t, app.Commit()) - require.NoError(t, s.Close()) + // Do a rollback, which should clear uncommitted data. 
A followup call to + // commit should persist nothing to the WAL. + require.NoError(t, app.Rollback()) + require.NoError(t, app.Commit()) + require.NoError(t, s.Close()) - sr, err := wlog.NewSegmentsReader(s.wal.Dir()) - require.NoError(t, err) - defer func() { - require.NoError(t, sr.Close()) - }() + sr, err := wlog.NewSegmentsReader(s.wal.Dir()) + require.NoError(t, err) + defer func() { + require.NoError(t, sr.Close()) + }() - // Read records from WAL and check for expected count of series and samples. - var ( - r = wlog.NewReader(sr) - dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger()) + // Read records from WAL and check for expected count of series and samples. + var ( + r = wlog.NewReader(sr) + dec = record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger()) - walSeriesCount, walSamplesCount, walHistogramCount, walFloatHistogramCount, walExemplarsCount int - ) - for r.Next() { - rec := r.Record() - switch dec.Type(rec) { - case record.Series: - var series []record.RefSeries - series, err = dec.Series(rec, series) - require.NoError(t, err) - walSeriesCount += len(series) + walSeriesCount int + ) + for r.Next() { + rec := r.Record() + switch dec.Type(rec) { + case record.Series: + var series []record.RefSeries + series, err = dec.Series(rec, series) + require.NoError(t, err) + walSeriesCount += len(series) - case record.Samples: - var samples []record.RefSample - samples, err = dec.Samples(rec, samples) - require.NoError(t, err) - walSamplesCount += len(samples) + case record.Samples, record.SamplesV2: + t.Errorf("should not have found samples") - case record.Exemplars: - var exemplars []record.RefExemplar - exemplars, err = dec.Exemplars(rec, exemplars) - require.NoError(t, err) - walExemplarsCount += len(exemplars) + case record.Exemplars: + t.Errorf("should not have found exemplars") - case record.HistogramSamples, record.CustomBucketsHistogramSamples: - var histograms []record.RefHistogramSample - histograms, err = dec.HistogramSamples(rec, histograms) - require.NoError(t, err) - walHistogramCount += len(histograms) + case record.HistogramSamples, record.CustomBucketsHistogramSamples, record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples: + t.Errorf("should not have found histograms") - case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples: - var floatHistograms []record.RefFloatHistogramSample - floatHistograms, err = dec.FloatHistogramSamples(rec, floatHistograms) - require.NoError(t, err) - walFloatHistogramCount += len(floatHistograms) - - default: + default: + } } - } - // Check that only series get stored after calling Rollback. - require.Equal(t, numSeries*5, walSeriesCount, "series should have been written to WAL") - require.Equal(t, 0, walSamplesCount, "samples should not have been written to WAL") - require.Equal(t, 0, walExemplarsCount, "exemplars should not have been written to WAL") - require.Equal(t, 0, walHistogramCount, "histograms should not have been written to WAL") - require.Equal(t, 0, walFloatHistogramCount, "float histograms should not have been written to WAL") + // Check that only series get stored after calling Rollback. 
+ require.Equal(t, numSeries*5, walSeriesCount, "series should have been written to WAL") + } } func TestFullTruncateWAL_AppendV2(t *testing.T) { diff --git a/tsdb/agent/db_test.go b/tsdb/agent/db_test.go index 31e309d3fd..2f8212ff7a 100644 --- a/tsdb/agent/db_test.go +++ b/tsdb/agent/db_test.go @@ -225,7 +225,7 @@ func TestCommit(t *testing.T) { require.NoError(t, err) walSeriesCount += len(series) - case record.Samples: + case record.Samples, record.SamplesV2: var samples []record.RefSample samples, err = dec.Samples(rec, samples) require.NoError(t, err) @@ -361,7 +361,7 @@ func TestRollback(t *testing.T) { require.NoError(t, err) walSeriesCount += len(series) - case record.Samples: + case record.Samples, record.SamplesV2: var samples []record.RefSample samples, err = dec.Samples(rec, samples) require.NoError(t, err) @@ -1344,7 +1344,7 @@ func readWALSamples(t *testing.T, walDir string) []walSample { series, err := dec.Series(rec, nil) require.NoError(t, err) lastSeries = series[0] - case record.Samples: + case record.Samples, record.SamplesV2: samples, err = dec.Samples(rec, samples[:0]) require.NoError(t, err) for _, s := range samples { diff --git a/tsdb/compression/compression.go b/tsdb/compression/compression.go new file mode 100644 index 0000000000..147a526f7e --- /dev/null +++ b/tsdb/compression/compression.go @@ -0,0 +1,130 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package compression + +import ( + "errors" + "fmt" + + "github.com/golang/snappy" + "github.com/klauspost/compress/zstd" +) + +// Type represents the compression type used for encoding and decoding data. +type Type string + +const ( + // None represents no compression case. + // None it's a default when Type is empty. + None Type = "none" + // Snappy represents snappy block format. + Snappy Type = "snappy" + // Zstd represents zstd compression. + Zstd Type = "zstd" +) + +// Encoder provides compression encoding functionality for supported compression +// types. It is agnostic to the content being compressed, operating on byte +// slices of serialized data streams. The encoder maintains internal state for +// Zstd compression and can handle multiple compression types including None, +// Snappy, and Zstd. +type Encoder struct { + w *zstd.Encoder +} + +// NewEncoder creates a new Encoder. Returns an error if the zstd encoder cannot +// be initialized. +func NewEncoder() (*Encoder, error) { + e := &Encoder{} + w, err := zstd.NewWriter(nil) + if err != nil { + return nil, err + } + e.w = w + return e, nil +} + +// Encode returns the encoded form of src for the given compression type. It also +// returns the indicator if the compression was performed. Encode may skip +// compressing for None type, but also when src is too large e.g. for Snappy block format. +// +// The buf is used as a buffer for returned encoding, and it must not overlap with +// src. It is valid to pass a nil buf. 
+func (e *Encoder) Encode(t Type, src, buf []byte) (_ []byte, compressed bool, err error) { + switch { + case len(src) == 0, t == "", t == None: + return src, false, nil + case t == Snappy: + // If MaxEncodedLen is less than 0 the record is too large to be compressed. + if snappy.MaxEncodedLen(len(src)) < 0 { + return src, false, nil + } + + // The snappy library uses `len` to calculate if we need a new buffer. + // In order to allocate as few buffers as possible make the length + // equal to the capacity. + buf = buf[:cap(buf)] + return snappy.Encode(buf, src), true, nil + case t == Zstd: + if e == nil { + return nil, false, errors.New("zstd requested but encoder was not initialized with NewEncoder()") + } + return e.w.EncodeAll(src, buf[:0]), true, nil + default: + return nil, false, fmt.Errorf("unsupported compression type: %s", t) + } +} + +// Decoder provides decompression functionality for supported compression types. +// It is agnostic to the content being decompressed, operating on byte slices of +// serialized data streams. The decoder maintains internal state for Zstd +// decompression and can handle multiple compression types including None, +// Snappy, and Zstd. +type Decoder struct { + r *zstd.Decoder +} + +// NewDecoder creates a new Decoder. +func NewDecoder() *Decoder { + d := &Decoder{} + + // Calling zstd.NewReader with a nil io.Reader and no options cannot return an error. + r, _ := zstd.NewReader(nil) + d.r = r + return d +} + +// Decode returns the decoded form of src or error, given expected compression type. +// +// The buf is used as a buffer for the returned decoded entry, and it must not +// overlap with src. It is valid to pass a nil buf. +func (d *Decoder) Decode(t Type, src, buf []byte) (_ []byte, err error) { + switch { + case len(src) == 0, t == "", t == None: + return src, nil + case t == Snappy: + // The snappy library uses `len` to calculate if we need a new buffer. + // In order to allocate as few buffers as possible make the length + // equal to the capacity. + buf = buf[:cap(buf)] + return snappy.Decode(buf, src) + case t == Zstd: + if d == nil { + return nil, errors.New("zstd requested but Decoder was not initialized with NewDecoder()") + } + return d.r.DecodeAll(src, buf[:0]) + default: + return nil, fmt.Errorf("unsupported compression type: %s", t) + } +} diff --git a/tsdb/db_append_v2_test.go b/tsdb/db_append_v2_test.go index 16134e8c93..e81bc80227 100644 --- a/tsdb/db_append_v2_test.go +++ b/tsdb/db_append_v2_test.go @@ -193,7 +193,7 @@ func TestDataNotAvailableAfterRollback_AppendV2(t *testing.T) { require.NoError(t, err) walSeriesCount += len(series) - case record.Samples: + case record.Samples, record.SamplesV2: var samples []record.RefSample samples, err = dec.Samples(rec, samples) require.NoError(t, err) @@ -968,16 +968,18 @@ func TestWALReplayRaceOnSamplesLoggedBeforeSeries_AppendV2(t *testing.T) { // We test both with few and many samples appended after series creation. If samples are < 120 then there's no // mmap-ed chunk, otherwise there's at least 1 mmap-ed chunk when replaying the WAL. 
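The new tsdb/compression package introduced above exposes a small, content-agnostic Encode/Decode API. Below is a minimal round-trip sketch under the signatures shown in this diff; the handling of the returned `compressed` flag (falling back to None when the encoder passed the input through) is this sketch's assumption about intended usage, not code from this change.

package main

import (
	"bytes"
	"fmt"

	"github.com/prometheus/prometheus/tsdb/compression"
)

func main() {
	enc, err := compression.NewEncoder()
	if err != nil {
		panic(err)
	}
	dec := compression.NewDecoder()

	src := []byte("a serialized WAL record")

	// Encode reports whether compression was actually applied; the None type
	// (or a Snappy input too large for the block format) is passed through.
	out, compressed, err := enc.Encode(compression.Zstd, src, nil)
	if err != nil {
		panic(err)
	}

	// Decode with the type that was actually used; a pass-through result is
	// read back as None.
	typ := compression.Zstd
	if !compressed {
		typ = compression.None
	}
	back, err := dec.Decode(typ, out, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(compressed, bytes.Equal(back, src))
}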
- for _, numSamplesAfterSeriesCreation := range []int{1, 1000} { - for run := 1; run <= numRuns; run++ { - t.Run(fmt.Sprintf("samples after series creation = %d, run = %d", numSamplesAfterSeriesCreation, run), func(t *testing.T) { - testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation) - }) + for _, enableStStorage := range []bool{false, true} { + for _, numSamplesAfterSeriesCreation := range []int{1, 1000} { + for run := 1; run <= numRuns; run++ { + t.Run(fmt.Sprintf("samples after series creation = %d, run = %d, stStorage = %v", numSamplesAfterSeriesCreation, run, enableStStorage), func(t *testing.T) { + testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation, enableStStorage) + }) + } } } } -func testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int) { +func testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int, enableStStorage bool) { const numSeries = 1000 db := newTestDB(t) @@ -985,7 +987,7 @@ func testWALReplayRaceOnSamplesLoggedBeforeSeriesAppendV2(t *testing.T, numSampl for seriesRef := 1; seriesRef <= numSeries; seriesRef++ { // Log samples before the series is logged to the WAL. - var enc record.Encoder + enc := record.Encoder{EnableSTStorage: enableStStorage} var samples []record.RefSample for ts := range numSamplesBeforeSeriesCreation { @@ -1176,139 +1178,143 @@ func TestTombstoneCleanResultEmptyBlock_AppendV2(t *testing.T) { func TestSizeRetention_AppendV2(t *testing.T) { t.Parallel() - opts := DefaultOptions() - opts.OutOfOrderTimeWindow = 100 - db := newTestDB(t, withOpts(opts), withRngs(100)) + for _, enableStStorage := range []bool{false, true} { + t.Run("enableStStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 100 + db := newTestDB(t, withOpts(opts), withRngs(100)) - blocks := []*BlockMeta{ - {MinTime: 100, MaxTime: 200}, // Oldest block - {MinTime: 200, MaxTime: 300}, - {MinTime: 300, MaxTime: 400}, - {MinTime: 400, MaxTime: 500}, - {MinTime: 500, MaxTime: 600}, // Newest Block - } + blocks := []*BlockMeta{ + {MinTime: 100, MaxTime: 200}, // Oldest block + {MinTime: 200, MaxTime: 300}, + {MinTime: 300, MaxTime: 400}, + {MinTime: 400, MaxTime: 500}, + {MinTime: 500, MaxTime: 600}, // Newest Block + } - for _, m := range blocks { - createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime)) - } + for _, m := range blocks { + createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime)) + } - headBlocks := []*BlockMeta{ - {MinTime: 700, MaxTime: 800}, - } + headBlocks := []*BlockMeta{ + {MinTime: 700, MaxTime: 800}, + } - // Add some data to the WAL. - headApp := db.Head().AppenderV2(context.Background()) - var aSeries labels.Labels - var it chunkenc.Iterator - for _, m := range headBlocks { - series := genSeries(100, 10, m.MinTime, m.MaxTime+1) - for _, s := range series { - aSeries = s.Labels() - it = s.Iterator(it) - for it.Next() == chunkenc.ValFloat { - tim, v := it.At() - _, err := headApp.Append(0, s.Labels(), 0, tim, v, nil, nil, storage.AOptions{}) + // Add some data to the WAL. 
+ headApp := db.Head().AppenderV2(context.Background()) + var aSeries labels.Labels + var it chunkenc.Iterator + for _, m := range headBlocks { + series := genSeries(100, 10, m.MinTime, m.MaxTime+1) + for _, s := range series { + aSeries = s.Labels() + it = s.Iterator(it) + for it.Next() == chunkenc.ValFloat { + tim, v := it.At() + _, err := headApp.Append(0, s.Labels(), 0, tim, v, nil, nil, storage.AOptions{}) + require.NoError(t, err) + } + require.NoError(t, it.Err()) + } + } + require.NoError(t, headApp.Commit()) + db.Head().mmapHeadChunks() + + require.Eventually(t, func() bool { + return db.Head().chunkDiskMapper.IsQueueEmpty() + }, 2*time.Second, 100*time.Millisecond) + + // Test that registered size matches the actual disk size. + require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size. + require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered. + blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. + walSize, err := db.Head().wal.Size() + require.NoError(t, err) + cdmSize, err := db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + require.NotZero(t, cdmSize) + // Expected size should take into account block size + WAL size + Head + // chunks size + expSize := blockSize + walSize + cdmSize + actSize, err := fileutil.DirSize(db.Dir()) + require.NoError(t, err) + require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") + + // Create a WAL checkpoint, and compare sizes. + first, last, err := wlog.Segments(db.Head().wal.Dir()) + require.NoError(t, err) + _, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0, enableStStorage) + require.NoError(t, err) + blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. + walSize, err = db.Head().wal.Size() + require.NoError(t, err) + cdmSize, err = db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + require.NotZero(t, cdmSize) + expSize = blockSize + walSize + cdmSize + actSize, err = fileutil.DirSize(db.Dir()) + require.NoError(t, err) + require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") + + // Truncate Chunk Disk Mapper and compare sizes. + require.NoError(t, db.Head().chunkDiskMapper.Truncate(900)) + cdmSize, err = db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + require.NotZero(t, cdmSize) + expSize = blockSize + walSize + cdmSize + actSize, err = fileutil.DirSize(db.Dir()) + require.NoError(t, err) + require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") + + // Add some out of order samples to check the size of WBL. + headApp = db.Head().AppenderV2(context.Background()) + for ts := int64(750); ts < 800; ts++ { + _, err := headApp.Append(0, aSeries, 0, ts, float64(ts), nil, nil, storage.AOptions{}) require.NoError(t, err) } - require.NoError(t, it.Err()) - } + require.NoError(t, headApp.Commit()) + + walSize, err = db.Head().wal.Size() + require.NoError(t, err) + wblSize, err := db.Head().wbl.Size() + require.NoError(t, err) + require.NotZero(t, wblSize) + cdmSize, err = db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + expSize = blockSize + walSize + wblSize + cdmSize + actSize, err = fileutil.DirSize(db.Dir()) + require.NoError(t, err) + require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") + + // Decrease the max bytes limit so that a delete is triggered. 
+ // Check total size, total count and check that the oldest block was deleted. + firstBlockSize := db.Blocks()[0].Size() + sizeLimit := actSize - firstBlockSize + db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size. + require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size. + + expBlocks := blocks[1:] + actBlocks := db.Blocks() + blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) + walSize, err = db.Head().wal.Size() + require.NoError(t, err) + cdmSize, err = db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + require.NotZero(t, cdmSize) + // Expected size should take into account block size + WAL size + WBL size + expSize = blockSize + walSize + wblSize + cdmSize + actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount)) + actSize, err = fileutil.DirSize(db.Dir()) + require.NoError(t, err) + + require.Equal(t, 1, actRetentionCount, "metric retention count mismatch") + require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size") + require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit) + require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1) + require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block") + require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block") + }) } - require.NoError(t, headApp.Commit()) - db.Head().mmapHeadChunks() - - require.Eventually(t, func() bool { - return db.Head().chunkDiskMapper.IsQueueEmpty() - }, 2*time.Second, 100*time.Millisecond) - - // Test that registered size matches the actual disk size. - require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size. - require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered. - blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. - walSize, err := db.Head().wal.Size() - require.NoError(t, err) - cdmSize, err := db.Head().chunkDiskMapper.Size() - require.NoError(t, err) - require.NotZero(t, cdmSize) - // Expected size should take into account block size + WAL size + Head - // chunks size - expSize := blockSize + walSize + cdmSize - actSize, err := fileutil.DirSize(db.Dir()) - require.NoError(t, err) - require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") - - // Create a WAL checkpoint, and compare sizes. - first, last, err := wlog.Segments(db.Head().wal.Dir()) - require.NoError(t, err) - _, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0) - require.NoError(t, err) - blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. - walSize, err = db.Head().wal.Size() - require.NoError(t, err) - cdmSize, err = db.Head().chunkDiskMapper.Size() - require.NoError(t, err) - require.NotZero(t, cdmSize) - expSize = blockSize + walSize + cdmSize - actSize, err = fileutil.DirSize(db.Dir()) - require.NoError(t, err) - require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") - - // Truncate Chunk Disk Mapper and compare sizes. 
- require.NoError(t, db.Head().chunkDiskMapper.Truncate(900)) - cdmSize, err = db.Head().chunkDiskMapper.Size() - require.NoError(t, err) - require.NotZero(t, cdmSize) - expSize = blockSize + walSize + cdmSize - actSize, err = fileutil.DirSize(db.Dir()) - require.NoError(t, err) - require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") - - // Add some out of order samples to check the size of WBL. - headApp = db.Head().AppenderV2(context.Background()) - for ts := int64(750); ts < 800; ts++ { - _, err := headApp.Append(0, aSeries, 0, ts, float64(ts), nil, nil, storage.AOptions{}) - require.NoError(t, err) - } - require.NoError(t, headApp.Commit()) - - walSize, err = db.Head().wal.Size() - require.NoError(t, err) - wblSize, err := db.Head().wbl.Size() - require.NoError(t, err) - require.NotZero(t, wblSize) - cdmSize, err = db.Head().chunkDiskMapper.Size() - require.NoError(t, err) - expSize = blockSize + walSize + wblSize + cdmSize - actSize, err = fileutil.DirSize(db.Dir()) - require.NoError(t, err) - require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") - - // Decrease the max bytes limit so that a delete is triggered. - // Check total size, total count and check that the oldest block was deleted. - firstBlockSize := db.Blocks()[0].Size() - sizeLimit := actSize - firstBlockSize - db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size. - require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size. - - expBlocks := blocks[1:] - actBlocks := db.Blocks() - blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) - walSize, err = db.Head().wal.Size() - require.NoError(t, err) - cdmSize, err = db.Head().chunkDiskMapper.Size() - require.NoError(t, err) - require.NotZero(t, cdmSize) - // Expected size should take into account block size + WAL size + WBL size - expSize = blockSize + walSize + wblSize + cdmSize - actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount)) - actSize, err = fileutil.DirSize(db.Dir()) - require.NoError(t, err) - - require.Equal(t, 1, actRetentionCount, "metric retention count mismatch") - require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size") - require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit) - require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1) - require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block") - require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block") } func TestNotMatcherSelectsLabelsUnsetSeries_AppendV2(t *testing.T) { @@ -1499,33 +1505,36 @@ func TestInitializeHeadTimestamp_AppendV2(t *testing.T) { require.Equal(t, int64(1000), db.head.MaxTime()) require.True(t, db.head.initialized()) }) - t.Run("wal-only", func(t *testing.T) { - dir := t.TempDir() - require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777)) - w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None) - require.NoError(t, err) + for _, enableStStorage := range []bool{false, true} { + t.Run("wal-only-st-"+strconv.FormatBool(enableStStorage), func(t *testing.T) { + dir := t.TempDir() - var enc record.Encoder - err = w.Log( - enc.Series([]record.RefSeries{ - {Ref: 123, Labels: labels.FromStrings("a", "1")}, - {Ref: 124, Labels: 
labels.FromStrings("a", "2")}, - }, nil), - enc.Samples([]record.RefSample{ - {Ref: 123, T: 5000, V: 1}, - {Ref: 124, T: 15000, V: 1}, - }, nil), - ) - require.NoError(t, err) - require.NoError(t, w.Close()) + require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777)) + w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None) + require.NoError(t, err) - db := newTestDB(t, withDir(dir)) + enc := record.Encoder{EnableSTStorage: enableStStorage} + err = w.Log( + enc.Series([]record.RefSeries{ + {Ref: 123, Labels: labels.FromStrings("a", "1")}, + {Ref: 124, Labels: labels.FromStrings("a", "2")}, + }, nil), + enc.Samples([]record.RefSample{ + {Ref: 123, T: 5000, V: 1}, + {Ref: 124, T: 15000, V: 1}, + }, nil), + ) + require.NoError(t, err) + require.NoError(t, w.Close()) - require.Equal(t, int64(5000), db.head.MinTime()) - require.Equal(t, int64(15000), db.head.MaxTime()) - require.True(t, db.head.initialized()) - }) + db := newTestDB(t, withDir(dir)) + + require.Equal(t, int64(5000), db.head.MinTime()) + require.Equal(t, int64(15000), db.head.MaxTime()) + require.True(t, db.head.initialized()) + }) + } t.Run("existing-block", func(t *testing.T) { dir := t.TempDir() @@ -1537,37 +1546,39 @@ func TestInitializeHeadTimestamp_AppendV2(t *testing.T) { require.Equal(t, int64(2000), db.head.MaxTime()) require.True(t, db.head.initialized()) }) - t.Run("existing-block-and-wal", func(t *testing.T) { - dir := t.TempDir() + for _, enableStStorage := range []bool{false, true} { + t.Run("existing-block-and-wal-st-"+strconv.FormatBool(enableStStorage), func(t *testing.T) { + dir := t.TempDir() - createBlock(t, dir, genSeries(1, 1, 1000, 6000)) + createBlock(t, dir, genSeries(1, 1, 1000, 6000)) - require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777)) - w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None) - require.NoError(t, err) + require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777)) + w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None) + require.NoError(t, err) - var enc record.Encoder - err = w.Log( - enc.Series([]record.RefSeries{ - {Ref: 123, Labels: labels.FromStrings("a", "1")}, - {Ref: 124, Labels: labels.FromStrings("a", "2")}, - }, nil), - enc.Samples([]record.RefSample{ - {Ref: 123, T: 5000, V: 1}, - {Ref: 124, T: 15000, V: 1}, - }, nil), - ) - require.NoError(t, err) - require.NoError(t, w.Close()) + enc := record.Encoder{EnableSTStorage: enableStStorage} + err = w.Log( + enc.Series([]record.RefSeries{ + {Ref: 123, Labels: labels.FromStrings("a", "1")}, + {Ref: 124, Labels: labels.FromStrings("a", "2")}, + }, nil), + enc.Samples([]record.RefSample{ + {Ref: 123, T: 5000, V: 1}, + {Ref: 124, T: 15000, V: 1}, + }, nil), + ) + require.NoError(t, err) + require.NoError(t, w.Close()) - db := newTestDB(t, withDir(dir)) + db := newTestDB(t, withDir(dir)) - require.Equal(t, int64(6000), db.head.MinTime()) - require.Equal(t, int64(15000), db.head.MaxTime()) - require.True(t, db.head.initialized()) - // Check that old series has been GCed. - require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series)) - }) + require.Equal(t, int64(6000), db.head.MinTime()) + require.Equal(t, int64(15000), db.head.MaxTime()) + require.True(t, db.head.initialized()) + // Check that old series has been GCed. 
+ require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series)) + }) + } } func TestNoEmptyBlocks_AppendV2(t *testing.T) { @@ -3273,7 +3284,7 @@ func testOOOWALWriteAppendV2(t *testing.T, series, err := dec.Series(rec, nil) require.NoError(t, err) records = append(records, series) - case record.Samples: + case record.Samples, record.SamplesV2: samples, err := dec.Samples(rec, nil) require.NoError(t, err) records = append(records, samples) @@ -3430,112 +3441,116 @@ func TestMetadataInWAL_AppenderV2(t *testing.T) { } func TestMetadataCheckpointingOnlyKeepsLatestEntry_AppendV2(t *testing.T) { - ctx := context.Background() - numSamples := 10000 - hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false) - hb.opts.EnableMetadataWALRecords = true + for _, enableStStorage := range []bool{false, true} { + t.Run("enableStStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + ctx := context.Background() + numSamples := 10000 + hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false) + hb.opts.EnableMetadataWALRecords = true - // Add some series so we can append metadata to them. - s1 := labels.FromStrings("a", "b") - s2 := labels.FromStrings("c", "d") - s3 := labels.FromStrings("e", "f") - s4 := labels.FromStrings("g", "h") + // Add some series so we can append metadata to them. + s1 := labels.FromStrings("a", "b") + s2 := labels.FromStrings("c", "d") + s3 := labels.FromStrings("e", "f") + s4 := labels.FromStrings("g", "h") - m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"} - m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"} - m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"} - m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"} + m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"} + m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"} + m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"} + m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"} - app := hb.AppenderV2(ctx) - ts := int64(0) - _, err := app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m1}) - require.NoError(t, err) - _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2}) - require.NoError(t, err) - _, err = app.Append(0, s3, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m3}) - require.NoError(t, err) - _, err = app.Append(0, s4, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m4}) - require.NoError(t, err) - require.NoError(t, app.Commit()) + app := hb.AppenderV2(ctx) + ts := int64(0) + _, err := app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m1}) + require.NoError(t, err) + _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2}) + require.NoError(t, err) + _, err = app.Append(0, s3, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m3}) + require.NoError(t, err) + _, err = app.Append(0, s4, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m4}) + require.NoError(t, err) + require.NoError(t, app.Commit()) - // Update metadata for first series. - m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"} - app = hb.AppenderV2(ctx) - ts++ - _, err = app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m5}) - require.NoError(t, err) - require.NoError(t, app.Commit()) + // Update metadata for first series. 
+ m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"} + app = hb.AppenderV2(ctx) + ts++ + _, err = app.Append(0, s1, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m5}) + require.NoError(t, err) + require.NoError(t, app.Commit()) - // Switch back-and-forth metadata for second series. - // Since it ended on a new metadata record, we expect a single new entry. - m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"} + // Switch back-and-forth metadata for second series. + // Since it ended on a new metadata record, we expect a single new entry. + m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"} - app = hb.AppenderV2(ctx) - ts++ - _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6}) - require.NoError(t, err) - require.NoError(t, app.Commit()) + app = hb.AppenderV2(ctx) + ts++ + _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6}) + require.NoError(t, err) + require.NoError(t, app.Commit()) - app = hb.AppenderV2(ctx) - ts++ - _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2}) - require.NoError(t, err) - require.NoError(t, app.Commit()) + app = hb.AppenderV2(ctx) + ts++ + _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2}) + require.NoError(t, err) + require.NoError(t, app.Commit()) - app = hb.AppenderV2(ctx) - ts++ - _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6}) - require.NoError(t, err) - require.NoError(t, app.Commit()) + app = hb.AppenderV2(ctx) + ts++ + _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6}) + require.NoError(t, err) + require.NoError(t, app.Commit()) - app = hb.AppenderV2(ctx) - ts++ - _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2}) - require.NoError(t, err) - require.NoError(t, app.Commit()) + app = hb.AppenderV2(ctx) + ts++ + _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m2}) + require.NoError(t, err) + require.NoError(t, app.Commit()) - app = hb.AppenderV2(ctx) - ts++ - _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6}) - require.NoError(t, err) - require.NoError(t, app.Commit()) + app = hb.AppenderV2(ctx) + ts++ + _, err = app.Append(0, s2, 0, ts, 0, nil, nil, storage.AOptions{Metadata: m6}) + require.NoError(t, err) + require.NoError(t, app.Commit()) - // Let's create a checkpoint. - first, last, err := wlog.Segments(w.Dir()) - require.NoError(t, err) - keep := func(id chunks.HeadSeriesRef) bool { - return id != 3 + // Let's create a checkpoint. + first, last, err := wlog.Segments(w.Dir()) + require.NoError(t, err) + keep := func(id chunks.HeadSeriesRef) bool { + return id != 3 + } + _, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0, enableStStorage) + require.NoError(t, err) + + // Confirm there's been a checkpoint. + cdir, _, err := wlog.LastCheckpoint(w.Dir()) + require.NoError(t, err) + + // Read in checkpoint and WAL. + recs := readTestWAL(t, cdir) + var gotMetadataBlocks [][]record.RefMetadata + for _, rec := range recs { + if mr, ok := rec.([]record.RefMetadata); ok { + gotMetadataBlocks = append(gotMetadataBlocks, mr) + } + } + + // There should only be 1 metadata block present, with only the latest + // metadata kept around. 
+ wantMetadata := []record.RefMetadata{ + {Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help}, + {Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help}, + {Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help}, + } + require.Len(t, gotMetadataBlocks, 1) + require.Len(t, gotMetadataBlocks[0], 3) + gotMetadataBlock := gotMetadataBlocks[0] + + sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref }) + require.Equal(t, wantMetadata, gotMetadataBlock) + require.NoError(t, hb.Close()) + }) } - _, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0) - require.NoError(t, err) - - // Confirm there's been a checkpoint. - cdir, _, err := wlog.LastCheckpoint(w.Dir()) - require.NoError(t, err) - - // Read in checkpoint and WAL. - recs := readTestWAL(t, cdir) - var gotMetadataBlocks [][]record.RefMetadata - for _, rec := range recs { - if mr, ok := rec.([]record.RefMetadata); ok { - gotMetadataBlocks = append(gotMetadataBlocks, mr) - } - } - - // There should only be 1 metadata block present, with only the latest - // metadata kept around. - wantMetadata := []record.RefMetadata{ - {Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help}, - {Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help}, - {Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help}, - } - require.Len(t, gotMetadataBlocks, 1) - require.Len(t, gotMetadataBlocks[0], 3) - gotMetadataBlock := gotMetadataBlocks[0] - - sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref }) - require.Equal(t, wantMetadata, gotMetadataBlock) - require.NoError(t, hb.Close()) } func TestMetadataAssertInMemoryData_AppendV2(t *testing.T) { diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 403ce3636a..4576e96061 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -394,7 +394,7 @@ func TestDataNotAvailableAfterRollback(t *testing.T) { require.NoError(t, err) walSeriesCount += len(series) - case record.Samples: + case record.Samples, record.SamplesV2: var samples []record.RefSample samples, err = dec.Samples(rec, samples) require.NoError(t, err) @@ -1169,24 +1169,25 @@ func TestWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T) { // We test both with few and many samples appended after series creation. If samples are < 120 then there's no // mmap-ed chunk, otherwise there's at least 1 mmap-ed chunk when replaying the WAL. 
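Every WAL-reading switch in these tests now treats record.Samples and record.SamplesV2 identically, and encoders are constructed with EnableSTStorage. A small round-trip sketch of that pattern, assuming (as the tests appear to) that an encoder with EnableSTStorage set emits SamplesV2 records which dec.Samples decodes like classic sample records:

package main

import (
	"fmt"

	"github.com/prometheus/common/promslog"

	"github.com/prometheus/prometheus/model/labels"
	"github.com/prometheus/prometheus/tsdb/record"
)

func main() {
	// Encode two samples; with EnableSTStorage the resulting record type is
	// expected to be SamplesV2 rather than Samples.
	enc := record.Encoder{EnableSTStorage: true}
	rec := enc.Samples([]record.RefSample{
		{Ref: 123, T: 5000, V: 1},
		{Ref: 124, T: 15000, V: 1},
	}, nil)

	// Decoding is uniform: both record types go through dec.Samples.
	dec := record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger())
	switch dec.Type(rec) {
	case record.Samples, record.SamplesV2:
		samples, err := dec.Samples(rec, nil)
		if err != nil {
			panic(err)
		}
		fmt.Println(len(samples)) // 2
	}
}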
- for _, numSamplesAfterSeriesCreation := range []int{1, 1000} { - for run := 1; run <= numRuns; run++ { - t.Run(fmt.Sprintf("samples after series creation = %d, run = %d", numSamplesAfterSeriesCreation, run), func(t *testing.T) { - testWALReplayRaceOnSamplesLoggedBeforeSeries(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation) - }) + for _, enableStStorage := range []bool{false, true} { + for _, numSamplesAfterSeriesCreation := range []int{1, 1000} { + for run := 1; run <= numRuns; run++ { + t.Run(fmt.Sprintf("samples after series creation = %d, run = %d, stStorage=%v", numSamplesAfterSeriesCreation, run, enableStStorage), func(t *testing.T) { + testWALReplayRaceOnSamplesLoggedBeforeSeries(t, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation, enableStStorage) + }) + } } } } -func testWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int) { +func testWALReplayRaceOnSamplesLoggedBeforeSeries(t *testing.T, numSamplesBeforeSeriesCreation, numSamplesAfterSeriesCreation int, enableStStorage bool) { const numSeries = 1000 - db := newTestDB(t) db.DisableCompactions() for seriesRef := 1; seriesRef <= numSeries; seriesRef++ { // Log samples before the series is logged to the WAL. - var enc record.Encoder + enc := record.Encoder{EnableSTStorage: enableStStorage} var samples []record.RefSample for ts := range numSamplesBeforeSeriesCreation { @@ -1550,139 +1551,143 @@ func TestRetentionDurationMetric(t *testing.T) { func TestSizeRetention(t *testing.T) { t.Parallel() - opts := DefaultOptions() - opts.OutOfOrderTimeWindow = 100 - db := newTestDB(t, withOpts(opts), withRngs(100)) + for _, enableStStorage := range []bool{false, true} { + t.Run("enableStStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + opts := DefaultOptions() + opts.OutOfOrderTimeWindow = 100 + db := newTestDB(t, withOpts(opts), withRngs(100)) - blocks := []*BlockMeta{ - {MinTime: 100, MaxTime: 200}, // Oldest block - {MinTime: 200, MaxTime: 300}, - {MinTime: 300, MaxTime: 400}, - {MinTime: 400, MaxTime: 500}, - {MinTime: 500, MaxTime: 600}, // Newest Block - } + blocks := []*BlockMeta{ + {MinTime: 100, MaxTime: 200}, // Oldest block + {MinTime: 200, MaxTime: 300}, + {MinTime: 300, MaxTime: 400}, + {MinTime: 400, MaxTime: 500}, + {MinTime: 500, MaxTime: 600}, // Newest Block + } - for _, m := range blocks { - createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime)) - } + for _, m := range blocks { + createBlock(t, db.Dir(), genSeries(100, 10, m.MinTime, m.MaxTime)) + } - headBlocks := []*BlockMeta{ - {MinTime: 700, MaxTime: 800}, - } + headBlocks := []*BlockMeta{ + {MinTime: 700, MaxTime: 800}, + } - // Add some data to the WAL. - headApp := db.Head().Appender(context.Background()) - var aSeries labels.Labels - var it chunkenc.Iterator - for _, m := range headBlocks { - series := genSeries(100, 10, m.MinTime, m.MaxTime+1) - for _, s := range series { - aSeries = s.Labels() - it = s.Iterator(it) - for it.Next() == chunkenc.ValFloat { - tim, v := it.At() - _, err := headApp.Append(0, s.Labels(), tim, v) + // Add some data to the WAL. 
+ headApp := db.Head().Appender(context.Background()) + var aSeries labels.Labels + var it chunkenc.Iterator + for _, m := range headBlocks { + series := genSeries(100, 10, m.MinTime, m.MaxTime+1) + for _, s := range series { + aSeries = s.Labels() + it = s.Iterator(it) + for it.Next() == chunkenc.ValFloat { + tim, v := it.At() + _, err := headApp.Append(0, s.Labels(), tim, v) + require.NoError(t, err) + } + require.NoError(t, it.Err()) + } + } + require.NoError(t, headApp.Commit()) + db.Head().mmapHeadChunks() + + require.Eventually(t, func() bool { + return db.Head().chunkDiskMapper.IsQueueEmpty() + }, 2*time.Second, 100*time.Millisecond) + + // Test that registered size matches the actual disk size. + require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size. + require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered. + blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. + walSize, err := db.Head().wal.Size() + require.NoError(t, err) + cdmSize, err := db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + require.NotZero(t, cdmSize) + // Expected size should take into account block size + WAL size + Head + // chunks size + expSize := blockSize + walSize + cdmSize + actSize, err := fileutil.DirSize(db.Dir()) + require.NoError(t, err) + require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") + + // Create a WAL checkpoint, and compare sizes. + first, last, err := wlog.Segments(db.Head().wal.Dir()) + require.NoError(t, err) + _, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0, enableStStorage) + require.NoError(t, err) + blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. + walSize, err = db.Head().wal.Size() + require.NoError(t, err) + cdmSize, err = db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + require.NotZero(t, cdmSize) + expSize = blockSize + walSize + cdmSize + actSize, err = fileutil.DirSize(db.Dir()) + require.NoError(t, err) + require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") + + // Truncate Chunk Disk Mapper and compare sizes. + require.NoError(t, db.Head().chunkDiskMapper.Truncate(900)) + cdmSize, err = db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + require.NotZero(t, cdmSize) + expSize = blockSize + walSize + cdmSize + actSize, err = fileutil.DirSize(db.Dir()) + require.NoError(t, err) + require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") + + // Add some out of order samples to check the size of WBL. + headApp = db.Head().Appender(context.Background()) + for ts := int64(750); ts < 800; ts++ { + _, err := headApp.Append(0, aSeries, ts, float64(ts)) require.NoError(t, err) } - require.NoError(t, it.Err()) - } + require.NoError(t, headApp.Commit()) + + walSize, err = db.Head().wal.Size() + require.NoError(t, err) + wblSize, err := db.Head().wbl.Size() + require.NoError(t, err) + require.NotZero(t, wblSize) + cdmSize, err = db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + expSize = blockSize + walSize + wblSize + cdmSize + actSize, err = fileutil.DirSize(db.Dir()) + require.NoError(t, err) + require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") + + // Decrease the max bytes limit so that a delete is triggered. 
+ // Check total size, total count and check that the oldest block was deleted. + firstBlockSize := db.Blocks()[0].Size() + sizeLimit := actSize - firstBlockSize + db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size. + require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size. + + expBlocks := blocks[1:] + actBlocks := db.Blocks() + blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) + walSize, err = db.Head().wal.Size() + require.NoError(t, err) + cdmSize, err = db.Head().chunkDiskMapper.Size() + require.NoError(t, err) + require.NotZero(t, cdmSize) + // Expected size should take into account block size + WAL size + WBL size + expSize = blockSize + walSize + wblSize + cdmSize + actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount)) + actSize, err = fileutil.DirSize(db.Dir()) + require.NoError(t, err) + + require.Equal(t, 1, actRetentionCount, "metric retention count mismatch") + require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size") + require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit) + require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1) + require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block") + require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block") + }) } - require.NoError(t, headApp.Commit()) - db.Head().mmapHeadChunks() - - require.Eventually(t, func() bool { - return db.Head().chunkDiskMapper.IsQueueEmpty() - }, 2*time.Second, 100*time.Millisecond) - - // Test that registered size matches the actual disk size. - require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size. - require.Len(t, db.Blocks(), len(blocks)) // Ensure all blocks are registered. - blockSize := int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. - walSize, err := db.Head().wal.Size() - require.NoError(t, err) - cdmSize, err := db.Head().chunkDiskMapper.Size() - require.NoError(t, err) - require.NotZero(t, cdmSize) - // Expected size should take into account block size + WAL size + Head - // chunks size - expSize := blockSize + walSize + cdmSize - actSize, err := fileutil.DirSize(db.Dir()) - require.NoError(t, err) - require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") - - // Create a WAL checkpoint, and compare sizes. - first, last, err := wlog.Segments(db.Head().wal.Dir()) - require.NoError(t, err) - _, err = wlog.Checkpoint(promslog.NewNopLogger(), db.Head().wal, first, last-1, func(chunks.HeadSeriesRef) bool { return false }, 0) - require.NoError(t, err) - blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) // Use the actual internal metrics. - walSize, err = db.Head().wal.Size() - require.NoError(t, err) - cdmSize, err = db.Head().chunkDiskMapper.Size() - require.NoError(t, err) - require.NotZero(t, cdmSize) - expSize = blockSize + walSize + cdmSize - actSize, err = fileutil.DirSize(db.Dir()) - require.NoError(t, err) - require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") - - // Truncate Chunk Disk Mapper and compare sizes. 
- require.NoError(t, db.Head().chunkDiskMapper.Truncate(900)) - cdmSize, err = db.Head().chunkDiskMapper.Size() - require.NoError(t, err) - require.NotZero(t, cdmSize) - expSize = blockSize + walSize + cdmSize - actSize, err = fileutil.DirSize(db.Dir()) - require.NoError(t, err) - require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") - - // Add some out of order samples to check the size of WBL. - headApp = db.Head().Appender(context.Background()) - for ts := int64(750); ts < 800; ts++ { - _, err := headApp.Append(0, aSeries, ts, float64(ts)) - require.NoError(t, err) - } - require.NoError(t, headApp.Commit()) - - walSize, err = db.Head().wal.Size() - require.NoError(t, err) - wblSize, err := db.Head().wbl.Size() - require.NoError(t, err) - require.NotZero(t, wblSize) - cdmSize, err = db.Head().chunkDiskMapper.Size() - require.NoError(t, err) - expSize = blockSize + walSize + wblSize + cdmSize - actSize, err = fileutil.DirSize(db.Dir()) - require.NoError(t, err) - require.Equal(t, expSize, actSize, "registered size doesn't match actual disk size") - - // Decrease the max bytes limit so that a delete is triggered. - // Check total size, total count and check that the oldest block was deleted. - firstBlockSize := db.Blocks()[0].Size() - sizeLimit := actSize - firstBlockSize - db.opts.MaxBytes = sizeLimit // Set the new db size limit one block smaller that the actual size. - require.NoError(t, db.reloadBlocks()) // Reload the db to register the new db size. - - expBlocks := blocks[1:] - actBlocks := db.Blocks() - blockSize = int64(prom_testutil.ToFloat64(db.metrics.blocksBytes)) - walSize, err = db.Head().wal.Size() - require.NoError(t, err) - cdmSize, err = db.Head().chunkDiskMapper.Size() - require.NoError(t, err) - require.NotZero(t, cdmSize) - // Expected size should take into account block size + WAL size + WBL size - expSize = blockSize + walSize + wblSize + cdmSize - actRetentionCount := int(prom_testutil.ToFloat64(db.metrics.sizeRetentionCount)) - actSize, err = fileutil.DirSize(db.Dir()) - require.NoError(t, err) - - require.Equal(t, 1, actRetentionCount, "metric retention count mismatch") - require.Equal(t, expSize, actSize, "metric db size doesn't match actual disk size") - require.LessOrEqual(t, expSize, sizeLimit, "actual size (%v) is expected to be less than or equal to limit (%v)", expSize, sizeLimit) - require.Len(t, actBlocks, len(blocks)-1, "new block count should be decreased from:%v to:%v", len(blocks), len(blocks)-1) - require.Equal(t, expBlocks[0].MaxTime, actBlocks[0].meta.MaxTime, "maxT mismatch of the first block") - require.Equal(t, expBlocks[len(expBlocks)-1].MaxTime, actBlocks[len(actBlocks)-1].meta.MaxTime, "maxT mismatch of the last block") } func TestSizeRetentionMetric(t *testing.T) { @@ -2071,33 +2076,36 @@ func TestInitializeHeadTimestamp(t *testing.T) { require.Equal(t, int64(1000), db.head.MaxTime()) require.True(t, db.head.initialized()) }) - t.Run("wal-only", func(t *testing.T) { - dir := t.TempDir() - require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777)) - w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None) - require.NoError(t, err) + for _, enableStStorage := range []bool{false, true} { + t.Run("wal-only-st-"+strconv.FormatBool(enableStStorage), func(t *testing.T) { + dir := t.TempDir() - var enc record.Encoder - err = w.Log( - enc.Series([]record.RefSeries{ - {Ref: 123, Labels: labels.FromStrings("a", "1")}, - {Ref: 124, Labels: labels.FromStrings("a", "2")}, - }, nil), - 
enc.Samples([]record.RefSample{ - {Ref: 123, T: 5000, V: 1}, - {Ref: 124, T: 15000, V: 1}, - }, nil), - ) - require.NoError(t, err) - require.NoError(t, w.Close()) + require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777)) + w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None) + require.NoError(t, err) - db := newTestDB(t, withDir(dir)) + enc := record.Encoder{EnableSTStorage: enableStStorage} + err = w.Log( + enc.Series([]record.RefSeries{ + {Ref: 123, Labels: labels.FromStrings("a", "1")}, + {Ref: 124, Labels: labels.FromStrings("a", "2")}, + }, nil), + enc.Samples([]record.RefSample{ + {Ref: 123, T: 5000, V: 1}, + {Ref: 124, T: 15000, V: 1}, + }, nil), + ) + require.NoError(t, err) + require.NoError(t, w.Close()) - require.Equal(t, int64(5000), db.head.MinTime()) - require.Equal(t, int64(15000), db.head.MaxTime()) - require.True(t, db.head.initialized()) - }) + db := newTestDB(t, withDir(dir)) + + require.Equal(t, int64(5000), db.head.MinTime()) + require.Equal(t, int64(15000), db.head.MaxTime()) + require.True(t, db.head.initialized()) + }) + } t.Run("existing-block", func(t *testing.T) { dir := t.TempDir() @@ -2109,37 +2117,40 @@ func TestInitializeHeadTimestamp(t *testing.T) { require.Equal(t, int64(2000), db.head.MaxTime()) require.True(t, db.head.initialized()) }) - t.Run("existing-block-and-wal", func(t *testing.T) { - dir := t.TempDir() - createBlock(t, dir, genSeries(1, 1, 1000, 6000)) + for _, enableStStorage := range []bool{false, true} { + t.Run("existing-block-and-wal-"+strconv.FormatBool(enableStStorage), func(t *testing.T) { + dir := t.TempDir() - require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777)) - w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None) - require.NoError(t, err) + createBlock(t, dir, genSeries(1, 1, 1000, 6000)) - var enc record.Encoder - err = w.Log( - enc.Series([]record.RefSeries{ - {Ref: 123, Labels: labels.FromStrings("a", "1")}, - {Ref: 124, Labels: labels.FromStrings("a", "2")}, - }, nil), - enc.Samples([]record.RefSample{ - {Ref: 123, T: 5000, V: 1}, - {Ref: 124, T: 15000, V: 1}, - }, nil), - ) - require.NoError(t, err) - require.NoError(t, w.Close()) + require.NoError(t, os.MkdirAll(path.Join(dir, "wal"), 0o777)) + w, err := wlog.New(nil, nil, path.Join(dir, "wal"), compression.None) + require.NoError(t, err) - db := newTestDB(t, withDir(dir)) + enc := record.Encoder{EnableSTStorage: enableStStorage} + err = w.Log( + enc.Series([]record.RefSeries{ + {Ref: 123, Labels: labels.FromStrings("a", "1")}, + {Ref: 124, Labels: labels.FromStrings("a", "2")}, + }, nil), + enc.Samples([]record.RefSample{ + {Ref: 123, T: 5000, V: 1}, + {Ref: 124, T: 15000, V: 1}, + }, nil), + ) + require.NoError(t, err) + require.NoError(t, w.Close()) - require.Equal(t, int64(6000), db.head.MinTime()) - require.Equal(t, int64(15000), db.head.MaxTime()) - require.True(t, db.head.initialized()) - // Check that old series has been GCed. - require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series)) - }) + db := newTestDB(t, withDir(dir)) + + require.Equal(t, int64(6000), db.head.MinTime()) + require.Equal(t, int64(15000), db.head.MaxTime()) + require.True(t, db.head.initialized()) + // Check that old series has been GCed. 
+ require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.series)) + }) + } } func TestNoEmptyBlocks(t *testing.T) { @@ -4530,7 +4541,7 @@ func testOOOWALWrite(t *testing.T, series, err := dec.Series(rec, nil) require.NoError(t, err) records = append(records, series) - case record.Samples: + case record.Samples, record.SamplesV2: samples, err := dec.Samples(rec, nil) require.NoError(t, err) records = append(records, samples) @@ -4691,102 +4702,106 @@ func TestMetadataCheckpointingOnlyKeepsLatestEntry(t *testing.T) { require.NoError(t, err) } - ctx := context.Background() - numSamples := 10000 - hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false) + for _, enableStStorage := range []bool{false, true} { + t.Run("enableStStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + ctx := context.Background() + numSamples := 10000 + hb, w := newTestHead(t, int64(numSamples)*10, compression.None, false) - // Add some series so we can append metadata to them. - app := hb.Appender(ctx) - s1 := labels.FromStrings("a", "b") - s2 := labels.FromStrings("c", "d") - s3 := labels.FromStrings("e", "f") - s4 := labels.FromStrings("g", "h") + // Add some series so we can append metadata to them. + app := hb.Appender(ctx) + s1 := labels.FromStrings("a", "b") + s2 := labels.FromStrings("c", "d") + s3 := labels.FromStrings("e", "f") + s4 := labels.FromStrings("g", "h") - for _, s := range []labels.Labels{s1, s2, s3, s4} { - _, err := app.Append(0, s, 0, 0) - require.NoError(t, err) + for _, s := range []labels.Labels{s1, s2, s3, s4} { + _, err := app.Append(0, s, 0, 0) + require.NoError(t, err) + } + require.NoError(t, app.Commit()) + + // Add a first round of metadata to the first three series. + // Re-take the Appender, as the previous Commit will have it closed. + m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"} + m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"} + m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"} + m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"} + app = hb.Appender(ctx) + updateMetadata(t, app, s1, m1) + updateMetadata(t, app, s2, m2) + updateMetadata(t, app, s3, m3) + updateMetadata(t, app, s4, m4) + require.NoError(t, app.Commit()) + + // Update metadata for first series. + m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"} + app = hb.Appender(ctx) + updateMetadata(t, app, s1, m5) + require.NoError(t, app.Commit()) + + // Switch back-and-forth metadata for second series. + // Since it ended on a new metadata record, we expect a single new entry. + m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"} + + app = hb.Appender(ctx) + updateMetadata(t, app, s2, m6) + require.NoError(t, app.Commit()) + + app = hb.Appender(ctx) + updateMetadata(t, app, s2, m2) + require.NoError(t, app.Commit()) + + app = hb.Appender(ctx) + updateMetadata(t, app, s2, m6) + require.NoError(t, app.Commit()) + + app = hb.Appender(ctx) + updateMetadata(t, app, s2, m2) + require.NoError(t, app.Commit()) + + app = hb.Appender(ctx) + updateMetadata(t, app, s2, m6) + require.NoError(t, app.Commit()) + + // Let's create a checkpoint. + first, last, err := wlog.Segments(w.Dir()) + require.NoError(t, err) + keep := func(id chunks.HeadSeriesRef) bool { + return id != 3 + } + _, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0, enableStStorage) + require.NoError(t, err) + + // Confirm there's been a checkpoint. 
+ cdir, _, err := wlog.LastCheckpoint(w.Dir()) + require.NoError(t, err) + + // Read in checkpoint and WAL. + recs := readTestWAL(t, cdir) + var gotMetadataBlocks [][]record.RefMetadata + for _, rec := range recs { + if mr, ok := rec.([]record.RefMetadata); ok { + gotMetadataBlocks = append(gotMetadataBlocks, mr) + } + } + + // There should only be 1 metadata block present, with only the latest + // metadata kept around. + wantMetadata := []record.RefMetadata{ + {Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help}, + {Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help}, + {Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help}, + } + require.Len(t, gotMetadataBlocks, 1) + require.Len(t, gotMetadataBlocks[0], 3) + gotMetadataBlock := gotMetadataBlocks[0] + + sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref }) + require.Equal(t, wantMetadata, gotMetadataBlock) + require.NoError(t, hb.Close()) + }) } - require.NoError(t, app.Commit()) - - // Add a first round of metadata to the first three series. - // Re-take the Appender, as the previous Commit will have it closed. - m1 := metadata.Metadata{Type: "gauge", Unit: "unit_1", Help: "help_1"} - m2 := metadata.Metadata{Type: "gauge", Unit: "unit_2", Help: "help_2"} - m3 := metadata.Metadata{Type: "gauge", Unit: "unit_3", Help: "help_3"} - m4 := metadata.Metadata{Type: "gauge", Unit: "unit_4", Help: "help_4"} - app = hb.Appender(ctx) - updateMetadata(t, app, s1, m1) - updateMetadata(t, app, s2, m2) - updateMetadata(t, app, s3, m3) - updateMetadata(t, app, s4, m4) - require.NoError(t, app.Commit()) - - // Update metadata for first series. - m5 := metadata.Metadata{Type: "counter", Unit: "unit_5", Help: "help_5"} - app = hb.Appender(ctx) - updateMetadata(t, app, s1, m5) - require.NoError(t, app.Commit()) - - // Switch back-and-forth metadata for second series. - // Since it ended on a new metadata record, we expect a single new entry. - m6 := metadata.Metadata{Type: "counter", Unit: "unit_6", Help: "help_6"} - - app = hb.Appender(ctx) - updateMetadata(t, app, s2, m6) - require.NoError(t, app.Commit()) - - app = hb.Appender(ctx) - updateMetadata(t, app, s2, m2) - require.NoError(t, app.Commit()) - - app = hb.Appender(ctx) - updateMetadata(t, app, s2, m6) - require.NoError(t, app.Commit()) - - app = hb.Appender(ctx) - updateMetadata(t, app, s2, m2) - require.NoError(t, app.Commit()) - - app = hb.Appender(ctx) - updateMetadata(t, app, s2, m6) - require.NoError(t, app.Commit()) - - // Let's create a checkpoint. - first, last, err := wlog.Segments(w.Dir()) - require.NoError(t, err) - keep := func(id chunks.HeadSeriesRef) bool { - return id != 3 - } - _, err = wlog.Checkpoint(promslog.NewNopLogger(), w, first, last-1, keep, 0) - require.NoError(t, err) - - // Confirm there's been a checkpoint. - cdir, _, err := wlog.LastCheckpoint(w.Dir()) - require.NoError(t, err) - - // Read in checkpoint and WAL. - recs := readTestWAL(t, cdir) - var gotMetadataBlocks [][]record.RefMetadata - for _, rec := range recs { - if mr, ok := rec.([]record.RefMetadata); ok { - gotMetadataBlocks = append(gotMetadataBlocks, mr) - } - } - - // There should only be 1 metadata block present, with only the latest - // metadata kept around. 
- wantMetadata := []record.RefMetadata{ - {Ref: 1, Type: record.GetMetricType(m5.Type), Unit: m5.Unit, Help: m5.Help}, - {Ref: 2, Type: record.GetMetricType(m6.Type), Unit: m6.Unit, Help: m6.Help}, - {Ref: 4, Type: record.GetMetricType(m4.Type), Unit: m4.Unit, Help: m4.Help}, - } - require.Len(t, gotMetadataBlocks, 1) - require.Len(t, gotMetadataBlocks[0], 3) - gotMetadataBlock := gotMetadataBlocks[0] - - sort.Slice(gotMetadataBlock, func(i, j int) bool { return gotMetadataBlock[i].Ref < gotMetadataBlock[j].Ref }) - require.Equal(t, wantMetadata, gotMetadataBlock) - require.NoError(t, hb.Close()) } func TestMetadataAssertInMemoryData(t *testing.T) { diff --git a/tsdb/head.go b/tsdb/head.go index 3d700944d9..45ab0031e8 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -201,6 +201,11 @@ type HeadOptions struct { // NOTE(bwplotka): This feature might be deprecated and removed once PROM-60 // is implemented. EnableMetadataWALRecords bool + + // EnableSTStorage determines whether databases (WAL/WBL, tsdb, + // agent) should set a Start Time value per sample. Currently not + // user-settable and only set in tests. + EnableSTStorage bool } const ( @@ -1382,7 +1387,7 @@ func (h *Head) truncateWAL(mint int64) error { } h.metrics.checkpointCreationTotal.Inc() - if _, err = wlog.Checkpoint(h.logger, h.wal, first, last, h.keepSeriesInWALCheckpointFn(mint), mint); err != nil { + if _, err = wlog.Checkpoint(h.logger, h.wal, first, last, h.keepSeriesInWALCheckpointFn(mint), mint, h.opts.EnableSTStorage); err != nil { h.metrics.checkpointCreationFail.Inc() var cerr *chunks.CorruptionErr if errors.As(err, &cerr) { @@ -1676,7 +1681,7 @@ func (h *Head) Delete(ctx context.Context, mint, maxt int64, ms ...*labels.Match } if h.wal != nil { - var enc record.Encoder + enc := record.Encoder{EnableSTStorage: h.opts.EnableSTStorage} if err := h.wal.Log(enc.Tombstones(stones, nil)); err != nil { return err } diff --git a/tsdb/head_append.go b/tsdb/head_append.go index 005d20b720..9157ad9991 100644 --- a/tsdb/head_append.go +++ b/tsdb/head_append.go @@ -1059,7 +1059,7 @@ func (a *headAppenderBase) log() error { defer func() { a.head.putBytesBuffer(buf) }() var rec []byte - var enc record.Encoder + enc := record.Encoder{EnableSTStorage: a.head.opts.EnableSTStorage} if len(a.seriesRefs) > 0 { rec = enc.Series(a.seriesRefs, buf) @@ -1742,6 +1742,9 @@ func (a *headAppenderBase) Commit() (err error) { chunkRange: h.chunkRange.Load(), samplesPerChunk: h.opts.SamplesPerChunk, }, + enc: record.Encoder{ + EnableSTStorage: h.opts.EnableSTStorage, + }, } for _, b := range a.batches { diff --git a/tsdb/head_append_v2_test.go b/tsdb/head_append_v2_test.go index 082d756e60..ba756f801f 100644 --- a/tsdb/head_append_v2_test.go +++ b/tsdb/head_append_v2_test.go @@ -1867,296 +1867,300 @@ func TestHistogramInWALAndMmapChunk_AppenderV2(t *testing.T) { } func TestChunkSnapshot_AppenderV2(t *testing.T) { - head, _ := newTestHead(t, 120*4, compression.None, false) - defer func() { - head.opts.EnableMemorySnapshotOnShutdown = false - require.NoError(t, head.Close()) - }() + for _, enableStStorage := range []bool{false, true} { + t.Run("enableStStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + head, _ := newTestHead(t, 120*4, compression.None, false) + defer func() { + head.opts.EnableMemorySnapshotOnShutdown = false + require.NoError(t, head.Close()) + }() - type ex struct { - seriesLabels labels.Labels - e exemplar.Exemplar - } - - numSeries := 10 - expSeries := make(map[string][]chunks.Sample) - expHist := 
make(map[string][]chunks.Sample) - expFloatHist := make(map[string][]chunks.Sample) - expTombstones := make(map[storage.SeriesRef]tombstones.Intervals) - expExemplars := make([]ex, 0) - histograms := tsdbutil.GenerateTestGaugeHistograms(481) - floatHistogram := tsdbutil.GenerateTestGaugeFloatHistograms(481) - - newExemplar := func(lbls labels.Labels, ts int64) exemplar.Exemplar { - e := ex{ - seriesLabels: lbls, - e: exemplar.Exemplar{ - Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), - Value: rand.Float64(), - Ts: ts, - }, - } - expExemplars = append(expExemplars, e) - return e.e - } - - checkSamples := func() { - q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) - require.NoError(t, err) - series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) - require.Equal(t, expSeries, series) - } - checkHistograms := func() { - q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) - require.NoError(t, err) - series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "hist", "baz.*")) - require.Equal(t, expHist, series) - } - checkFloatHistograms := func() { - q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) - require.NoError(t, err) - series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "floathist", "bat.*")) - require.Equal(t, expFloatHist, series) - } - checkTombstones := func() { - tr, err := head.Tombstones() - require.NoError(t, err) - actTombstones := make(map[storage.SeriesRef]tombstones.Intervals) - require.NoError(t, tr.Iter(func(ref storage.SeriesRef, itvs tombstones.Intervals) error { - for _, itv := range itvs { - actTombstones[ref].Add(itv) + type ex struct { + seriesLabels labels.Labels + e exemplar.Exemplar + } + + numSeries := 10 + expSeries := make(map[string][]chunks.Sample) + expHist := make(map[string][]chunks.Sample) + expFloatHist := make(map[string][]chunks.Sample) + expTombstones := make(map[storage.SeriesRef]tombstones.Intervals) + expExemplars := make([]ex, 0) + histograms := tsdbutil.GenerateTestGaugeHistograms(481) + floatHistogram := tsdbutil.GenerateTestGaugeFloatHistograms(481) + + newExemplar := func(lbls labels.Labels, ts int64) exemplar.Exemplar { + e := ex{ + seriesLabels: lbls, + e: exemplar.Exemplar{ + Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), + Value: rand.Float64(), + Ts: ts, + }, + } + expExemplars = append(expExemplars, e) + return e.e + } + + checkSamples := func() { + q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expSeries, series) + } + checkHistograms := func() { + q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "hist", "baz.*")) + require.Equal(t, expHist, series) + } + checkFloatHistograms := func() { + q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "floathist", "bat.*")) + require.Equal(t, expFloatHist, series) + } + checkTombstones := func() { + tr, err := head.Tombstones() + require.NoError(t, err) + actTombstones := make(map[storage.SeriesRef]tombstones.Intervals) + require.NoError(t, tr.Iter(func(ref storage.SeriesRef, itvs tombstones.Intervals) error { + for _, itv := range itvs { + actTombstones[ref].Add(itv) + } + return nil + })) + require.Equal(t, expTombstones, actTombstones) + 
} + checkExemplars := func() { + actExemplars := make([]ex, 0, len(expExemplars)) + err := head.exemplars.IterateExemplars(func(seriesLabels labels.Labels, e exemplar.Exemplar) error { + actExemplars = append(actExemplars, ex{ + seriesLabels: seriesLabels, + e: e, + }) + return nil + }) + require.NoError(t, err) + // Verifies both existence of right exemplars and order of exemplars in the buffer. + testutil.RequireEqualWithOptions(t, expExemplars, actExemplars, []cmp.Option{cmp.AllowUnexported(ex{})}) + } + + var ( + wlast, woffset int + err error + ) + + closeHeadAndCheckSnapshot := func() { + require.NoError(t, head.Close()) + + _, sidx, soffset, err := LastChunkSnapshot(head.opts.ChunkDirRoot) + require.NoError(t, err) + require.Equal(t, wlast, sidx) + require.Equal(t, woffset, soffset) + } + + openHeadAndCheckReplay := func() { + w, err := wlog.NewSize(nil, nil, head.wal.Dir(), 32768, compression.None) + require.NoError(t, err) + head, err = NewHead(nil, nil, w, nil, head.opts, nil) + require.NoError(t, err) + require.NoError(t, head.Init(math.MinInt64)) + + checkSamples() + checkHistograms() + checkFloatHistograms() + checkTombstones() + checkExemplars() + } + + { // Initial data that goes into snapshot. + // Add some initial samples with >=1 m-map chunk. + app := head.AppenderV2(context.Background()) + for i := 1; i <= numSeries; i++ { + lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i)) + lblStr := lbls.String() + lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i)) + lblsHistStr := lblsHist.String() + lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i)) + lblsFloatHistStr := lblsFloatHist.String() + + // 240 samples should m-map at least 1 chunk. + for ts := int64(1); ts <= 240; ts++ { + // Add an exemplar, but only to float sample. + aOpts := storage.AOptions{} + if ts%10 == 0 { + aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)} + } + val := rand.Float64() + expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil}) + _, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts) + require.NoError(t, err) + + hist := histograms[int(ts)] + expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil}) + _, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{}) + require.NoError(t, err) + + floatHist := floatHistogram[int(ts)] + expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist}) + _, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{}) + require.NoError(t, err) + + // Create multiple WAL records (commit). + if ts%10 == 0 { + require.NoError(t, app.Commit()) + app = head.AppenderV2(context.Background()) + } + } + } + require.NoError(t, app.Commit()) + + // Add some tombstones. + enc := record.Encoder{EnableSTStorage: enableStStorage} + for i := 1; i <= numSeries; i++ { + ref := storage.SeriesRef(i) + itvs := tombstones.Intervals{ + {Mint: 1234, Maxt: 2345}, + {Mint: 3456, Maxt: 4567}, + } + for _, itv := range itvs { + expTombstones[ref].Add(itv) + } + head.tombstones.AddInterval(ref, itvs...) + err := head.wal.Log(enc.Tombstones([]tombstones.Stone{ + {Ref: ref, Intervals: itvs}, + }, nil)) + require.NoError(t, err) + } + } + + // These references should be the ones used for the snapshot. + wlast, woffset, err = head.wal.LastSegmentAndOffset() + require.NoError(t, err) + if woffset != 0 && woffset < 32*1024 { + // The page is always filled before taking the snapshot. 
+ woffset = 32 * 1024 + } + + { + // Creating snapshot and verifying it. + head.opts.EnableMemorySnapshotOnShutdown = true + closeHeadAndCheckSnapshot() // This will create a snapshot. + + // Test the replay of snapshot. + openHeadAndCheckReplay() + } + + { // Additional data to only include in WAL and m-mapped chunks and not snapshot. This mimics having an old snapshot on disk. + // Add more samples. + app := head.AppenderV2(context.Background()) + for i := 1; i <= numSeries; i++ { + lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i)) + lblStr := lbls.String() + lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i)) + lblsHistStr := lblsHist.String() + lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i)) + lblsFloatHistStr := lblsFloatHist.String() + + // 240 samples should m-map at least 1 chunk. + for ts := int64(241); ts <= 480; ts++ { + // Add an exemplar, but only to float sample. + aOpts := storage.AOptions{} + if ts%10 == 0 { + aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)} + } + val := rand.Float64() + expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil}) + _, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts) + require.NoError(t, err) + + hist := histograms[int(ts)] + expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil}) + _, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{}) + require.NoError(t, err) + + floatHist := floatHistogram[int(ts)] + expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist}) + _, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{}) + require.NoError(t, err) + + // Create multiple WAL records (commit). + if ts%10 == 0 { + require.NoError(t, app.Commit()) + app = head.AppenderV2(context.Background()) + } + } + } + require.NoError(t, app.Commit()) + + // Add more tombstones. + enc := record.Encoder{EnableSTStorage: enableStStorage} + for i := 1; i <= numSeries; i++ { + ref := storage.SeriesRef(i) + itvs := tombstones.Intervals{ + {Mint: 12345, Maxt: 23456}, + {Mint: 34567, Maxt: 45678}, + } + for _, itv := range itvs { + expTombstones[ref].Add(itv) + } + head.tombstones.AddInterval(ref, itvs...) + err := head.wal.Log(enc.Tombstones([]tombstones.Stone{ + {Ref: ref, Intervals: itvs}, + }, nil)) + require.NoError(t, err) + } + } + { + // Close Head and verify that new snapshot was not created. + head.opts.EnableMemorySnapshotOnShutdown = false + closeHeadAndCheckSnapshot() // This should not create a snapshot. + + // Test the replay of snapshot, m-map chunks, and WAL. + head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot. + openHeadAndCheckReplay() + } + + // Creating another snapshot should delete the older snapshot and replay still works fine. + wlast, woffset, err = head.wal.LastSegmentAndOffset() + require.NoError(t, err) + if woffset != 0 && woffset < 32*1024 { + // The page is always filled before taking the snapshot. + woffset = 32 * 1024 + } + + { + // Close Head and verify that new snapshot was created. + closeHeadAndCheckSnapshot() + + // Verify that there is only 1 snapshot. 
+ files, err := os.ReadDir(head.opts.ChunkDirRoot) + require.NoError(t, err) + snapshots := 0 + for i := len(files) - 1; i >= 0; i-- { + fi := files[i] + if strings.HasPrefix(fi.Name(), chunkSnapshotPrefix) { + snapshots++ + require.Equal(t, chunkSnapshotDir(wlast, woffset), fi.Name()) + } + } + require.Equal(t, 1, snapshots) + + // Test the replay of snapshot. + head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot. + + // Disabling exemplars to check that it does not hard fail replay + // https://github.com/prometheus/prometheus/issues/9437#issuecomment-933285870. + head.opts.EnableExemplarStorage = false + head.opts.MaxExemplars.Store(0) + expExemplars = expExemplars[:0] + + openHeadAndCheckReplay() + + require.Equal(t, 0.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) } - return nil - })) - require.Equal(t, expTombstones, actTombstones) - } - checkExemplars := func() { - actExemplars := make([]ex, 0, len(expExemplars)) - err := head.exemplars.IterateExemplars(func(seriesLabels labels.Labels, e exemplar.Exemplar) error { - actExemplars = append(actExemplars, ex{ - seriesLabels: seriesLabels, - e: e, - }) - return nil }) - require.NoError(t, err) - // Verifies both existence of right exemplars and order of exemplars in the buffer. - testutil.RequireEqualWithOptions(t, expExemplars, actExemplars, []cmp.Option{cmp.AllowUnexported(ex{})}) - } - - var ( - wlast, woffset int - err error - ) - - closeHeadAndCheckSnapshot := func() { - require.NoError(t, head.Close()) - - _, sidx, soffset, err := LastChunkSnapshot(head.opts.ChunkDirRoot) - require.NoError(t, err) - require.Equal(t, wlast, sidx) - require.Equal(t, woffset, soffset) - } - - openHeadAndCheckReplay := func() { - w, err := wlog.NewSize(nil, nil, head.wal.Dir(), 32768, compression.None) - require.NoError(t, err) - head, err = NewHead(nil, nil, w, nil, head.opts, nil) - require.NoError(t, err) - require.NoError(t, head.Init(math.MinInt64)) - - checkSamples() - checkHistograms() - checkFloatHistograms() - checkTombstones() - checkExemplars() - } - - { // Initial data that goes into snapshot. - // Add some initial samples with >=1 m-map chunk. - app := head.AppenderV2(context.Background()) - for i := 1; i <= numSeries; i++ { - lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i)) - lblStr := lbls.String() - lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i)) - lblsHistStr := lblsHist.String() - lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i)) - lblsFloatHistStr := lblsFloatHist.String() - - // 240 samples should m-map at least 1 chunk. - for ts := int64(1); ts <= 240; ts++ { - // Add an exemplar, but only to float sample. - aOpts := storage.AOptions{} - if ts%10 == 0 { - aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)} - } - val := rand.Float64() - expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil}) - _, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts) - require.NoError(t, err) - - hist := histograms[int(ts)] - expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil}) - _, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{}) - require.NoError(t, err) - - floatHist := floatHistogram[int(ts)] - expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist}) - _, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{}) - require.NoError(t, err) - - // Create multiple WAL records (commit). 
- if ts%10 == 0 { - require.NoError(t, app.Commit()) - app = head.AppenderV2(context.Background()) - } - } - } - require.NoError(t, app.Commit()) - - // Add some tombstones. - var enc record.Encoder - for i := 1; i <= numSeries; i++ { - ref := storage.SeriesRef(i) - itvs := tombstones.Intervals{ - {Mint: 1234, Maxt: 2345}, - {Mint: 3456, Maxt: 4567}, - } - for _, itv := range itvs { - expTombstones[ref].Add(itv) - } - head.tombstones.AddInterval(ref, itvs...) - err := head.wal.Log(enc.Tombstones([]tombstones.Stone{ - {Ref: ref, Intervals: itvs}, - }, nil)) - require.NoError(t, err) - } - } - - // These references should be the ones used for the snapshot. - wlast, woffset, err = head.wal.LastSegmentAndOffset() - require.NoError(t, err) - if woffset != 0 && woffset < 32*1024 { - // The page is always filled before taking the snapshot. - woffset = 32 * 1024 - } - - { - // Creating snapshot and verifying it. - head.opts.EnableMemorySnapshotOnShutdown = true - closeHeadAndCheckSnapshot() // This will create a snapshot. - - // Test the replay of snapshot. - openHeadAndCheckReplay() - } - - { // Additional data to only include in WAL and m-mapped chunks and not snapshot. This mimics having an old snapshot on disk. - // Add more samples. - app := head.AppenderV2(context.Background()) - for i := 1; i <= numSeries; i++ { - lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i)) - lblStr := lbls.String() - lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i)) - lblsHistStr := lblsHist.String() - lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i)) - lblsFloatHistStr := lblsFloatHist.String() - - // 240 samples should m-map at least 1 chunk. - for ts := int64(241); ts <= 480; ts++ { - // Add an exemplar, but only to float sample. - aOpts := storage.AOptions{} - if ts%10 == 0 { - aOpts.Exemplars = []exemplar.Exemplar{newExemplar(lbls, ts)} - } - val := rand.Float64() - expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil}) - _, err := app.Append(0, lbls, 0, ts, val, nil, nil, aOpts) - require.NoError(t, err) - - hist := histograms[int(ts)] - expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil}) - _, err = app.Append(0, lblsHist, 0, ts, 0, hist, nil, storage.AOptions{}) - require.NoError(t, err) - - floatHist := floatHistogram[int(ts)] - expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist}) - _, err = app.Append(0, lblsFloatHist, 0, ts, 0, nil, floatHist, storage.AOptions{}) - require.NoError(t, err) - - // Create multiple WAL records (commit). - if ts%10 == 0 { - require.NoError(t, app.Commit()) - app = head.AppenderV2(context.Background()) - } - } - } - require.NoError(t, app.Commit()) - - // Add more tombstones. - var enc record.Encoder - for i := 1; i <= numSeries; i++ { - ref := storage.SeriesRef(i) - itvs := tombstones.Intervals{ - {Mint: 12345, Maxt: 23456}, - {Mint: 34567, Maxt: 45678}, - } - for _, itv := range itvs { - expTombstones[ref].Add(itv) - } - head.tombstones.AddInterval(ref, itvs...) - err := head.wal.Log(enc.Tombstones([]tombstones.Stone{ - {Ref: ref, Intervals: itvs}, - }, nil)) - require.NoError(t, err) - } - } - { - // Close Head and verify that new snapshot was not created. - head.opts.EnableMemorySnapshotOnShutdown = false - closeHeadAndCheckSnapshot() // This should not create a snapshot. - - // Test the replay of snapshot, m-map chunks, and WAL. - head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot. 
- openHeadAndCheckReplay() - } - - // Creating another snapshot should delete the older snapshot and replay still works fine. - wlast, woffset, err = head.wal.LastSegmentAndOffset() - require.NoError(t, err) - if woffset != 0 && woffset < 32*1024 { - // The page is always filled before taking the snapshot. - woffset = 32 * 1024 - } - - { - // Close Head and verify that new snapshot was created. - closeHeadAndCheckSnapshot() - - // Verify that there is only 1 snapshot. - files, err := os.ReadDir(head.opts.ChunkDirRoot) - require.NoError(t, err) - snapshots := 0 - for i := len(files) - 1; i >= 0; i-- { - fi := files[i] - if strings.HasPrefix(fi.Name(), chunkSnapshotPrefix) { - snapshots++ - require.Equal(t, chunkSnapshotDir(wlast, woffset), fi.Name()) - } - } - require.Equal(t, 1, snapshots) - - // Test the replay of snapshot. - head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot. - - // Disabling exemplars to check that it does not hard fail replay - // https://github.com/prometheus/prometheus/issues/9437#issuecomment-933285870. - head.opts.EnableExemplarStorage = false - head.opts.MaxExemplars.Store(0) - expExemplars = expExemplars[:0] - - openHeadAndCheckReplay() - - require.Equal(t, 0.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) } } diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 7b8ae0ecbd..81cb236801 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -112,8 +112,8 @@ func BenchmarkCreateSeries(b *testing.B) { } } -func populateTestWL(t testing.TB, w *wlog.WL, recs []any, buf []byte) []byte { - var enc record.Encoder +func populateTestWL(t testing.TB, w *wlog.WL, recs []any, buf []byte, enableSTStorage bool) []byte { + enc := record.Encoder{EnableSTStorage: enableSTStorage} for _, r := range recs { buf = buf[:0] switch v := r.(type) { @@ -159,7 +159,7 @@ func readTestWAL(t testing.TB, dir string) (recs []any) { series, err := dec.Series(rec, nil) require.NoError(t, err) recs = append(recs, series) - case record.Samples: + case record.Samples, record.SamplesV2: samples, err := dec.Samples(rec, nil) require.NoError(t, err) recs = append(recs, samples) @@ -256,177 +256,179 @@ func BenchmarkLoadWLs(b *testing.B) { // Rough estimates of most common % of samples that have an exemplar for each scrape. exemplarsPercentages := []float64{0, 0.5, 1, 5} lastExemplarsPerSeries := -1 - for _, c := range cases { - missingSeriesPercentages := []float64{0, 0.1} - for _, missingSeriesPct := range missingSeriesPercentages { - for _, p := range exemplarsPercentages { - exemplarsPerSeries := int(math.RoundToEven(float64(c.samplesPerSeries) * p / 100)) - // For tests with low samplesPerSeries we could end up testing with 0 exemplarsPerSeries - // multiple times without this check. 
- if exemplarsPerSeries == lastExemplarsPerSeries { - continue - } - lastExemplarsPerSeries = exemplarsPerSeries - b.Run(fmt.Sprintf("batches=%d,seriesPerBatch=%d,samplesPerSeries=%d,exemplarsPerSeries=%d,mmappedChunkT=%d,oooSeriesPct=%.3f,oooSamplesPct=%.3f,oooCapMax=%d,missingSeriesPct=%.3f", c.batches, c.seriesPerBatch, c.samplesPerSeries, exemplarsPerSeries, c.mmappedChunkT, c.oooSeriesPct, c.oooSamplesPct, c.oooCapMax, missingSeriesPct), - func(b *testing.B) { - dir := b.TempDir() + for _, enableStStorage := range []bool{false, true} { + for _, c := range cases { + missingSeriesPercentages := []float64{0, 0.1} + for _, missingSeriesPct := range missingSeriesPercentages { + for _, p := range exemplarsPercentages { + exemplarsPerSeries := int(math.RoundToEven(float64(c.samplesPerSeries) * p / 100)) + // For tests with low samplesPerSeries we could end up testing with 0 exemplarsPerSeries + // multiple times without this check. + if exemplarsPerSeries == lastExemplarsPerSeries { + continue + } + lastExemplarsPerSeries = exemplarsPerSeries + b.Run(fmt.Sprintf("batches=%d,seriesPerBatch=%d,samplesPerSeries=%d,exemplarsPerSeries=%d,mmappedChunkT=%d,oooSeriesPct=%.3f,oooSamplesPct=%.3f,oooCapMax=%d,missingSeriesPct=%.3f,stStorage=%v", c.batches, c.seriesPerBatch, c.samplesPerSeries, exemplarsPerSeries, c.mmappedChunkT, c.oooSeriesPct, c.oooSamplesPct, c.oooCapMax, missingSeriesPct, enableStStorage), + func(b *testing.B) { + dir := b.TempDir() - wal, err := wlog.New(nil, nil, dir, compression.None) - require.NoError(b, err) - var wbl *wlog.WL - if c.oooSeriesPct != 0 { - wbl, err = wlog.New(nil, nil, dir, compression.None) + wal, err := wlog.New(nil, nil, dir, compression.None) require.NoError(b, err) - } - - // Write series. - refSeries := make([]record.RefSeries, 0, c.seriesPerBatch) - var buf []byte - builder := labels.NewBuilder(labels.EmptyLabels()) - for j := 1; j < labelsPerSeries; j++ { - builder.Set(defaultLabelName+strconv.Itoa(j), defaultLabelValue+strconv.Itoa(j)) - } - for k := 0; k < c.batches; k++ { - refSeries = refSeries[:0] - for i := k * c.seriesPerBatch; i < (k+1)*c.seriesPerBatch; i++ { - builder.Set(defaultLabelName, strconv.Itoa(i)) - refSeries = append(refSeries, record.RefSeries{Ref: chunks.HeadSeriesRef(i) * 101, Labels: builder.Labels()}) + var wbl *wlog.WL + if c.oooSeriesPct != 0 { + wbl, err = wlog.New(nil, nil, dir, compression.None) + require.NoError(b, err) } - writeSeries := refSeries - if missingSeriesPct > 0 { - newWriteSeries := make([]record.RefSeries, 0, int(float64(len(refSeries))*(1.0-missingSeriesPct))) - keepRatio := 1.0 - missingSeriesPct - // Keep approximately every 1/keepRatio series. - for i, s := range refSeries { - if int(float64(i)*keepRatio) != int(float64(i+1)*keepRatio) { - newWriteSeries = append(newWriteSeries, s) + // Write series. 
+ refSeries := make([]record.RefSeries, 0, c.seriesPerBatch) + var buf []byte + builder := labels.NewBuilder(labels.EmptyLabels()) + for j := 1; j < labelsPerSeries; j++ { + builder.Set(defaultLabelName+strconv.Itoa(j), defaultLabelValue+strconv.Itoa(j)) + } + for k := 0; k < c.batches; k++ { + refSeries = refSeries[:0] + for i := k * c.seriesPerBatch; i < (k+1)*c.seriesPerBatch; i++ { + builder.Set(defaultLabelName, strconv.Itoa(i)) + refSeries = append(refSeries, record.RefSeries{Ref: chunks.HeadSeriesRef(i) * 101, Labels: builder.Labels()}) + } + + writeSeries := refSeries + if missingSeriesPct > 0 { + newWriteSeries := make([]record.RefSeries, 0, int(float64(len(refSeries))*(1.0-missingSeriesPct))) + keepRatio := 1.0 - missingSeriesPct + // Keep approximately every 1/keepRatio series. + for i, s := range refSeries { + if int(float64(i)*keepRatio) != int(float64(i+1)*keepRatio) { + newWriteSeries = append(newWriteSeries, s) + } } + writeSeries = newWriteSeries } - writeSeries = newWriteSeries + + buf = populateTestWL(b, wal, []any{writeSeries}, buf, enableStStorage) } - buf = populateTestWL(b, wal, []any{writeSeries}, buf) - } + // Write samples. + refSamples := make([]record.RefSample, 0, c.seriesPerBatch) - // Write samples. - refSamples := make([]record.RefSample, 0, c.seriesPerBatch) + oooSeriesPerBatch := int(float64(c.seriesPerBatch) * c.oooSeriesPct) + oooSamplesPerSeries := int(float64(c.samplesPerSeries) * c.oooSamplesPct) - oooSeriesPerBatch := int(float64(c.seriesPerBatch) * c.oooSeriesPct) - oooSamplesPerSeries := int(float64(c.samplesPerSeries) * c.oooSamplesPct) + for i := 0; i < c.samplesPerSeries; i++ { + for j := 0; j < c.batches; j++ { + refSamples = refSamples[:0] - for i := 0; i < c.samplesPerSeries; i++ { - for j := 0; j < c.batches; j++ { - refSamples = refSamples[:0] - - k := j * c.seriesPerBatch - // Skip appending the first oooSamplesPerSeries samples for the series in the batch that - // should have OOO samples. OOO samples are appended after all the in-order samples. - if i < oooSamplesPerSeries { - k += oooSeriesPerBatch + k := j * c.seriesPerBatch + // Skip appending the first oooSamplesPerSeries samples for the series in the batch that + // should have OOO samples. OOO samples are appended after all the in-order samples. + if i < oooSamplesPerSeries { + k += oooSeriesPerBatch + } + for ; k < (j+1)*c.seriesPerBatch; k++ { + refSamples = append(refSamples, record.RefSample{ + Ref: chunks.HeadSeriesRef(k) * 101, + T: int64(i) * 10, + V: float64(i) * 100, + }) + } + buf = populateTestWL(b, wal, []any{refSamples}, buf, enableStStorage) } - for ; k < (j+1)*c.seriesPerBatch; k++ { - refSamples = append(refSamples, record.RefSample{ - Ref: chunks.HeadSeriesRef(k) * 101, - T: int64(i) * 10, - V: float64(i) * 100, - }) + } + + // Write mmapped chunks. + if c.mmappedChunkT != 0 { + chunkDiskMapper, err := chunks.NewChunkDiskMapper(nil, mmappedChunksDir(dir), chunkenc.NewPool(), chunks.DefaultWriteBufferSize, chunks.DefaultWriteQueueSize) + require.NoError(b, err) + cOpts := chunkOpts{ + chunkDiskMapper: chunkDiskMapper, + chunkRange: c.mmappedChunkT, + samplesPerChunk: DefaultSamplesPerChunk, } - buf = populateTestWL(b, wal, []any{refSamples}, buf) - } - } - - // Write mmapped chunks. 
- if c.mmappedChunkT != 0 { - chunkDiskMapper, err := chunks.NewChunkDiskMapper(nil, mmappedChunksDir(dir), chunkenc.NewPool(), chunks.DefaultWriteBufferSize, chunks.DefaultWriteQueueSize) - require.NoError(b, err) - cOpts := chunkOpts{ - chunkDiskMapper: chunkDiskMapper, - chunkRange: c.mmappedChunkT, - samplesPerChunk: DefaultSamplesPerChunk, - } - for k := 0; k < c.batches*c.seriesPerBatch; k++ { - // Create one mmapped chunk per series, with one sample at the given time. - s := newMemSeries(labels.Labels{}, chunks.HeadSeriesRef(k)*101, 0, defaultIsolationDisabled, false) - s.append(c.mmappedChunkT, 42, 0, cOpts) - // There's only one head chunk because only a single sample is appended. mmapChunks() - // ignores the latest chunk, so we need to cut a new head chunk to guarantee the chunk with - // the sample at c.mmappedChunkT is mmapped. - s.cutNewHeadChunk(c.mmappedChunkT, chunkenc.EncXOR, c.mmappedChunkT) - s.mmapChunks(chunkDiskMapper) - } - require.NoError(b, chunkDiskMapper.Close()) - } - - // Write exemplars. - refExemplars := make([]record.RefExemplar, 0, c.seriesPerBatch) - for i := range exemplarsPerSeries { - for j := 0; j < c.batches; j++ { - refExemplars = refExemplars[:0] - for k := j * c.seriesPerBatch; k < (j+1)*c.seriesPerBatch; k++ { - refExemplars = append(refExemplars, record.RefExemplar{ - Ref: chunks.HeadSeriesRef(k) * 101, - T: int64(i) * 10, - V: float64(i) * 100, - Labels: labels.FromStrings("trace_id", fmt.Sprintf("trace-%d", i)), - }) + for k := 0; k < c.batches*c.seriesPerBatch; k++ { + // Create one mmapped chunk per series, with one sample at the given time. + s := newMemSeries(labels.Labels{}, chunks.HeadSeriesRef(k)*101, 0, defaultIsolationDisabled, false) + s.append(c.mmappedChunkT, 42, 0, cOpts) + // There's only one head chunk because only a single sample is appended. mmapChunks() + // ignores the latest chunk, so we need to cut a new head chunk to guarantee the chunk with + // the sample at c.mmappedChunkT is mmapped. + s.cutNewHeadChunk(c.mmappedChunkT, chunkenc.EncXOR, c.mmappedChunkT) + s.mmapChunks(chunkDiskMapper) } - buf = populateTestWL(b, wal, []any{refExemplars}, buf) + require.NoError(b, chunkDiskMapper.Close()) } - } - // Write OOO samples and mmap markers. - refMarkers := make([]record.RefMmapMarker, 0, oooSeriesPerBatch) - refSamples = make([]record.RefSample, 0, oooSeriesPerBatch) - for i := range oooSamplesPerSeries { - shouldAddMarkers := c.oooCapMax != 0 && i != 0 && int64(i)%c.oooCapMax == 0 - - for j := 0; j < c.batches; j++ { - refSamples = refSamples[:0] - if shouldAddMarkers { - refMarkers = refMarkers[:0] + // Write exemplars. + refExemplars := make([]record.RefExemplar, 0, c.seriesPerBatch) + for i := range exemplarsPerSeries { + for j := 0; j < c.batches; j++ { + refExemplars = refExemplars[:0] + for k := j * c.seriesPerBatch; k < (j+1)*c.seriesPerBatch; k++ { + refExemplars = append(refExemplars, record.RefExemplar{ + Ref: chunks.HeadSeriesRef(k) * 101, + T: int64(i) * 10, + V: float64(i) * 100, + Labels: labels.FromStrings("trace_id", fmt.Sprintf("trace-%d", i)), + }) + } + buf = populateTestWL(b, wal, []any{refExemplars}, buf, enableStStorage) } - for k := j * c.seriesPerBatch; k < (j*c.seriesPerBatch)+oooSeriesPerBatch; k++ { - ref := chunks.HeadSeriesRef(k) * 101 + } + + // Write OOO samples and mmap markers. 
+ refMarkers := make([]record.RefMmapMarker, 0, oooSeriesPerBatch) + refSamples = make([]record.RefSample, 0, oooSeriesPerBatch) + for i := range oooSamplesPerSeries { + shouldAddMarkers := c.oooCapMax != 0 && i != 0 && int64(i)%c.oooCapMax == 0 + + for j := 0; j < c.batches; j++ { + refSamples = refSamples[:0] if shouldAddMarkers { - // loadWBL() checks that the marker's MmapRef is less than or equal to the ref - // for the last mmap chunk. Setting MmapRef to 0 to always pass that check. - refMarkers = append(refMarkers, record.RefMmapMarker{Ref: ref, MmapRef: 0}) + refMarkers = refMarkers[:0] } - refSamples = append(refSamples, record.RefSample{ - Ref: ref, - T: int64(i) * 10, - V: float64(i) * 100, - }) + for k := j * c.seriesPerBatch; k < (j*c.seriesPerBatch)+oooSeriesPerBatch; k++ { + ref := chunks.HeadSeriesRef(k) * 101 + if shouldAddMarkers { + // loadWBL() checks that the marker's MmapRef is less than or equal to the ref + // for the last mmap chunk. Setting MmapRef to 0 to always pass that check. + refMarkers = append(refMarkers, record.RefMmapMarker{Ref: ref, MmapRef: 0}) + } + refSamples = append(refSamples, record.RefSample{ + Ref: ref, + T: int64(i) * 10, + V: float64(i) * 100, + }) + } + if shouldAddMarkers { + populateTestWL(b, wbl, []any{refMarkers}, buf, enableStStorage) + } + buf = populateTestWL(b, wal, []any{refSamples}, buf, enableStStorage) + buf = populateTestWL(b, wbl, []any{refSamples}, buf, enableStStorage) } - if shouldAddMarkers { - populateTestWL(b, wbl, []any{refMarkers}, buf) + } + + b.ResetTimer() + + // Load the WAL. + for b.Loop() { + opts := DefaultHeadOptions() + opts.ChunkRange = 1000 + opts.ChunkDirRoot = dir + if c.oooCapMax > 0 { + opts.OutOfOrderCapMax.Store(c.oooCapMax) } - buf = populateTestWL(b, wal, []any{refSamples}, buf) - buf = populateTestWL(b, wbl, []any{refSamples}, buf) + h, err := NewHead(nil, nil, wal, wbl, opts, nil) + require.NoError(b, err) + h.Init(0) } - } - - b.ResetTimer() - - // Load the WAL. - for b.Loop() { - opts := DefaultHeadOptions() - opts.ChunkRange = 1000 - opts.ChunkDirRoot = dir - if c.oooCapMax > 0 { - opts.OutOfOrderCapMax.Store(c.oooCapMax) + b.StopTimer() + wal.Close() + if wbl != nil { + wbl.Close() } - h, err := NewHead(nil, nil, wal, wbl, opts, nil) - require.NoError(b, err) - h.Init(0) - } - b.StopTimer() - wal.Close() - if wbl != nil { - wbl.Close() - } - }) + }) + } } } } @@ -711,124 +713,126 @@ func TestHead_HighConcurrencyReadAndWrite(t *testing.T) { } func TestHead_ReadWAL(t *testing.T) { - for _, compress := range []compression.Type{compression.None, compression.Snappy, compression.Zstd} { - t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { - entries := []any{ - []record.RefSeries{ - {Ref: 10, Labels: labels.FromStrings("a", "1")}, - {Ref: 11, Labels: labels.FromStrings("a", "2")}, - {Ref: 100, Labels: labels.FromStrings("a", "3")}, - }, - []record.RefSample{ - {Ref: 0, T: 99, V: 1}, - {Ref: 10, T: 100, V: 2}, - {Ref: 100, T: 100, V: 3}, - }, - []record.RefSeries{ - {Ref: 50, Labels: labels.FromStrings("a", "4")}, - // This series has two refs pointing to it. - {Ref: 101, Labels: labels.FromStrings("a", "3")}, - }, - []record.RefSample{ - {Ref: 10, T: 101, V: 5}, - {Ref: 50, T: 101, V: 6}, - // Sample for duplicate series record. - {Ref: 101, T: 101, V: 7}, - }, - []tombstones.Stone{ - {Ref: 0, Intervals: []tombstones.Interval{{Mint: 99, Maxt: 101}}}, - // Tombstone for duplicate series record. 
- {Ref: 101, Intervals: []tombstones.Interval{{Mint: 0, Maxt: 100}}}, - }, - []record.RefExemplar{ - {Ref: 10, T: 100, V: 1, Labels: labels.FromStrings("trace_id", "asdf")}, - // Exemplar for duplicate series record. - {Ref: 101, T: 101, V: 7, Labels: labels.FromStrings("trace_id", "zxcv")}, - }, - []record.RefMetadata{ - // Metadata for duplicate series record. - {Ref: 101, Type: uint8(record.Counter), Unit: "foo", Help: "total foo"}, - }, - } - - head, w := newTestHead(t, 1000, compress, false) - - populateTestWL(t, w, entries, nil) - - require.NoError(t, head.Init(math.MinInt64)) - require.Equal(t, uint64(101), head.lastSeriesID.Load()) - - s10 := head.series.getByID(10) - s11 := head.series.getByID(11) - s50 := head.series.getByID(50) - s100 := head.series.getByID(100) - s101 := head.series.getByID(101) - - testutil.RequireEqual(t, labels.FromStrings("a", "1"), s10.lset) - require.Nil(t, s11) // Series without samples should be garbage collected at head.Init(). - testutil.RequireEqual(t, labels.FromStrings("a", "4"), s50.lset) - testutil.RequireEqual(t, labels.FromStrings("a", "3"), s100.lset) - - // Duplicate series record should not be written to the head. - require.Nil(t, s101) - // But it should have a WAL expiry set. - keepUntil, ok := head.getWALExpiry(101) - require.True(t, ok) - require.Equal(t, int64(101), keepUntil) - // Only the duplicate series record should have a WAL expiry set. - _, ok = head.getWALExpiry(50) - require.False(t, ok) - - expandChunk := func(c chunkenc.Iterator) (x []sample) { - for c.Next() == chunkenc.ValFloat { - t, v := c.At() - x = append(x, sample{t: t, f: v}) + for _, enableStStorage := range []bool{false, true} { + for _, compress := range []compression.Type{compression.None, compression.Snappy, compression.Zstd} { + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableStStorage), func(t *testing.T) { + entries := []any{ + []record.RefSeries{ + {Ref: 10, Labels: labels.FromStrings("a", "1")}, + {Ref: 11, Labels: labels.FromStrings("a", "2")}, + {Ref: 100, Labels: labels.FromStrings("a", "3")}, + }, + []record.RefSample{ + {Ref: 0, T: 99, V: 1}, + {Ref: 10, T: 100, V: 2}, + {Ref: 100, T: 100, V: 3}, + }, + []record.RefSeries{ + {Ref: 50, Labels: labels.FromStrings("a", "4")}, + // This series has two refs pointing to it. + {Ref: 101, Labels: labels.FromStrings("a", "3")}, + }, + []record.RefSample{ + {Ref: 10, T: 101, V: 5}, + {Ref: 50, T: 101, V: 6}, + // Sample for duplicate series record. + {Ref: 101, T: 101, V: 7}, + }, + []tombstones.Stone{ + {Ref: 0, Intervals: []tombstones.Interval{{Mint: 99, Maxt: 101}}}, + // Tombstone for duplicate series record. + {Ref: 101, Intervals: []tombstones.Interval{{Mint: 0, Maxt: 100}}}, + }, + []record.RefExemplar{ + {Ref: 10, T: 100, V: 1, Labels: labels.FromStrings("trace_id", "asdf")}, + // Exemplar for duplicate series record. + {Ref: 101, T: 101, V: 7, Labels: labels.FromStrings("trace_id", "zxcv")}, + }, + []record.RefMetadata{ + // Metadata for duplicate series record. + {Ref: 101, Type: uint8(record.Counter), Unit: "foo", Help: "total foo"}, + }, } - require.NoError(t, c.Err()) - return x - } - // Verify samples and exemplar for series 10. 
- c, _, _, err := s10.chunk(0, head.chunkDiskMapper, &head.memChunkPool) - require.NoError(t, err) - require.Equal(t, []sample{{0, 100, 2, nil, nil}, {0, 101, 5, nil, nil}}, expandChunk(c.chunk.Iterator(nil))) + head, w := newTestHead(t, 1000, compress, false) - q, err := head.ExemplarQuerier(context.Background()) - require.NoError(t, err) - e, err := q.Select(0, 1000, []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "a", "1")}) - require.NoError(t, err) - require.NotEmpty(t, e) - require.NotEmpty(t, e[0].Exemplars) - require.True(t, exemplar.Exemplar{Ts: 100, Value: 1, Labels: labels.FromStrings("trace_id", "asdf")}.Equals(e[0].Exemplars[0])) + populateTestWL(t, w, entries, nil, enableStStorage) - // Verify samples for series 50 - c, _, _, err = s50.chunk(0, head.chunkDiskMapper, &head.memChunkPool) - require.NoError(t, err) - require.Equal(t, []sample{{0, 101, 6, nil, nil}}, expandChunk(c.chunk.Iterator(nil))) + require.NoError(t, head.Init(math.MinInt64)) + require.Equal(t, uint64(101), head.lastSeriesID.Load()) - // Verify records for series 100 and its duplicate, series 101. - // The samples before the new series record should be discarded since a duplicate record - // is only possible when old samples were compacted. - c, _, _, err = s100.chunk(0, head.chunkDiskMapper, &head.memChunkPool) - require.NoError(t, err) - require.Equal(t, []sample{{0, 101, 7, nil, nil}}, expandChunk(c.chunk.Iterator(nil))) + s10 := head.series.getByID(10) + s11 := head.series.getByID(11) + s50 := head.series.getByID(50) + s100 := head.series.getByID(100) + s101 := head.series.getByID(101) - q, err = head.ExemplarQuerier(context.Background()) - require.NoError(t, err) - e, err = q.Select(0, 1000, []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "a", "3")}) - require.NoError(t, err) - require.NotEmpty(t, e) - require.NotEmpty(t, e[0].Exemplars) - require.True(t, exemplar.Exemplar{Ts: 101, Value: 7, Labels: labels.FromStrings("trace_id", "zxcv")}.Equals(e[0].Exemplars[0])) + testutil.RequireEqual(t, labels.FromStrings("a", "1"), s10.lset) + require.Nil(t, s11) // Series without samples should be garbage collected at head.Init(). + testutil.RequireEqual(t, labels.FromStrings("a", "4"), s50.lset) + testutil.RequireEqual(t, labels.FromStrings("a", "3"), s100.lset) - require.NotNil(t, s100.meta) - require.Equal(t, "foo", s100.meta.Unit) - require.Equal(t, "total foo", s100.meta.Help) + // Duplicate series record should not be written to the head. + require.Nil(t, s101) + // But it should have a WAL expiry set. + keepUntil, ok := head.getWALExpiry(101) + require.True(t, ok) + require.Equal(t, int64(101), keepUntil) + // Only the duplicate series record should have a WAL expiry set. + _, ok = head.getWALExpiry(50) + require.False(t, ok) - intervals, err := head.tombstones.Get(storage.SeriesRef(s100.ref)) - require.NoError(t, err) - require.Equal(t, tombstones.Intervals{{Mint: 0, Maxt: 100}}, intervals) - }) + expandChunk := func(c chunkenc.Iterator) (x []sample) { + for c.Next() == chunkenc.ValFloat { + t, v := c.At() + x = append(x, sample{t: t, f: v}) + } + require.NoError(t, c.Err()) + return x + } + + // Verify samples and exemplar for series 10. 
+ c, _, _, err := s10.chunk(0, head.chunkDiskMapper, &head.memChunkPool) + require.NoError(t, err) + require.Equal(t, []sample{{0, 100, 2, nil, nil}, {0, 101, 5, nil, nil}}, expandChunk(c.chunk.Iterator(nil))) + + q, err := head.ExemplarQuerier(context.Background()) + require.NoError(t, err) + e, err := q.Select(0, 1000, []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "a", "1")}) + require.NoError(t, err) + require.NotEmpty(t, e) + require.NotEmpty(t, e[0].Exemplars) + require.True(t, exemplar.Exemplar{Ts: 100, Value: 1, Labels: labels.FromStrings("trace_id", "asdf")}.Equals(e[0].Exemplars[0])) + + // Verify samples for series 50 + c, _, _, err = s50.chunk(0, head.chunkDiskMapper, &head.memChunkPool) + require.NoError(t, err) + require.Equal(t, []sample{{0, 101, 6, nil, nil}}, expandChunk(c.chunk.Iterator(nil))) + + // Verify records for series 100 and its duplicate, series 101. + // The samples before the new series record should be discarded since a duplicate record + // is only possible when old samples were compacted. + c, _, _, err = s100.chunk(0, head.chunkDiskMapper, &head.memChunkPool) + require.NoError(t, err) + require.Equal(t, []sample{{0, 101, 7, nil, nil}}, expandChunk(c.chunk.Iterator(nil))) + + q, err = head.ExemplarQuerier(context.Background()) + require.NoError(t, err) + e, err = q.Select(0, 1000, []*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, "a", "3")}) + require.NoError(t, err) + require.NotEmpty(t, e) + require.NotEmpty(t, e[0].Exemplars) + require.True(t, exemplar.Exemplar{Ts: 101, Value: 7, Labels: labels.FromStrings("trace_id", "zxcv")}.Equals(e[0].Exemplars[0])) + + require.NotNil(t, s100.meta) + require.Equal(t, "foo", s100.meta.Unit) + require.Equal(t, "total foo", s100.meta.Help) + + intervals, err := head.tombstones.Get(storage.SeriesRef(s100.ref)) + require.NoError(t, err) + require.Equal(t, tombstones.Intervals{{Mint: 0, Maxt: 100}}, intervals) + }) + } } } @@ -1099,42 +1103,43 @@ func TestHead_WALCheckpointMultiRef(t *testing.T) { }, } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - h, w := newTestHead(t, 1000, compression.None, false) - - populateTestWL(t, w, tc.walEntries, nil) - first, _, err := wlog.Segments(w.Dir()) - require.NoError(t, err) - - require.NoError(t, h.Init(0)) - - keepUntil, ok := h.getWALExpiry(2) - require.True(t, ok) - require.Equal(t, tc.expectedWalExpiry, keepUntil) - - // Each truncation creates a new segment, so attempt truncations until a checkpoint is created - for { - h.lastWALTruncationTime.Store(0) // Reset so that it's always time to truncate the WAL - err := h.truncateWAL(tc.walTruncateMinT) + for _, enableStStorage := range []bool{false, true} { + for _, tc := range cases { + t.Run(tc.name+",stStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + h, w := newTestHead(t, 1000, compression.None, false) + populateTestWL(t, w, tc.walEntries, nil, enableStStorage) + first, _, err := wlog.Segments(w.Dir()) require.NoError(t, err) - f, _, err := wlog.Segments(w.Dir()) - require.NoError(t, err) - if f > first { - break + + require.NoError(t, h.Init(0)) + + keepUntil, ok := h.getWALExpiry(2) + require.True(t, ok) + require.Equal(t, tc.expectedWalExpiry, keepUntil) + + // Each truncation creates a new segment, so attempt truncations until a checkpoint is created + for { + h.lastWALTruncationTime.Store(0) // Reset so that it's always time to truncate the WAL + err := h.truncateWAL(tc.walTruncateMinT) + require.NoError(t, err) + f, _, err := wlog.Segments(w.Dir()) + 
require.NoError(t, err) + if f > first { + break + } } - } - // Read test WAL , checkpoint first - checkpointDir, _, err := wlog.LastCheckpoint(w.Dir()) - require.NoError(t, err) - cprecs := readTestWAL(t, checkpointDir) - recs := readTestWAL(t, w.Dir()) - recs = append(cprecs, recs...) + // Read test WAL , checkpoint first + checkpointDir, _, err := wlog.LastCheckpoint(w.Dir()) + require.NoError(t, err) + cprecs := readTestWAL(t, checkpointDir) + recs := readTestWAL(t, w.Dir()) + recs = append(cprecs, recs...) - // Use testutil.RequireEqual which handles labels properly with dedupelabels - testutil.RequireEqual(t, tc.expectedWalEntries, recs) - }) + // Use testutil.RequireEqual which handles labels properly with dedupelabels + testutil.RequireEqual(t, tc.expectedWalEntries, recs) + }) + } } } @@ -1685,29 +1690,31 @@ func TestMemSeries_truncateChunks_scenarios(t *testing.T) { } func TestHeadDeleteSeriesWithoutSamples(t *testing.T) { - for _, compress := range []compression.Type{compression.None, compression.Snappy, compression.Zstd} { - t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { - entries := []any{ - []record.RefSeries{ - {Ref: 10, Labels: labels.FromStrings("a", "1")}, - }, - []record.RefSample{}, - []record.RefSeries{ - {Ref: 50, Labels: labels.FromStrings("a", "2")}, - }, - []record.RefSample{ - {Ref: 50, T: 80, V: 1}, - {Ref: 50, T: 90, V: 1}, - }, - } - head, w := newTestHead(t, 1000, compress, false) + for _, enableStStorage := range []bool{false, true} { + for _, compress := range []compression.Type{compression.None, compression.Snappy, compression.Zstd} { + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableStStorage), func(t *testing.T) { + entries := []any{ + []record.RefSeries{ + {Ref: 10, Labels: labels.FromStrings("a", "1")}, + }, + []record.RefSample{}, + []record.RefSeries{ + {Ref: 50, Labels: labels.FromStrings("a", "2")}, + }, + []record.RefSample{ + {Ref: 50, T: 80, V: 1}, + {Ref: 50, T: 90, V: 1}, + }, + } + head, w := newTestHead(t, 1000, compress, false) - populateTestWL(t, w, entries, nil) + populateTestWL(t, w, entries, nil, enableStStorage) - require.NoError(t, head.Init(math.MinInt64)) + require.NoError(t, head.Init(math.MinInt64)) - require.NoError(t, head.Delete(context.Background(), 0, 100, labels.MustNewMatcher(labels.MatchEqual, "a", "1"))) - }) + require.NoError(t, head.Delete(context.Background(), 0, 100, labels.MustNewMatcher(labels.MatchEqual, "a", "1"))) + }) + } } } @@ -2381,26 +2388,26 @@ func TestGCChunkAccess(t *testing.T) { idx := h.indexRange(0, 1500) var ( - chunks []chunks.Meta + chnks []chunks.Meta builder labels.ScratchBuilder ) - require.NoError(t, idx.Series(1, &builder, &chunks)) + require.NoError(t, idx.Series(1, &builder, &chnks)) require.Equal(t, labels.FromStrings("a", "1"), builder.Labels()) - require.Len(t, chunks, 2) + require.Len(t, chnks, 2) cr, err := h.chunksRange(0, 1500, nil) require.NoError(t, err) - _, _, err = cr.ChunkOrIterable(chunks[0]) + _, _, err = cr.ChunkOrIterable(chnks[0]) require.NoError(t, err) - _, _, err = cr.ChunkOrIterable(chunks[1]) + _, _, err = cr.ChunkOrIterable(chnks[1]) require.NoError(t, err) require.NoError(t, h.Truncate(1500)) // Remove a chunk. 
- _, _, err = cr.ChunkOrIterable(chunks[0]) + _, _, err = cr.ChunkOrIterable(chnks[0]) require.Equal(t, storage.ErrNotFound, err) - _, _, err = cr.ChunkOrIterable(chunks[1]) + _, _, err = cr.ChunkOrIterable(chnks[1]) require.NoError(t, err) } @@ -2568,94 +2575,96 @@ func TestHead_ReturnsSortedLabelValues(t *testing.T) { // TestWalRepair_DecodingError ensures that a repair is run for an error // when decoding a record. func TestWalRepair_DecodingError(t *testing.T) { - var enc record.Encoder - for name, test := range map[string]struct { - corrFunc func(rec []byte) []byte // Func that applies the corruption to a record. - rec []byte - totalRecs int - expRecs int - }{ - "decode_series": { - func(rec []byte) []byte { - return rec[:3] + for _, enableStStorage := range []bool{false, true} { + enc := record.Encoder{EnableSTStorage: enableStStorage} + for name, test := range map[string]struct { + corrFunc func(rec []byte) []byte // Func that applies the corruption to a record. + rec []byte + totalRecs int + expRecs int + }{ + "decode_series": { + func(rec []byte) []byte { + return rec[:3] + }, + enc.Series([]record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), + 9, + 5, }, - enc.Series([]record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), - 9, - 5, - }, - "decode_samples": { - func(rec []byte) []byte { - return rec[:3] + "decode_samples": { + func(rec []byte) []byte { + return rec[:3] + }, + enc.Samples([]record.RefSample{{Ref: 0, T: 99, V: 1}}, []byte{}), + 9, + 5, }, - enc.Samples([]record.RefSample{{Ref: 0, T: 99, V: 1}}, []byte{}), - 9, - 5, - }, - "decode_tombstone": { - func(rec []byte) []byte { - return rec[:3] + "decode_tombstone": { + func(rec []byte) []byte { + return rec[:3] + }, + enc.Tombstones([]tombstones.Stone{{Ref: 1, Intervals: tombstones.Intervals{}}}, []byte{}), + 9, + 5, }, - enc.Tombstones([]tombstones.Stone{{Ref: 1, Intervals: tombstones.Intervals{}}}, []byte{}), - 9, - 5, - }, - } { - for _, compress := range []compression.Type{compression.None, compression.Snappy, compression.Zstd} { - t.Run(fmt.Sprintf("%s,compress=%s", name, compress), func(t *testing.T) { - dir := t.TempDir() + } { + for _, compress := range []compression.Type{compression.None, compression.Snappy, compression.Zstd} { + t.Run(fmt.Sprintf("%s,compress=%s,stStorage=%v", name, compress, enableStStorage), func(t *testing.T) { + dir := t.TempDir() - // Fill the wal and corrupt it. - { - w, err := wlog.New(nil, nil, filepath.Join(dir, "wal"), compress) - require.NoError(t, err) + // Fill the wal and corrupt it. + { + w, err := wlog.New(nil, nil, filepath.Join(dir, "wal"), compress) + require.NoError(t, err) - for i := 1; i <= test.totalRecs; i++ { - // At this point insert a corrupted record. - if i-1 == test.expRecs { - require.NoError(t, w.Log(test.corrFunc(test.rec))) - continue + for i := 1; i <= test.totalRecs; i++ { + // At this point insert a corrupted record. 
+ if i-1 == test.expRecs { + require.NoError(t, w.Log(test.corrFunc(test.rec))) + continue + } + require.NoError(t, w.Log(test.rec)) } - require.NoError(t, w.Log(test.rec)) + + opts := DefaultHeadOptions() + opts.ChunkRange = 1 + opts.ChunkDirRoot = w.Dir() + h, err := NewHead(nil, nil, w, nil, opts, nil) + require.NoError(t, err) + require.Equal(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal)) + initErr := h.Init(math.MinInt64) + + var cerr *wlog.CorruptionErr + require.ErrorAs(t, initErr, &cerr, "reading the wal didn't return corruption error") + require.NoError(t, h.Close()) // Head will close the wal as well. } - opts := DefaultHeadOptions() - opts.ChunkRange = 1 - opts.ChunkDirRoot = w.Dir() - h, err := NewHead(nil, nil, w, nil, opts, nil) - require.NoError(t, err) - require.Equal(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal)) - initErr := h.Init(math.MinInt64) - - var cerr *wlog.CorruptionErr - require.ErrorAs(t, initErr, &cerr, "reading the wal didn't return corruption error") - require.NoError(t, h.Close()) // Head will close the wal as well. - } - - // Open the db to trigger a repair. - { - db, err := Open(dir, nil, nil, DefaultOptions(), nil) - require.NoError(t, err) - defer func() { - require.NoError(t, db.Close()) - }() - require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.walCorruptionsTotal)) - } - - // Read the wal content after the repair. - { - sr, err := wlog.NewSegmentsReader(filepath.Join(dir, "wal")) - require.NoError(t, err) - defer sr.Close() - r := wlog.NewReader(sr) - - var actRec int - for r.Next() { - actRec++ + // Open the db to trigger a repair. + { + db, err := Open(dir, nil, nil, DefaultOptions(), nil) + require.NoError(t, err) + defer func() { + require.NoError(t, db.Close()) + }() + require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.walCorruptionsTotal)) } - require.NoError(t, r.Err()) - require.Equal(t, test.expRecs, actRec, "Wrong number of intact records") - } - }) + + // Read the wal content after the repair. + { + sr, err := wlog.NewSegmentsReader(filepath.Join(dir, "wal")) + require.NoError(t, err) + defer sr.Close() + r := wlog.NewReader(sr) + + var actRec int + for r.Next() { + actRec++ + } + require.NoError(t, r.Err()) + require.Equal(t, test.expRecs, actRec, "Wrong number of intact records") + } + }) + } } } } @@ -2663,72 +2672,76 @@ func TestWalRepair_DecodingError(t *testing.T) { // TestWblRepair_DecodingError ensures that a repair is run for an error // when decoding a record. func TestWblRepair_DecodingError(t *testing.T) { - var enc record.Encoder - corrFunc := func(rec []byte) []byte { - return rec[:3] - } - rec := enc.Samples([]record.RefSample{{Ref: 0, T: 99, V: 1}}, []byte{}) - totalRecs := 9 - expRecs := 5 - dir := t.TempDir() - - // Fill the wbl and corrupt it. - { - wal, err := wlog.New(nil, nil, filepath.Join(dir, "wal"), compression.None) - require.NoError(t, err) - wbl, err := wlog.New(nil, nil, filepath.Join(dir, "wbl"), compression.None) - require.NoError(t, err) - - for i := 1; i <= totalRecs; i++ { - // At this point insert a corrupted record. 
- if i-1 == expRecs { - require.NoError(t, wbl.Log(corrFunc(rec))) - continue + for _, enableStStorage := range []bool{false, true} { + t.Run("enableStStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + enc := record.Encoder{EnableSTStorage: enableStStorage} + corrFunc := func(rec []byte) []byte { + return rec[:3] } - require.NoError(t, wbl.Log(rec)) - } + rec := enc.Samples([]record.RefSample{{Ref: 0, T: 99, V: 1}}, []byte{}) + totalRecs := 9 + expRecs := 5 + dir := t.TempDir() - opts := DefaultHeadOptions() - opts.ChunkRange = 1 - opts.ChunkDirRoot = wal.Dir() - opts.OutOfOrderCapMax.Store(30) - opts.OutOfOrderTimeWindow.Store(1000 * time.Minute.Milliseconds()) - h, err := NewHead(nil, nil, wal, wbl, opts, nil) - require.NoError(t, err) - require.Equal(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal)) - initErr := h.Init(math.MinInt64) + // Fill the wbl and corrupt it. + { + wal, err := wlog.New(nil, nil, filepath.Join(dir, "wal"), compression.None) + require.NoError(t, err) + wbl, err := wlog.New(nil, nil, filepath.Join(dir, "wbl"), compression.None) + require.NoError(t, err) - var elb *errLoadWbl - require.ErrorAs(t, initErr, &elb) // Wbl errors are wrapped into errLoadWbl, make sure we can unwrap it. + for i := 1; i <= totalRecs; i++ { + // At this point insert a corrupted record. + if i-1 == expRecs { + require.NoError(t, wbl.Log(corrFunc(rec))) + continue + } + require.NoError(t, wbl.Log(rec)) + } - var cerr *wlog.CorruptionErr - require.ErrorAs(t, initErr, &cerr, "reading the wal didn't return corruption error") - require.NoError(t, h.Close()) // Head will close the wal as well. - } + opts := DefaultHeadOptions() + opts.ChunkRange = 1 + opts.ChunkDirRoot = wal.Dir() + opts.OutOfOrderCapMax.Store(30) + opts.OutOfOrderTimeWindow.Store(1000 * time.Minute.Milliseconds()) + h, err := NewHead(nil, nil, wal, wbl, opts, nil) + require.NoError(t, err) + require.Equal(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal)) + initErr := h.Init(math.MinInt64) - // Open the db to trigger a repair. - { - db, err := Open(dir, nil, nil, DefaultOptions(), nil) - require.NoError(t, err) - defer func() { - require.NoError(t, db.Close()) - }() - require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.walCorruptionsTotal)) - } + var elb *errLoadWbl + require.ErrorAs(t, initErr, &elb) // Wbl errors are wrapped into errLoadWbl, make sure we can unwrap it. - // Read the wbl content after the repair. - { - sr, err := wlog.NewSegmentsReader(filepath.Join(dir, "wbl")) - require.NoError(t, err) - defer sr.Close() - r := wlog.NewReader(sr) + var cerr *wlog.CorruptionErr + require.ErrorAs(t, initErr, &cerr, "reading the wal didn't return corruption error") + require.NoError(t, h.Close()) // Head will close the wal as well. + } - var actRec int - for r.Next() { - actRec++ - } - require.NoError(t, r.Err()) - require.Equal(t, expRecs, actRec, "Wrong number of intact records") + // Open the db to trigger a repair. + { + db, err := Open(dir, nil, nil, DefaultOptions(), nil) + require.NoError(t, err) + defer func() { + require.NoError(t, db.Close()) + }() + require.Equal(t, 1.0, prom_testutil.ToFloat64(db.head.metrics.walCorruptionsTotal)) + } + + // Read the wbl content after the repair. 
+ { + sr, err := wlog.NewSegmentsReader(filepath.Join(dir, "wbl")) + require.NoError(t, err) + defer sr.Close() + r := wlog.NewReader(sr) + + var actRec int + for r.Next() { + actRec++ + } + require.NoError(t, r.Err()) + require.Equal(t, expRecs, actRec, "Wrong number of intact records") + } + }) } } @@ -4365,289 +4378,293 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { } func TestChunkSnapshot(t *testing.T) { - head, _ := newTestHead(t, 120*4, compression.None, false) - defer func() { - head.opts.EnableMemorySnapshotOnShutdown = false - require.NoError(t, head.Close()) - }() + for _, enableStStorage := range []bool{false, true} { + t.Run("enableStStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + head, _ := newTestHead(t, 120*4, compression.None, false) + defer func() { + head.opts.EnableMemorySnapshotOnShutdown = false + require.NoError(t, head.Close()) + }() - type ex struct { - seriesLabels labels.Labels - e exemplar.Exemplar - } - - numSeries := 10 - expSeries := make(map[string][]chunks.Sample) - expHist := make(map[string][]chunks.Sample) - expFloatHist := make(map[string][]chunks.Sample) - expTombstones := make(map[storage.SeriesRef]tombstones.Intervals) - expExemplars := make([]ex, 0) - histograms := tsdbutil.GenerateTestGaugeHistograms(481) - floatHistogram := tsdbutil.GenerateTestGaugeFloatHistograms(481) - - addExemplar := func(app storage.Appender, ref storage.SeriesRef, lbls labels.Labels, ts int64) { - e := ex{ - seriesLabels: lbls, - e: exemplar.Exemplar{ - Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), - Value: rand.Float64(), - Ts: ts, - }, - } - expExemplars = append(expExemplars, e) - _, err := app.AppendExemplar(ref, e.seriesLabels, e.e) - require.NoError(t, err) - } - - checkSamples := func() { - q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) - require.NoError(t, err) - series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) - require.Equal(t, expSeries, series) - } - checkHistograms := func() { - q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) - require.NoError(t, err) - series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "hist", "baz.*")) - require.Equal(t, expHist, series) - } - checkFloatHistograms := func() { - q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) - require.NoError(t, err) - series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "floathist", "bat.*")) - require.Equal(t, expFloatHist, series) - } - checkTombstones := func() { - tr, err := head.Tombstones() - require.NoError(t, err) - actTombstones := make(map[storage.SeriesRef]tombstones.Intervals) - require.NoError(t, tr.Iter(func(ref storage.SeriesRef, itvs tombstones.Intervals) error { - for _, itv := range itvs { - actTombstones[ref].Add(itv) + type ex struct { + seriesLabels labels.Labels + e exemplar.Exemplar + } + + numSeries := 10 + expSeries := make(map[string][]chunks.Sample) + expHist := make(map[string][]chunks.Sample) + expFloatHist := make(map[string][]chunks.Sample) + expTombstones := make(map[storage.SeriesRef]tombstones.Intervals) + expExemplars := make([]ex, 0) + histograms := tsdbutil.GenerateTestGaugeHistograms(481) + floatHistogram := tsdbutil.GenerateTestGaugeFloatHistograms(481) + + addExemplar := func(app storage.Appender, ref storage.SeriesRef, lbls labels.Labels, ts int64) { + e := ex{ + seriesLabels: lbls, + e: exemplar.Exemplar{ + Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), + Value: rand.Float64(), + Ts: ts, + }, + } + 
expExemplars = append(expExemplars, e) + _, err := app.AppendExemplar(ref, e.seriesLabels, e.e) + require.NoError(t, err) + } + + checkSamples := func() { + q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "foo", "bar.*")) + require.Equal(t, expSeries, series) + } + checkHistograms := func() { + q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "hist", "baz.*")) + require.Equal(t, expHist, series) + } + checkFloatHistograms := func() { + q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + series := query(t, q, labels.MustNewMatcher(labels.MatchRegexp, "floathist", "bat.*")) + require.Equal(t, expFloatHist, series) + } + checkTombstones := func() { + tr, err := head.Tombstones() + require.NoError(t, err) + actTombstones := make(map[storage.SeriesRef]tombstones.Intervals) + require.NoError(t, tr.Iter(func(ref storage.SeriesRef, itvs tombstones.Intervals) error { + for _, itv := range itvs { + actTombstones[ref].Add(itv) + } + return nil + })) + require.Equal(t, expTombstones, actTombstones) + } + checkExemplars := func() { + actExemplars := make([]ex, 0, len(expExemplars)) + err := head.exemplars.IterateExemplars(func(seriesLabels labels.Labels, e exemplar.Exemplar) error { + actExemplars = append(actExemplars, ex{ + seriesLabels: seriesLabels, + e: e, + }) + return nil + }) + require.NoError(t, err) + // Verifies both existence of right exemplars and order of exemplars in the buffer. + testutil.RequireEqualWithOptions(t, expExemplars, actExemplars, []cmp.Option{cmp.AllowUnexported(ex{})}) + } + + var ( + wlast, woffset int + err error + ) + + closeHeadAndCheckSnapshot := func() { + require.NoError(t, head.Close()) + + _, sidx, soffset, err := LastChunkSnapshot(head.opts.ChunkDirRoot) + require.NoError(t, err) + require.Equal(t, wlast, sidx) + require.Equal(t, woffset, soffset) + } + + openHeadAndCheckReplay := func() { + w, err := wlog.NewSize(nil, nil, head.wal.Dir(), 32768, compression.None) + require.NoError(t, err) + head, err = NewHead(nil, nil, w, nil, head.opts, nil) + require.NoError(t, err) + require.NoError(t, head.Init(math.MinInt64)) + + checkSamples() + checkHistograms() + checkFloatHistograms() + checkTombstones() + checkExemplars() + } + + { // Initial data that goes into snapshot. + // Add some initial samples with >=1 m-map chunk. + app := head.Appender(context.Background()) + for i := 1; i <= numSeries; i++ { + lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i)) + lblStr := lbls.String() + lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i)) + lblsHistStr := lblsHist.String() + lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i)) + lblsFloatHistStr := lblsFloatHist.String() + + // 240 samples should m-map at least 1 chunk. 
+ for ts := int64(1); ts <= 240; ts++ { + val := rand.Float64() + expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil}) + ref, err := app.Append(0, lbls, ts, val) + require.NoError(t, err) + + hist := histograms[int(ts)] + expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil}) + _, err = app.AppendHistogram(0, lblsHist, ts, hist, nil) + require.NoError(t, err) + + floatHist := floatHistogram[int(ts)] + expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist}) + _, err = app.AppendHistogram(0, lblsFloatHist, ts, nil, floatHist) + require.NoError(t, err) + + // Add an exemplar and to create multiple WAL records. + if ts%10 == 0 { + addExemplar(app, ref, lbls, ts) + require.NoError(t, app.Commit()) + app = head.Appender(context.Background()) + } + } + } + require.NoError(t, app.Commit()) + + // Add some tombstones. + enc := record.Encoder{EnableSTStorage: enableStStorage} + for i := 1; i <= numSeries; i++ { + ref := storage.SeriesRef(i) + itvs := tombstones.Intervals{ + {Mint: 1234, Maxt: 2345}, + {Mint: 3456, Maxt: 4567}, + } + for _, itv := range itvs { + expTombstones[ref].Add(itv) + } + head.tombstones.AddInterval(ref, itvs...) + err := head.wal.Log(enc.Tombstones([]tombstones.Stone{ + {Ref: ref, Intervals: itvs}, + }, nil)) + require.NoError(t, err) + } + } + + // These references should be the ones used for the snapshot. + wlast, woffset, err = head.wal.LastSegmentAndOffset() + require.NoError(t, err) + if woffset != 0 && woffset < 32*1024 { + // The page is always filled before taking the snapshot. + woffset = 32 * 1024 + } + + { + // Creating snapshot and verifying it. + head.opts.EnableMemorySnapshotOnShutdown = true + closeHeadAndCheckSnapshot() // This will create a snapshot. + + // Test the replay of snapshot. + openHeadAndCheckReplay() + } + + { // Additional data to only include in WAL and m-mapped chunks and not snapshot. This mimics having an old snapshot on disk. + // Add more samples. + app := head.Appender(context.Background()) + for i := 1; i <= numSeries; i++ { + lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i)) + lblStr := lbls.String() + lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i)) + lblsHistStr := lblsHist.String() + lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i)) + lblsFloatHistStr := lblsFloatHist.String() + + // 240 samples should m-map at least 1 chunk. + for ts := int64(241); ts <= 480; ts++ { + val := rand.Float64() + expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil}) + ref, err := app.Append(0, lbls, ts, val) + require.NoError(t, err) + + hist := histograms[int(ts)] + expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil}) + _, err = app.AppendHistogram(0, lblsHist, ts, hist, nil) + require.NoError(t, err) + + floatHist := floatHistogram[int(ts)] + expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist}) + _, err = app.AppendHistogram(0, lblsFloatHist, ts, nil, floatHist) + require.NoError(t, err) + + // Add an exemplar and to create multiple WAL records. + if ts%10 == 0 { + addExemplar(app, ref, lbls, ts) + require.NoError(t, app.Commit()) + app = head.Appender(context.Background()) + } + } + } + require.NoError(t, app.Commit()) + + // Add more tombstones. 
+ enc := record.Encoder{EnableSTStorage: enableStStorage} + for i := 1; i <= numSeries; i++ { + ref := storage.SeriesRef(i) + itvs := tombstones.Intervals{ + {Mint: 12345, Maxt: 23456}, + {Mint: 34567, Maxt: 45678}, + } + for _, itv := range itvs { + expTombstones[ref].Add(itv) + } + head.tombstones.AddInterval(ref, itvs...) + err := head.wal.Log(enc.Tombstones([]tombstones.Stone{ + {Ref: ref, Intervals: itvs}, + }, nil)) + require.NoError(t, err) + } + } + { + // Close Head and verify that new snapshot was not created. + head.opts.EnableMemorySnapshotOnShutdown = false + closeHeadAndCheckSnapshot() // This should not create a snapshot. + + // Test the replay of snapshot, m-map chunks, and WAL. + head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot. + openHeadAndCheckReplay() + } + + // Creating another snapshot should delete the older snapshot and replay still works fine. + wlast, woffset, err = head.wal.LastSegmentAndOffset() + require.NoError(t, err) + if woffset != 0 && woffset < 32*1024 { + // The page is always filled before taking the snapshot. + woffset = 32 * 1024 + } + + { + // Close Head and verify that new snapshot was created. + closeHeadAndCheckSnapshot() + + // Verify that there is only 1 snapshot. + files, err := os.ReadDir(head.opts.ChunkDirRoot) + require.NoError(t, err) + snapshots := 0 + for i := len(files) - 1; i >= 0; i-- { + fi := files[i] + if strings.HasPrefix(fi.Name(), chunkSnapshotPrefix) { + snapshots++ + require.Equal(t, chunkSnapshotDir(wlast, woffset), fi.Name()) + } + } + require.Equal(t, 1, snapshots) + + // Test the replay of snapshot. + head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot. + + // Disabling exemplars to check that it does not hard fail replay + // https://github.com/prometheus/prometheus/issues/9437#issuecomment-933285870. + head.opts.EnableExemplarStorage = false + head.opts.MaxExemplars.Store(0) + expExemplars = expExemplars[:0] + + openHeadAndCheckReplay() + + require.Equal(t, 0.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) } - return nil - })) - require.Equal(t, expTombstones, actTombstones) - } - checkExemplars := func() { - actExemplars := make([]ex, 0, len(expExemplars)) - err := head.exemplars.IterateExemplars(func(seriesLabels labels.Labels, e exemplar.Exemplar) error { - actExemplars = append(actExemplars, ex{ - seriesLabels: seriesLabels, - e: e, - }) - return nil }) - require.NoError(t, err) - // Verifies both existence of right exemplars and order of exemplars in the buffer. - testutil.RequireEqualWithOptions(t, expExemplars, actExemplars, []cmp.Option{cmp.AllowUnexported(ex{})}) - } - - var ( - wlast, woffset int - err error - ) - - closeHeadAndCheckSnapshot := func() { - require.NoError(t, head.Close()) - - _, sidx, soffset, err := LastChunkSnapshot(head.opts.ChunkDirRoot) - require.NoError(t, err) - require.Equal(t, wlast, sidx) - require.Equal(t, woffset, soffset) - } - - openHeadAndCheckReplay := func() { - w, err := wlog.NewSize(nil, nil, head.wal.Dir(), 32768, compression.None) - require.NoError(t, err) - head, err = NewHead(nil, nil, w, nil, head.opts, nil) - require.NoError(t, err) - require.NoError(t, head.Init(math.MinInt64)) - - checkSamples() - checkHistograms() - checkFloatHistograms() - checkTombstones() - checkExemplars() - } - - { // Initial data that goes into snapshot. - // Add some initial samples with >=1 m-map chunk. 
- app := head.Appender(context.Background()) - for i := 1; i <= numSeries; i++ { - lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i)) - lblStr := lbls.String() - lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i)) - lblsHistStr := lblsHist.String() - lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i)) - lblsFloatHistStr := lblsFloatHist.String() - - // 240 samples should m-map at least 1 chunk. - for ts := int64(1); ts <= 240; ts++ { - val := rand.Float64() - expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil}) - ref, err := app.Append(0, lbls, ts, val) - require.NoError(t, err) - - hist := histograms[int(ts)] - expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil}) - _, err = app.AppendHistogram(0, lblsHist, ts, hist, nil) - require.NoError(t, err) - - floatHist := floatHistogram[int(ts)] - expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist}) - _, err = app.AppendHistogram(0, lblsFloatHist, ts, nil, floatHist) - require.NoError(t, err) - - // Add an exemplar and to create multiple WAL records. - if ts%10 == 0 { - addExemplar(app, ref, lbls, ts) - require.NoError(t, app.Commit()) - app = head.Appender(context.Background()) - } - } - } - require.NoError(t, app.Commit()) - - // Add some tombstones. - var enc record.Encoder - for i := 1; i <= numSeries; i++ { - ref := storage.SeriesRef(i) - itvs := tombstones.Intervals{ - {Mint: 1234, Maxt: 2345}, - {Mint: 3456, Maxt: 4567}, - } - for _, itv := range itvs { - expTombstones[ref].Add(itv) - } - head.tombstones.AddInterval(ref, itvs...) - err := head.wal.Log(enc.Tombstones([]tombstones.Stone{ - {Ref: ref, Intervals: itvs}, - }, nil)) - require.NoError(t, err) - } - } - - // These references should be the ones used for the snapshot. - wlast, woffset, err = head.wal.LastSegmentAndOffset() - require.NoError(t, err) - if woffset != 0 && woffset < 32*1024 { - // The page is always filled before taking the snapshot. - woffset = 32 * 1024 - } - - { - // Creating snapshot and verifying it. - head.opts.EnableMemorySnapshotOnShutdown = true - closeHeadAndCheckSnapshot() // This will create a snapshot. - - // Test the replay of snapshot. - openHeadAndCheckReplay() - } - - { // Additional data to only include in WAL and m-mapped chunks and not snapshot. This mimics having an old snapshot on disk. - // Add more samples. - app := head.Appender(context.Background()) - for i := 1; i <= numSeries; i++ { - lbls := labels.FromStrings("foo", fmt.Sprintf("bar%d", i)) - lblStr := lbls.String() - lblsHist := labels.FromStrings("hist", fmt.Sprintf("baz%d", i)) - lblsHistStr := lblsHist.String() - lblsFloatHist := labels.FromStrings("floathist", fmt.Sprintf("bat%d", i)) - lblsFloatHistStr := lblsFloatHist.String() - - // 240 samples should m-map at least 1 chunk. 
- for ts := int64(241); ts <= 480; ts++ { - val := rand.Float64() - expSeries[lblStr] = append(expSeries[lblStr], sample{0, ts, val, nil, nil}) - ref, err := app.Append(0, lbls, ts, val) - require.NoError(t, err) - - hist := histograms[int(ts)] - expHist[lblsHistStr] = append(expHist[lblsHistStr], sample{0, ts, 0, hist, nil}) - _, err = app.AppendHistogram(0, lblsHist, ts, hist, nil) - require.NoError(t, err) - - floatHist := floatHistogram[int(ts)] - expFloatHist[lblsFloatHistStr] = append(expFloatHist[lblsFloatHistStr], sample{0, ts, 0, nil, floatHist}) - _, err = app.AppendHistogram(0, lblsFloatHist, ts, nil, floatHist) - require.NoError(t, err) - - // Add an exemplar and to create multiple WAL records. - if ts%10 == 0 { - addExemplar(app, ref, lbls, ts) - require.NoError(t, app.Commit()) - app = head.Appender(context.Background()) - } - } - } - require.NoError(t, app.Commit()) - - // Add more tombstones. - var enc record.Encoder - for i := 1; i <= numSeries; i++ { - ref := storage.SeriesRef(i) - itvs := tombstones.Intervals{ - {Mint: 12345, Maxt: 23456}, - {Mint: 34567, Maxt: 45678}, - } - for _, itv := range itvs { - expTombstones[ref].Add(itv) - } - head.tombstones.AddInterval(ref, itvs...) - err := head.wal.Log(enc.Tombstones([]tombstones.Stone{ - {Ref: ref, Intervals: itvs}, - }, nil)) - require.NoError(t, err) - } - } - { - // Close Head and verify that new snapshot was not created. - head.opts.EnableMemorySnapshotOnShutdown = false - closeHeadAndCheckSnapshot() // This should not create a snapshot. - - // Test the replay of snapshot, m-map chunks, and WAL. - head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot. - openHeadAndCheckReplay() - } - - // Creating another snapshot should delete the older snapshot and replay still works fine. - wlast, woffset, err = head.wal.LastSegmentAndOffset() - require.NoError(t, err) - if woffset != 0 && woffset < 32*1024 { - // The page is always filled before taking the snapshot. - woffset = 32 * 1024 - } - - { - // Close Head and verify that new snapshot was created. - closeHeadAndCheckSnapshot() - - // Verify that there is only 1 snapshot. - files, err := os.ReadDir(head.opts.ChunkDirRoot) - require.NoError(t, err) - snapshots := 0 - for i := len(files) - 1; i >= 0; i-- { - fi := files[i] - if strings.HasPrefix(fi.Name(), chunkSnapshotPrefix) { - snapshots++ - require.Equal(t, chunkSnapshotDir(wlast, woffset), fi.Name()) - } - } - require.Equal(t, 1, snapshots) - - // Test the replay of snapshot. - head.opts.EnableMemorySnapshotOnShutdown = true // Enabled to read from snapshot. - - // Disabling exemplars to check that it does not hard fail replay - // https://github.com/prometheus/prometheus/issues/9437#issuecomment-933285870. - head.opts.EnableExemplarStorage = false - head.opts.MaxExemplars.Store(0) - expExemplars = expExemplars[:0] - - openHeadAndCheckReplay() - - require.Equal(t, 0.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) } } @@ -5375,70 +5392,74 @@ func TestAppendingDifferentEncodingToSameSeries(t *testing.T) { // Tests https://github.com/prometheus/prometheus/issues/9725. 
func TestChunkSnapshotReplayBug(t *testing.T) { - dir := t.TempDir() - wal, err := wlog.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, compression.Snappy) - require.NoError(t, err) + for _, enableStStorage := range []bool{false, true} { + t.Run("enableStStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + dir := t.TempDir() + wal, err := wlog.NewSize(nil, nil, filepath.Join(dir, "wal"), 32768, compression.Snappy) + require.NoError(t, err) - // Write few series records and samples such that the series references are not in order in the WAL - // for status_code="200". - var buf []byte - for i := 1; i <= 1000; i++ { - var ref chunks.HeadSeriesRef - if i <= 500 { - ref = chunks.HeadSeriesRef(i * 100) - } else { - ref = chunks.HeadSeriesRef((i - 500) * 50) - } - seriesRec := record.RefSeries{ - Ref: ref, - Labels: labels.FromStrings( - "__name__", "request_duration", - "status_code", "200", - "foo", fmt.Sprintf("baz%d", rand.Int()), - ), - } - // Add a sample so that the series is not garbage collected. - samplesRec := record.RefSample{Ref: ref, T: 1000, V: 1000} - var enc record.Encoder + // Write few series records and samples such that the series references are not in order in the WAL + // for status_code="200". + var buf []byte + for i := 1; i <= 1000; i++ { + var ref chunks.HeadSeriesRef + if i <= 500 { + ref = chunks.HeadSeriesRef(i * 100) + } else { + ref = chunks.HeadSeriesRef((i - 500) * 50) + } + seriesRec := record.RefSeries{ + Ref: ref, + Labels: labels.FromStrings( + "__name__", "request_duration", + "status_code", "200", + "foo", fmt.Sprintf("baz%d", rand.Int()), + ), + } + // Add a sample so that the series is not garbage collected. + samplesRec := record.RefSample{Ref: ref, T: 1000, V: 1000} + enc := record.Encoder{EnableSTStorage: enableStStorage} - rec := enc.Series([]record.RefSeries{seriesRec}, buf) - buf = rec[:0] - require.NoError(t, wal.Log(rec)) - rec = enc.Samples([]record.RefSample{samplesRec}, buf) - buf = rec[:0] - require.NoError(t, wal.Log(rec)) + rec := enc.Series([]record.RefSeries{seriesRec}, buf) + buf = rec[:0] + require.NoError(t, wal.Log(rec)) + rec = enc.Samples([]record.RefSample{samplesRec}, buf) + buf = rec[:0] + require.NoError(t, wal.Log(rec)) + } + + // Write a corrupt snapshot to fail the replay on startup. + snapshotName := chunkSnapshotDir(0, 100) + cpdir := filepath.Join(dir, snapshotName) + require.NoError(t, os.MkdirAll(cpdir, 0o777)) + + err = os.WriteFile(filepath.Join(cpdir, "00000000"), []byte{1, 5, 3, 5, 6, 7, 4, 2, 2}, 0o777) + require.NoError(t, err) + + opts := DefaultHeadOptions() + opts.ChunkDirRoot = dir + opts.EnableMemorySnapshotOnShutdown = true + head, err := NewHead(nil, nil, wal, nil, opts, nil) + require.NoError(t, err) + require.NoError(t, head.Init(math.MinInt64)) + defer func() { + require.NoError(t, head.Close()) + }() + + // Snapshot replay should error out. + require.Equal(t, 1.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) + + // Querying `request_duration{status_code!="200"}` should return no series since all of + // them have status_code="200". + q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) + require.NoError(t, err) + series := query(t, q, + labels.MustNewMatcher(labels.MatchEqual, "__name__", "request_duration"), + labels.MustNewMatcher(labels.MatchNotEqual, "status_code", "200"), + ) + require.Empty(t, series, "there should be no series found") + }) } - - // Write a corrupt snapshot to fail the replay on startup. 
- snapshotName := chunkSnapshotDir(0, 100) - cpdir := filepath.Join(dir, snapshotName) - require.NoError(t, os.MkdirAll(cpdir, 0o777)) - - err = os.WriteFile(filepath.Join(cpdir, "00000000"), []byte{1, 5, 3, 5, 6, 7, 4, 2, 2}, 0o777) - require.NoError(t, err) - - opts := DefaultHeadOptions() - opts.ChunkDirRoot = dir - opts.EnableMemorySnapshotOnShutdown = true - head, err := NewHead(nil, nil, wal, nil, opts, nil) - require.NoError(t, err) - require.NoError(t, head.Init(math.MinInt64)) - defer func() { - require.NoError(t, head.Close()) - }() - - // Snapshot replay should error out. - require.Equal(t, 1.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) - - // Querying `request_duration{status_code!="200"}` should return no series since all of - // them have status_code="200". - q, err := NewBlockQuerier(head, math.MinInt64, math.MaxInt64) - require.NoError(t, err) - series := query(t, q, - labels.MustNewMatcher(labels.MatchEqual, "__name__", "request_duration"), - labels.MustNewMatcher(labels.MatchNotEqual, "status_code", "200"), - ) - require.Empty(t, series, "there should be no series found") } func TestChunkSnapshotTakenAfterIncompleteSnapshot(t *testing.T) { diff --git a/tsdb/head_wal.go b/tsdb/head_wal.go index b323f0dbf6..2c957d8c08 100644 --- a/tsdb/head_wal.go +++ b/tsdb/head_wal.go @@ -170,7 +170,7 @@ func (h *Head) loadWAL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch return } decoded <- series - case record.Samples: + case record.Samples, record.SamplesV2: samples := h.wlReplaySamplesPool.Get()[:0] samples, err = dec.Samples(r.Record(), samples) if err != nil { @@ -799,7 +799,7 @@ func (h *Head) loadWBL(r *wlog.Reader, syms *labels.SymbolTable, multiRef map[ch var err error rec := r.Record() switch dec.Type(rec) { - case record.Samples: + case record.Samples, record.SamplesV2: samples := h.wlReplaySamplesPool.Get()[:0] samples, err = dec.Samples(rec, samples) if err != nil { @@ -1401,7 +1401,7 @@ func (h *Head) ChunkSnapshot() (*ChunkSnapshotStats, error) { // Assuming 100 bytes (overestimate) per exemplar, that's ~1MB. maxExemplarsPerRecord := 10000 batch := make([]record.RefExemplar, 0, maxExemplarsPerRecord) - enc := record.Encoder{} + enc := record.Encoder{EnableSTStorage: h.opts.EnableSTStorage} flushExemplars := func() error { if len(batch) == 0 { return nil diff --git a/tsdb/record/bench_test.go b/tsdb/record/bench_test.go new file mode 100644 index 0000000000..f65cb34ff3 --- /dev/null +++ b/tsdb/record/bench_test.go @@ -0,0 +1,207 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
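+
+// These tests and benchmarks exercise the Samples record encoding with ST
+// storage both disabled (classic Samples records) and enabled (SamplesV2),
+// across the available WAL compression types; the benchmarks default to the
+// SamplesV2 path via UseV2.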
+ +package record_test + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/prometheus/prometheus/tsdb/compression" + "github.com/prometheus/prometheus/tsdb/record" + "github.com/prometheus/prometheus/util/testrecord" +) + +func zeroOutSTs(samples []record.RefSample) []record.RefSample { + out := make([]record.RefSample, len(samples)) + for i := range samples { + out[i] = samples[i] + out[i].ST = 0 + } + return out +} + +func TestEncodeDecode(t *testing.T) { + for _, enableStStorage := range []bool{false, true} { + for _, tcase := range []testrecord.RefSamplesCase{ + testrecord.Realistic1000Samples, + testrecord.Realistic1000WithVariableSTSamples, + testrecord.Realistic1000WithConstSTSamples, + testrecord.WorstCase1000, + testrecord.WorstCase1000WithSTSamples, + } { + var ( + dec record.Decoder + buf []byte + enc = record.Encoder{EnableSTStorage: enableStStorage} + ) + + s := testrecord.GenTestRefSamplesCase(t, tcase) + + { + got, err := dec.Samples(enc.Samples(s, nil), nil) + require.NoError(t, err) + // if ST is off, we expect all STs to be zero + expected := s + if !enableStStorage { + expected = zeroOutSTs(s) + } + + require.Equal(t, expected, got) + } + + // With byte buffer (append!) + { + buf = make([]byte, 10, 1e5) + got, err := dec.Samples(enc.Samples(s, buf)[10:], nil) + require.NoError(t, err) + + expected := s + if !enableStStorage { + expected = zeroOutSTs(s) + } + require.Equal(t, expected, got) + } + + // With sample slice + { + samples := make([]record.RefSample, 0, len(s)+1) + got, err := dec.Samples(enc.Samples(s, nil), samples) + require.NoError(t, err) + expected := s + if !enableStStorage { + expected = zeroOutSTs(s) + } + require.Equal(t, expected, got) + } + + // With compression. + { + buf := enc.Samples(s, nil) + + cEnc, err := compression.NewEncoder() + require.NoError(t, err) + buf, _, err = cEnc.Encode(compression.Zstd, buf, nil) + require.NoError(t, err) + + buf, err = compression.NewDecoder().Decode(compression.Zstd, buf, nil) + require.NoError(t, err) + + got, err := dec.Samples(buf, nil) + require.NoError(t, err) + expected := s + if !enableStStorage { + expected = zeroOutSTs(s) + } + require.Equal(t, expected, got) + } + } + } +} + +var ( + compressions = []compression.Type{compression.None, compression.Snappy, compression.Zstd} + dataCases = []testrecord.RefSamplesCase{ + testrecord.Realistic1000Samples, + testrecord.Realistic1000WithVariableSTSamples, + testrecord.Realistic1000WithConstSTSamples, + testrecord.WorstCase1000, + testrecord.WorstCase1000WithSTSamples, + } + UseV2 = true +) + +/* + export bench=encode-v2 && go test ./tsdb/record/... \ + -run '^$' -bench '^BenchmarkEncode_Samples' \ + -benchtime 5s -count 6 -cpu 2 -timeout 999m \ + | tee ${bench}.txt +*/ +func BenchmarkEncode_Samples(b *testing.B) { + for _, compr := range compressions { + for _, data := range dataCases { + b.Run(fmt.Sprintf("compr=%v/data=%v", compr, data), func(b *testing.B) { + var ( + samples = testrecord.GenTestRefSamplesCase(b, data) + enc = record.Encoder{EnableSTStorage: UseV2} + buf []byte + cBuf []byte + ) + + cEnc, err := compression.NewEncoder() + require.NoError(b, err) + + // Warm up. 
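+				// (a single untimed encode+compress pass sizes buf and cBuf so the
+				// timed loop below measures steady-state buffer reuse)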
+ buf = enc.Samples(samples, buf[:0]) + cBuf, _, err = cEnc.Encode(compr, buf, cBuf[:0]) + require.NoError(b, err) + + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + buf = enc.Samples(samples, buf[:0]) + b.ReportMetric(float64(len(buf)), "B/rec") + + cBuf, _, _ = cEnc.Encode(compr, buf, cBuf[:0]) + b.ReportMetric(float64(len(cBuf)), "B/compressed-rec") + } + }) + } + } +} + +/* + export bench=decode-v2 && go test ./tsdb/record/... \ + -run '^$' -bench '^BenchmarkDecode_Samples' \ + -benchtime 5s -count 6 -cpu 2 -timeout 999m \ + | tee ${bench}.txt +*/ +func BenchmarkDecode_Samples(b *testing.B) { + for _, compr := range compressions { + for _, data := range dataCases { + b.Run(fmt.Sprintf("compr=%v/data=%v", compr, data), func(b *testing.B) { + var ( + samples = testrecord.GenTestRefSamplesCase(b, data) + enc = record.Encoder{EnableSTStorage: UseV2} + dec record.Decoder + cDec = compression.NewDecoder() + cBuf []byte + samplesBuf []record.RefSample + ) + + buf := enc.Samples(samples, nil) + + cEnc, err := compression.NewEncoder() + require.NoError(b, err) + + buf, _, err = cEnc.Encode(compr, buf, nil) + require.NoError(b, err) + + // Warm up. + cBuf, err = cDec.Decode(compr, buf, cBuf[:0]) + require.NoError(b, err) + samplesBuf, err = dec.Samples(cBuf, samplesBuf[:0]) + require.NoError(b, err) + + b.ReportAllocs() + b.ResetTimer() + for b.Loop() { + cBuf, _ = cDec.Decode(compr, buf, cBuf[:0]) + samplesBuf, _ = dec.Samples(cBuf, samplesBuf[:0]) + } + }) + } + } +} diff --git a/tsdb/record/record.go b/tsdb/record/record.go index 106b8e51bc..d03c8c62d3 100644 --- a/tsdb/record/record.go +++ b/tsdb/record/record.go @@ -58,6 +58,8 @@ const ( CustomBucketsHistogramSamples Type = 9 // CustomBucketsFloatHistogramSamples is used to match WAL records of type Float Histogram with custom buckets. CustomBucketsFloatHistogramSamples Type = 10 + // SamplesV2 is an enhanced sample record with an encoding scheme that allows storing float samples with timestamp and an optional ST per sample. + SamplesV2 Type = 11 ) func (rt Type) String() string { @@ -66,6 +68,8 @@ func (rt Type) String() string { return "series" case Samples: return "samples" + case SamplesV2: + return "samples-v2" case Tombstones: return "tombstones" case Exemplars: @@ -157,12 +161,12 @@ type RefSeries struct { Labels labels.Labels } -// RefSample is a timestamp/value pair associated with a reference to a series. +// RefSample is a timestamp/st/value struct associated with a reference to a series. // TODO(beorn7): Perhaps make this "polymorphic", including histogram and float-histogram pointers? Then get rid of RefHistogramSample. type RefSample struct { - Ref chunks.HeadSeriesRef - T int64 - V float64 + Ref chunks.HeadSeriesRef + ST, T int64 + V float64 } // RefMetadata is the metadata associated with a series ID. @@ -182,6 +186,7 @@ type RefExemplar struct { } // RefHistogramSample is a histogram. +// TODO(owilliams): Add support for ST. type RefHistogramSample struct { Ref chunks.HeadSeriesRef T int64 @@ -189,6 +194,7 @@ type RefHistogramSample struct { } // RefFloatHistogramSample is a float histogram. +// TODO(owilliams): Add support for ST. 
type RefFloatHistogramSample struct { Ref chunks.HeadSeriesRef T int64 @@ -220,7 +226,7 @@ func (*Decoder) Type(rec []byte) Type { return Unknown } switch t := Type(rec[0]); t { - case Series, Samples, Tombstones, Exemplars, MmapMarkers, Metadata, HistogramSamples, FloatHistogramSamples, CustomBucketsHistogramSamples, CustomBucketsFloatHistogramSamples: + case Series, Samples, SamplesV2, Tombstones, Exemplars, MmapMarkers, Metadata, HistogramSamples, FloatHistogramSamples, CustomBucketsHistogramSamples, CustomBucketsFloatHistogramSamples: return t } return Unknown @@ -311,12 +317,20 @@ func (d *Decoder) DecodeLabels(dec *encoding.Decbuf) labels.Labels { } // Samples appends samples in rec to the given slice. -func (*Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) { +func (d *Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) { dec := encoding.Decbuf{B: rec} - - if Type(dec.Byte()) != Samples { - return nil, errors.New("invalid record type") + switch typ := dec.Byte(); Type(typ) { + case Samples: + return d.samplesV1(&dec, samples) + case SamplesV2: + return d.samplesV2(&dec, samples) + default: + return nil, fmt.Errorf("invalid record type %v, expected Samples(2) or SamplesV2(11)", typ) } +} + +// samplesV1 appends samples in rec to the given slice, while ignoring ST information. +func (*Decoder) samplesV1(dec *encoding.Decbuf, samples []RefSample) ([]RefSample, error) { if dec.Len() == 0 { return samples, nil } @@ -349,6 +363,60 @@ func (*Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) { return samples, nil } +// samplesV2 appends samples in rec to the given slice using the V2 algorithm, +// which is more efficient and supports ST (See Encoder.samplesV2 definition). +func (*Decoder) samplesV2(dec *encoding.Decbuf, samples []RefSample) ([]RefSample, error) { + if dec.Len() == 0 { + return samples, nil + } + // Allow 1 byte for each varint and 8 for the value; the output slice must be at least that big. + if minSize := dec.Len() / (1 + 1 + 8); cap(samples) < minSize { + samples = make([]RefSample, 0, minSize) + } + var firstT, firstST int64 + for len(dec.B) > 0 && dec.Err() == nil { + var prev RefSample + var ref, t, ST int64 + var val uint64 + + if len(samples) == 0 { + ref = dec.Varint64() + firstT = dec.Varint64() + t = firstT + ST = dec.Varint64() + firstST = ST + } else { + prev = samples[len(samples)-1] + ref = int64(prev.Ref) + dec.Varint64() + t = firstT + dec.Varint64() + stMarker := dec.Byte() + switch stMarker { + case noST: + case sameST: + ST = prev.ST + default: + ST = firstST + dec.Varint64() + } + } + + val = dec.Be64() + samples = append(samples, RefSample{ + Ref: chunks.HeadSeriesRef(ref), + ST: ST, + T: t, + V: math.Float64frombits(val), + }) + } + + if dec.Err() != nil { + return nil, fmt.Errorf("decode error after %d samples: %w", len(samples), dec.Err()) + } + if len(dec.B) > 0 { + return nil, fmt.Errorf("unexpected %d bytes left in entry", len(dec.B)) + } + return samples, nil +} + // Tombstones appends tombstones in rec to the given slice. func (*Decoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]tombstones.Stone, error) { dec := encoding.Decbuf{B: rec} @@ -656,7 +724,11 @@ func DecodeFloatHistogram(buf *encoding.Decbuf, fh *histogram.FloatHistogram) { // Encoder encodes series, sample, and tombstones records. // The zero value is ready to use. 
-type Encoder struct{}
+type Encoder struct {
+	// EnableSTStorage enables the SamplesV2 encoding, which is more efficient
+	// than V1 and supports start time per sample.
+	EnableSTStorage bool
+}
 
 // Series appends the encoded series to b and returns the resulting slice.
 func (*Encoder) Series(series []RefSeries, b []byte) []byte {
@@ -702,7 +774,16 @@ func EncodeLabels(buf *encoding.Encbuf, lbls labels.Labels) {
 }
 
 // Samples appends the encoded samples to b and returns the resulting slice.
-func (*Encoder) Samples(samples []RefSample, b []byte) []byte {
+// Depending on whether ST storage is enabled, it writes either a Samples or a SamplesV2 record.
+func (e *Encoder) Samples(samples []RefSample, b []byte) []byte {
+	if e.EnableSTStorage {
+		return e.samplesV2(samples, b)
+	}
+	return e.samplesV1(samples, b)
+}
+
+// samplesV1 appends the encoded samples to b as a classic Samples (V1) record, dropping any ST values, and returns the resulting slice.
+func (*Encoder) samplesV1(samples []RefSample, b []byte) []byte {
 	buf := encoding.Encbuf{B: b}
 	buf.PutByte(byte(Samples))
 
@@ -725,6 +806,56 @@ func (*Encoder) Samples(samples []RefSample, b []byte) []byte {
 	return buf.Get()
 }
 
+const (
+	// Start timestamp marker values for indicating trivial cases.
+
+	noST       byte = iota // Sample has no start timestamp.
+	sameST                 // Start timestamp is the same as the previous sample's start timestamp.
+	explicitST             // Explicit start timestamp, stored as a delta to the first sample's start timestamp.
+)
+
+// samplesV2 appends the encoded samples to b as a SamplesV2 record and returns
+// the resulting slice. It uses per-sample delta encoding, which is more compact
+// than V1 and allows storing an optional start timestamp (ST) per sample.
+func (*Encoder) samplesV2(samples []RefSample, b []byte) []byte {
+	buf := encoding.Encbuf{B: b}
+	buf.PutByte(byte(SamplesV2))
+
+	if len(samples) == 0 {
+		return buf.Get()
+	}
+
+	// Store first ref, timestamp, ST, and value.
+	first := samples[0]
+	buf.PutVarint64(int64(first.Ref))
+	buf.PutVarint64(first.T)
+	buf.PutVarint64(first.ST)
+	buf.PutBE64(math.Float64bits(first.V))
+
+	// For subsequent samples, the ref is stored as a delta to the previous
+	// sample, while T and an explicit ST are stored as deltas to the first
+	// sample; a marker byte handles the trivial ST cases without a delta.
+	for i := 1; i < len(samples); i++ {
+		s := samples[i]
+		prev := samples[i-1]
+
+		buf.PutVarint64(int64(s.Ref) - int64(prev.Ref))
+		buf.PutVarint64(s.T - first.T)
+
+		switch s.ST {
+		case 0:
+			buf.PutByte(noST)
+		case prev.ST:
+			buf.PutByte(sameST)
+		default:
+			buf.PutByte(explicitST)
+			buf.PutVarint64(s.ST - first.ST)
+		}
+		buf.PutBE64(math.Float64bits(s.V))
+	}
+	return buf.Get()
+}
+
 // Tombstones appends the encoded tombstones to b and returns the resulting slice.
 func (*Encoder) Tombstones(tstones []tombstones.Stone, b []byte) []byte {
 	buf := encoding.Encbuf{B: b}
diff --git a/tsdb/record/record_test.go b/tsdb/record/record_test.go
index 8ebd805d4d..5f401ebd9d 100644
--- a/tsdb/record/record_test.go
+++ b/tsdb/record/record_test.go
@@ -76,15 +76,63 @@ func TestRecord_EncodeDecode(t *testing.T) {
 	require.NoError(t, err)
 	require.Equal(t, metadata, decMetadata)
 
+	// Without ST.
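+	// The default (zero value) Encoder always produces classic Samples records;
+	// ST-aware SamplesV2 records are only written once EnableSTStorage is set,
+	// which the cases further below exercise.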
samples := []RefSample{ {Ref: 0, T: 12423423, V: 1.2345}, {Ref: 123, T: -1231, V: -123}, {Ref: 2, T: 0, V: 99999}, } - decSamples, err := dec.Samples(enc.Samples(samples, nil), nil) + encoded := enc.Samples(samples, nil) + require.Equal(t, Samples, dec.Type(encoded)) + decSamples, err := dec.Samples(encoded, nil) require.NoError(t, err) require.Equal(t, samples, decSamples) + enc = Encoder{EnableSTStorage: true} + // Without ST again, but with V1 encoder that enables SamplesV2 + samples = []RefSample{ + {Ref: 0, T: 12423423, V: 1.2345}, + {Ref: 123, T: -1231, V: -123}, + {Ref: 2, T: 0, V: 99999}, + } + encoded = enc.Samples(samples, nil) + require.Equal(t, SamplesV2, dec.Type(encoded)) + decSamples, err = dec.Samples(encoded, nil) + require.NoError(t, err) + require.Equal(t, samples, decSamples) + + // With ST. + samplesWithST := []RefSample{ + {Ref: 0, T: 12423423, ST: 14, V: 1.2345}, + {Ref: 123, T: -1231, ST: 14, V: -123}, + {Ref: 2, T: 0, ST: 14, V: 99999}, + } + encoded = enc.Samples(samplesWithST, nil) + require.Equal(t, SamplesV2, dec.Type(encoded)) + decSamples, err = dec.Samples(encoded, nil) + require.NoError(t, err) + require.Equal(t, samplesWithST, decSamples) + + // With ST (ST[i] == T[i-1]) + samplesWithSTDelta := []RefSample{ + {Ref: 0, T: 12423400, ST: 12423300, V: 1.2345}, + {Ref: 123, T: 12423500, ST: 12423400, V: -123}, + {Ref: 2, T: 12423600, ST: 12423500, V: 99999}, + } + decSamples, err = dec.Samples(enc.Samples(samplesWithSTDelta, nil), nil) + require.NoError(t, err) + require.Equal(t, samplesWithSTDelta, decSamples) + + // With ST (ST[i] == ST[i-1]) + samplesWithConstST := []RefSample{ + {Ref: 0, T: 12423400, ST: 12423300, V: 1.2345}, + {Ref: 123, T: 12423500, ST: 12423300, V: -123}, + {Ref: 2, T: 12423600, ST: 12423300, V: 99999}, + } + decSamples, err = dec.Samples(enc.Samples(samplesWithConstST, nil), nil) + require.NoError(t, err) + require.Equal(t, samplesWithConstST, decSamples) + // Intervals get split up into single entries. So we don't get back exactly // what we put in. 
tstones := []tombstones.Stone{ @@ -227,252 +275,262 @@ func TestRecord_EncodeDecode(t *testing.T) { } func TestRecord_DecodeInvalidHistogramSchema(t *testing.T) { - for _, schema := range []int32{-100, 100} { - t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) { - var enc Encoder + for _, enableStStorage := range []bool{false, true} { + for _, schema := range []int32{-100, 100} { + t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableStStorage), func(t *testing.T) { + enc := Encoder{EnableSTStorage: enableStStorage} - var output bytes.Buffer - logger := promslog.New(&promslog.Config{Writer: &output}) - dec := NewDecoder(labels.NewSymbolTable(), logger) - histograms := []RefHistogramSample{ - { - Ref: 56, - T: 1234, - H: &histogram.Histogram{ - Count: 5, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 18.4 * rand.Float64(), - Schema: schema, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, + var output bytes.Buffer + logger := promslog.New(&promslog.Config{Writer: &output}) + dec := NewDecoder(labels.NewSymbolTable(), logger) + histograms := []RefHistogramSample{ + { + Ref: 56, + T: 1234, + H: &histogram.Histogram{ + Count: 5, + ZeroCount: 2, + ZeroThreshold: 0.001, + Sum: 18.4 * rand.Float64(), + Schema: schema, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 1, -1, 0}, }, - PositiveBuckets: []int64{1, 1, -1, 0}, }, - }, - } - histSamples, _ := enc.HistogramSamples(histograms, nil) - decHistograms, err := dec.HistogramSamples(histSamples, nil) - require.NoError(t, err) - require.Empty(t, decHistograms) - require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record") - }) + } + histSamples, _ := enc.HistogramSamples(histograms, nil) + decHistograms, err := dec.HistogramSamples(histSamples, nil) + require.NoError(t, err) + require.Empty(t, decHistograms) + require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record") + }) + } } } func TestRecord_DecodeInvalidFloatHistogramSchema(t *testing.T) { - for _, schema := range []int32{-100, 100} { - t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) { - var enc Encoder + for _, enableStStorage := range []bool{false, true} { + for _, schema := range []int32{-100, 100} { + t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableStStorage), func(t *testing.T) { + enc := Encoder{EnableSTStorage: enableStStorage} - var output bytes.Buffer - logger := promslog.New(&promslog.Config{Writer: &output}) - dec := NewDecoder(labels.NewSymbolTable(), logger) - histograms := []RefFloatHistogramSample{ - { - Ref: 56, - T: 1234, - FH: &histogram.FloatHistogram{ - Count: 5, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 18.4 * rand.Float64(), - Schema: schema, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, + var output bytes.Buffer + logger := promslog.New(&promslog.Config{Writer: &output}) + dec := NewDecoder(labels.NewSymbolTable(), logger) + histograms := []RefFloatHistogramSample{ + { + Ref: 56, + T: 1234, + FH: &histogram.FloatHistogram{ + Count: 5, + ZeroCount: 2, + ZeroThreshold: 0.001, + Sum: 18.4 * rand.Float64(), + Schema: schema, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []float64{1, 1, -1, 0}, }, - PositiveBuckets: []float64{1, 1, -1, 0}, }, - }, - } - histSamples, _ := enc.FloatHistogramSamples(histograms, nil) - decHistograms, err := 
dec.FloatHistogramSamples(histSamples, nil) - require.NoError(t, err) - require.Empty(t, decHistograms) - require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record") - }) + } + histSamples, _ := enc.FloatHistogramSamples(histograms, nil) + decHistograms, err := dec.FloatHistogramSamples(histSamples, nil) + require.NoError(t, err) + require.Empty(t, decHistograms) + require.Contains(t, output.String(), "skipping histogram with unknown schema in WAL record") + }) + } } } func TestRecord_DecodeTooHighResolutionHistogramSchema(t *testing.T) { - for _, schema := range []int32{9, 52} { - t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) { - var enc Encoder + for _, enableStStorage := range []bool{false, true} { + for _, schema := range []int32{9, 52} { + t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableStStorage), func(t *testing.T) { + enc := Encoder{EnableSTStorage: enableStStorage} - var output bytes.Buffer - logger := promslog.New(&promslog.Config{Writer: &output}) - dec := NewDecoder(labels.NewSymbolTable(), logger) - histograms := []RefHistogramSample{ - { - Ref: 56, - T: 1234, - H: &histogram.Histogram{ - Count: 5, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 18.4 * rand.Float64(), - Schema: schema, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, + var output bytes.Buffer + logger := promslog.New(&promslog.Config{Writer: &output}) + dec := NewDecoder(labels.NewSymbolTable(), logger) + histograms := []RefHistogramSample{ + { + Ref: 56, + T: 1234, + H: &histogram.Histogram{ + Count: 5, + ZeroCount: 2, + ZeroThreshold: 0.001, + Sum: 18.4 * rand.Float64(), + Schema: schema, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 1, -1, 0}, }, - PositiveBuckets: []int64{1, 1, -1, 0}, }, - }, - } - histSamples, _ := enc.HistogramSamples(histograms, nil) - decHistograms, err := dec.HistogramSamples(histSamples, nil) - require.NoError(t, err) - require.Len(t, decHistograms, 1) - require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].H.Schema) - }) + } + histSamples, _ := enc.HistogramSamples(histograms, nil) + decHistograms, err := dec.HistogramSamples(histSamples, nil) + require.NoError(t, err) + require.Len(t, decHistograms, 1) + require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].H.Schema) + }) + } } } func TestRecord_DecodeTooHighResolutionFloatHistogramSchema(t *testing.T) { - for _, schema := range []int32{9, 52} { - t.Run(fmt.Sprintf("schema=%d", schema), func(t *testing.T) { - var enc Encoder + for _, enableStStorage := range []bool{false, true} { + for _, schema := range []int32{9, 52} { + t.Run(fmt.Sprintf("schema=%d,stStorage=%v", schema, enableStStorage), func(t *testing.T) { + enc := Encoder{EnableSTStorage: enableStStorage} - var output bytes.Buffer - logger := promslog.New(&promslog.Config{Writer: &output}) - dec := NewDecoder(labels.NewSymbolTable(), logger) - histograms := []RefFloatHistogramSample{ - { - Ref: 56, - T: 1234, - FH: &histogram.FloatHistogram{ - Count: 5, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 18.4 * rand.Float64(), - Schema: schema, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, + var output bytes.Buffer + logger := promslog.New(&promslog.Config{Writer: &output}) + dec := NewDecoder(labels.NewSymbolTable(), logger) + histograms := []RefFloatHistogramSample{ + { + Ref: 56, + T: 1234, + FH: &histogram.FloatHistogram{ + Count: 5, + 
ZeroCount: 2, + ZeroThreshold: 0.001, + Sum: 18.4 * rand.Float64(), + Schema: schema, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []float64{1, 1, -1, 0}, }, - PositiveBuckets: []float64{1, 1, -1, 0}, }, - }, - } - histSamples, _ := enc.FloatHistogramSamples(histograms, nil) - decHistograms, err := dec.FloatHistogramSamples(histSamples, nil) - require.NoError(t, err) - require.Len(t, decHistograms, 1) - require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].FH.Schema) - }) + } + histSamples, _ := enc.FloatHistogramSamples(histograms, nil) + decHistograms, err := dec.FloatHistogramSamples(histSamples, nil) + require.NoError(t, err) + require.Len(t, decHistograms, 1) + require.Equal(t, histogram.ExponentialSchemaMax, decHistograms[0].FH.Schema) + }) + } } } // TestRecord_Corrupted ensures that corrupted records return the correct error. // Bugfix check for pull/521 and pull/523. func TestRecord_Corrupted(t *testing.T) { - var enc Encoder - dec := NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger()) + for _, enableStStorage := range []bool{false, true} { + enc := Encoder{EnableSTStorage: enableStStorage} + dec := NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger()) - t.Run("Test corrupted series record", func(t *testing.T) { - series := []RefSeries{ - { - Ref: 100, - Labels: labels.FromStrings("abc", "def", "123", "456"), - }, - } - - corrupted := enc.Series(series, nil)[:8] - _, err := dec.Series(corrupted, nil) - require.Equal(t, err, encoding.ErrInvalidSize) - }) - - t.Run("Test corrupted sample record", func(t *testing.T) { - samples := []RefSample{ - {Ref: 0, T: 12423423, V: 1.2345}, - } - - corrupted := enc.Samples(samples, nil)[:8] - _, err := dec.Samples(corrupted, nil) - require.ErrorIs(t, err, encoding.ErrInvalidSize) - }) - - t.Run("Test corrupted tombstone record", func(t *testing.T) { - tstones := []tombstones.Stone{ - {Ref: 123, Intervals: tombstones.Intervals{ - {Mint: -1000, Maxt: 1231231}, - {Mint: 5000, Maxt: 0}, - }}, - } - - corrupted := enc.Tombstones(tstones, nil)[:8] - _, err := dec.Tombstones(corrupted, nil) - require.Equal(t, err, encoding.ErrInvalidSize) - }) - - t.Run("Test corrupted exemplar record", func(t *testing.T) { - exemplars := []RefExemplar{ - {Ref: 0, T: 12423423, V: 1.2345, Labels: labels.FromStrings("trace_id", "asdf")}, - } - - corrupted := enc.Exemplars(exemplars, nil)[:8] - _, err := dec.Exemplars(corrupted, nil) - require.ErrorIs(t, err, encoding.ErrInvalidSize) - }) - - t.Run("Test corrupted metadata record", func(t *testing.T) { - meta := []RefMetadata{ - {Ref: 147, Type: uint8(Counter), Unit: "unit", Help: "help"}, - } - - corrupted := enc.Metadata(meta, nil)[:8] - _, err := dec.Metadata(corrupted, nil) - require.ErrorIs(t, err, encoding.ErrInvalidSize) - }) - - t.Run("Test corrupted histogram record", func(t *testing.T) { - histograms := []RefHistogramSample{ - { - Ref: 56, - T: 1234, - H: &histogram.Histogram{ - Count: 5, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 18.4 * rand.Float64(), - Schema: 1, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{1, 1, -1, 0}, + t.Run("Test corrupted series record", func(t *testing.T) { + series := []RefSeries{ + { + Ref: 100, + Labels: labels.FromStrings("abc", "def", "123", "456"), }, - }, - { - Ref: 67, - T: 5678, - H: &histogram.Histogram{ - Count: 8, - ZeroThreshold: 0.001, - Sum: 35.5, - Schema: -53, - PositiveSpans: []histogram.Span{ - 
{Offset: 0, Length: 2}, - {Offset: 2, Length: 2}, - }, - PositiveBuckets: []int64{2, -1, 2, 0}, - CustomValues: []float64{0, 2, 4, 6, 8}, - }, - }, - } + } - corruptedHists, customBucketsHists := enc.HistogramSamples(histograms, nil) - corruptedHists = corruptedHists[:8] - corruptedCustomBucketsHists := enc.CustomBucketsHistogramSamples(customBucketsHists, nil) - corruptedCustomBucketsHists = corruptedCustomBucketsHists[:8] - _, err := dec.HistogramSamples(corruptedHists, nil) - require.ErrorIs(t, err, encoding.ErrInvalidSize) - _, err = dec.HistogramSamples(corruptedCustomBucketsHists, nil) - require.ErrorIs(t, err, encoding.ErrInvalidSize) - }) + corrupted := enc.Series(series, nil)[:8] + _, err := dec.Series(corrupted, nil) + require.Equal(t, err, encoding.ErrInvalidSize) + }) + + t.Run("Test corrupted sample record", func(t *testing.T) { + samples := []RefSample{ + {Ref: 0, T: 12423423, V: 1.2345}, + } + + corrupted := enc.Samples(samples, nil)[:8] + _, err := dec.Samples(corrupted, nil) + require.ErrorIs(t, err, encoding.ErrInvalidSize) + }) + + t.Run("Test corrupted tombstone record", func(t *testing.T) { + tstones := []tombstones.Stone{ + {Ref: 123, Intervals: tombstones.Intervals{ + {Mint: -1000, Maxt: 1231231}, + {Mint: 5000, Maxt: 0}, + }}, + } + + corrupted := enc.Tombstones(tstones, nil)[:8] + _, err := dec.Tombstones(corrupted, nil) + require.Equal(t, err, encoding.ErrInvalidSize) + }) + + t.Run("Test corrupted exemplar record", func(t *testing.T) { + exemplars := []RefExemplar{ + {Ref: 0, T: 12423423, V: 1.2345, Labels: labels.FromStrings("trace_id", "asdf")}, + } + + corrupted := enc.Exemplars(exemplars, nil)[:8] + _, err := dec.Exemplars(corrupted, nil) + require.ErrorIs(t, err, encoding.ErrInvalidSize) + }) + + t.Run("Test corrupted metadata record", func(t *testing.T) { + meta := []RefMetadata{ + {Ref: 147, Type: uint8(Counter), Unit: "unit", Help: "help"}, + } + + corrupted := enc.Metadata(meta, nil)[:8] + _, err := dec.Metadata(corrupted, nil) + require.ErrorIs(t, err, encoding.ErrInvalidSize) + }) + + t.Run("Test corrupted histogram record", func(t *testing.T) { + histograms := []RefHistogramSample{ + { + Ref: 56, + T: 1234, + H: &histogram.Histogram{ + Count: 5, + ZeroCount: 2, + ZeroThreshold: 0.001, + Sum: 18.4 * rand.Float64(), + Schema: 1, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 1, Length: 2}, + }, + PositiveBuckets: []int64{1, 1, -1, 0}, + }, + }, + { + Ref: 67, + T: 5678, + H: &histogram.Histogram{ + Count: 8, + ZeroThreshold: 0.001, + Sum: 35.5, + Schema: -53, + PositiveSpans: []histogram.Span{ + {Offset: 0, Length: 2}, + {Offset: 2, Length: 2}, + }, + PositiveBuckets: []int64{2, -1, 2, 0}, + CustomValues: []float64{0, 2, 4, 6, 8}, + }, + }, + } + + corruptedHists, customBucketsHists := enc.HistogramSamples(histograms, nil) + corruptedHists = corruptedHists[:8] + corruptedCustomBucketsHists := enc.CustomBucketsHistogramSamples(customBucketsHists, nil) + corruptedCustomBucketsHists = corruptedCustomBucketsHists[:8] + _, err := dec.HistogramSamples(corruptedHists, nil) + require.ErrorIs(t, err, encoding.ErrInvalidSize) + _, err = dec.HistogramSamples(corruptedCustomBucketsHists, nil) + require.ErrorIs(t, err, encoding.ErrInvalidSize) + }) + } } func TestRecord_Type(t *testing.T) { @@ -487,6 +545,16 @@ func TestRecord_Type(t *testing.T) { recordType = dec.Type(enc.Samples(samples, nil)) require.Equal(t, Samples, recordType) + // With EnableSTStorage set, all Samples are V2 + enc = Encoder{EnableSTStorage: true} + samples = 
[]RefSample{{Ref: 123, T: 12345, V: 1.2345}} + recordType = dec.Type(enc.Samples(samples, nil)) + require.Equal(t, SamplesV2, recordType) + + samplesST := []RefSample{{Ref: 123, ST: 1, T: 12345, V: 1.2345}} + recordType = dec.Type(enc.Samples(samplesST, nil)) + require.Equal(t, SamplesV2, recordType) + tstones := []tombstones.Stone{{Ref: 1, Intervals: tombstones.Intervals{{Mint: 1, Maxt: 2}}}} recordType = dec.Type(enc.Tombstones(tstones, nil)) require.Equal(t, Tombstones, recordType) @@ -716,24 +784,26 @@ func BenchmarkWAL_HistogramEncoding(b *testing.B) { make: initNHCBRefs, }, } { - for _, labelCount := range []int{0, 10, 50} { - for _, histograms := range []int{10, 100, 1000} { - for _, buckets := range []int{0, 1, 10, 100} { - b.Run(fmt.Sprintf("type=%s/labels=%d/histograms=%d/buckets=%d", maker.name, labelCount, histograms, buckets), func(b *testing.B) { - series, samples, nhcbs := maker.make(labelCount, histograms, buckets) - enc := Encoder{} - for b.Loop() { - var buf []byte - enc.Series(series, buf) - enc.Samples(samples, buf) - var leftOver []RefHistogramSample - _, leftOver = enc.HistogramSamples(nhcbs, buf) - if len(leftOver) > 0 { - enc.CustomBucketsHistogramSamples(leftOver, buf) + for _, enableStStorage := range []bool{false, true} { + for _, labelCount := range []int{0, 10, 50} { + for _, histograms := range []int{10, 100, 1000} { + for _, buckets := range []int{0, 1, 10, 100} { + b.Run(fmt.Sprintf("type=%s/labels=%d/histograms=%d/buckets=%d", maker.name, labelCount, histograms, buckets), func(b *testing.B) { + series, samples, nhcbs := maker.make(labelCount, histograms, buckets) + enc := Encoder{EnableSTStorage: enableStStorage} + for b.Loop() { + var buf []byte + enc.Series(series, buf) + enc.Samples(samples, buf) + var leftOver []RefHistogramSample + _, leftOver = enc.HistogramSamples(nhcbs, buf) + if len(leftOver) > 0 { + enc.CustomBucketsHistogramSamples(leftOver, buf) + } + b.ReportMetric(float64(len(buf)), "recordBytes/ops") } - b.ReportMetric(float64(len(buf)), "recordBytes/ops") - } - }) + }) + } } } } diff --git a/tsdb/wlog/checkpoint.go b/tsdb/wlog/checkpoint.go index 6742141fbc..86a858e70a 100644 --- a/tsdb/wlog/checkpoint.go +++ b/tsdb/wlog/checkpoint.go @@ -92,7 +92,7 @@ const CheckpointPrefix = "checkpoint." // segmented format as the original WAL itself. // This makes it easy to read it through the WAL package and concatenate // it with the original WAL. -func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64) (*CheckpointStats, error) { +func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.HeadSeriesRef) bool, mint int64, enableStStorage bool) (*CheckpointStats, error) { stats := &CheckpointStats{} var sgmReader io.ReadCloser @@ -156,7 +156,7 @@ func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.He metadata []record.RefMetadata st = labels.NewSymbolTable() // Needed for decoding; labels do not outlive this function. 
dec = record.NewDecoder(st, logger) - enc record.Encoder + enc = record.Encoder{EnableSTStorage: enableStStorage} buf []byte recs [][]byte @@ -190,7 +190,7 @@ func Checkpoint(logger *slog.Logger, w *WL, from, to int, keep func(id chunks.He stats.TotalSeries += len(series) stats.DroppedSeries += len(series) - len(repl) - case record.Samples: + case record.Samples, record.SamplesV2: samples, err = dec.Samples(rec, samples) if err != nil { return nil, fmt.Errorf("decode samples: %w", err) diff --git a/tsdb/wlog/checkpoint_test.go b/tsdb/wlog/checkpoint_test.go index 97ca2e768d..18a2c2d3dc 100644 --- a/tsdb/wlog/checkpoint_test.go +++ b/tsdb/wlog/checkpoint_test.go @@ -171,249 +171,255 @@ func TestCheckpoint(t *testing.T) { } } - for _, compress := range compression.Types() { - t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { - dir := t.TempDir() + for _, enableStStorage := range []bool{false, true} { + for _, compress := range compression.Types() { + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableStStorage), func(t *testing.T) { + dir := t.TempDir() - var enc record.Encoder - // Create a dummy segment to bump the initial number. - seg, err := CreateSegment(dir, 100) - require.NoError(t, err) - require.NoError(t, seg.Close()) - - // Manually create checkpoint for 99 and earlier. - w, err := New(nil, nil, filepath.Join(dir, "checkpoint.0099"), compress) - require.NoError(t, err) - - // Add some data we expect to be around later. - err = w.Log(enc.Series([]record.RefSeries{ - {Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")}, - {Ref: 1, Labels: labels.FromStrings("a", "b", "c", "1")}, - }, nil)) - require.NoError(t, err) - // Log an unknown record, that might have come from a future Prometheus version. - require.NoError(t, w.Log([]byte{255})) - require.NoError(t, w.Close()) - - // Start a WAL and write records to it as usual. - w, err = NewSize(nil, nil, dir, 128*1024, compress) - require.NoError(t, err) - - samplesInWAL, histogramsInWAL, floatHistogramsInWAL := 0, 0, 0 - var last int64 - for i := 0; ; i++ { - _, n, err := Segments(w.Dir()) + enc := record.Encoder{EnableSTStorage: enableStStorage} + // Create a dummy segment to bump the initial number. + seg, err := CreateSegment(dir, 100) require.NoError(t, err) - if n >= 106 { - break - } - // Write some series initially. - if i == 0 { - b := enc.Series([]record.RefSeries{ - {Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")}, - {Ref: 3, Labels: labels.FromStrings("a", "b", "c", "3")}, - {Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")}, - {Ref: 5, Labels: labels.FromStrings("a", "b", "c", "5")}, + require.NoError(t, seg.Close()) + + // Manually create checkpoint for 99 and earlier. + w, err := New(nil, nil, filepath.Join(dir, "checkpoint.0099"), compress) + require.NoError(t, err) + + // Add some data we expect to be around later. + err = w.Log(enc.Series([]record.RefSeries{ + {Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")}, + {Ref: 1, Labels: labels.FromStrings("a", "b", "c", "1")}, + }, nil)) + require.NoError(t, err) + // Log an unknown record, that might have come from a future Prometheus version. + require.NoError(t, w.Log([]byte{255})) + require.NoError(t, w.Close()) + + // Start a WAL and write records to it as usual. 
+ w, err = NewSize(nil, nil, dir, 128*1024, compress) + require.NoError(t, err) + + samplesInWAL, histogramsInWAL, floatHistogramsInWAL := 0, 0, 0 + var last int64 + for i := 0; ; i++ { + _, n, err := Segments(w.Dir()) + require.NoError(t, err) + if n >= 106 { + break + } + // Write some series initially. + if i == 0 { + b := enc.Series([]record.RefSeries{ + {Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")}, + {Ref: 3, Labels: labels.FromStrings("a", "b", "c", "3")}, + {Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")}, + {Ref: 5, Labels: labels.FromStrings("a", "b", "c", "5")}, + }, nil) + require.NoError(t, w.Log(b)) + + b = enc.Metadata([]record.RefMetadata{ + {Ref: 2, Unit: "unit", Help: "help"}, + {Ref: 3, Unit: "unit", Help: "help"}, + {Ref: 4, Unit: "unit", Help: "help"}, + {Ref: 5, Unit: "unit", Help: "help"}, + }, nil) + require.NoError(t, w.Log(b)) + } + // Write samples until the WAL has enough segments. + // Make them have drifting timestamps within a record to see that they + // get filtered properly. + b := enc.Samples([]record.RefSample{ + {Ref: 0, T: last, V: float64(i)}, + {Ref: 1, T: last + 10000, V: float64(i)}, + {Ref: 2, T: last + 20000, V: float64(i)}, + {Ref: 3, T: last + 30000, V: float64(i)}, + }, nil) + require.NoError(t, w.Log(b)) + samplesInWAL += 4 + h := makeHistogram(i) + b, _ = enc.HistogramSamples([]record.RefHistogramSample{ + {Ref: 0, T: last, H: h}, + {Ref: 1, T: last + 10000, H: h}, + {Ref: 2, T: last + 20000, H: h}, + {Ref: 3, T: last + 30000, H: h}, + }, nil) + require.NoError(t, w.Log(b)) + histogramsInWAL += 4 + cbh := makeCustomBucketHistogram(i) + b = enc.CustomBucketsHistogramSamples([]record.RefHistogramSample{ + {Ref: 0, T: last, H: cbh}, + {Ref: 1, T: last + 10000, H: cbh}, + {Ref: 2, T: last + 20000, H: cbh}, + {Ref: 3, T: last + 30000, H: cbh}, + }, nil) + require.NoError(t, w.Log(b)) + histogramsInWAL += 4 + fh := makeFloatHistogram(i) + b, _ = enc.FloatHistogramSamples([]record.RefFloatHistogramSample{ + {Ref: 0, T: last, FH: fh}, + {Ref: 1, T: last + 10000, FH: fh}, + {Ref: 2, T: last + 20000, FH: fh}, + {Ref: 3, T: last + 30000, FH: fh}, + }, nil) + require.NoError(t, w.Log(b)) + floatHistogramsInWAL += 4 + cbfh := makeCustomBucketFloatHistogram(i) + b = enc.CustomBucketsFloatHistogramSamples([]record.RefFloatHistogramSample{ + {Ref: 0, T: last, FH: cbfh}, + {Ref: 1, T: last + 10000, FH: cbfh}, + {Ref: 2, T: last + 20000, FH: cbfh}, + {Ref: 3, T: last + 30000, FH: cbfh}, + }, nil) + require.NoError(t, w.Log(b)) + floatHistogramsInWAL += 4 + + b = enc.Exemplars([]record.RefExemplar{ + {Ref: 1, T: last, V: float64(i), Labels: labels.FromStrings("trace_id", fmt.Sprintf("trace-%d", i))}, }, nil) require.NoError(t, w.Log(b)) + // Write changing metadata for each series. In the end, only the latest + // version should end up in the checkpoint. b = enc.Metadata([]record.RefMetadata{ - {Ref: 2, Unit: "unit", Help: "help"}, - {Ref: 3, Unit: "unit", Help: "help"}, - {Ref: 4, Unit: "unit", Help: "help"}, - {Ref: 5, Unit: "unit", Help: "help"}, + {Ref: 0, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 1, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 2, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 3, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, }, nil) require.NoError(t, w.Log(b)) + + last += 100 } - // Write samples until the WAL has enough segments. 
- // Make them have drifting timestamps within a record to see that they - // get filtered properly. - b := enc.Samples([]record.RefSample{ - {Ref: 0, T: last, V: float64(i)}, - {Ref: 1, T: last + 10000, V: float64(i)}, - {Ref: 2, T: last + 20000, V: float64(i)}, - {Ref: 3, T: last + 30000, V: float64(i)}, - }, nil) - require.NoError(t, w.Log(b)) - samplesInWAL += 4 - h := makeHistogram(i) - b, _ = enc.HistogramSamples([]record.RefHistogramSample{ - {Ref: 0, T: last, H: h}, - {Ref: 1, T: last + 10000, H: h}, - {Ref: 2, T: last + 20000, H: h}, - {Ref: 3, T: last + 30000, H: h}, - }, nil) - require.NoError(t, w.Log(b)) - histogramsInWAL += 4 - cbh := makeCustomBucketHistogram(i) - b = enc.CustomBucketsHistogramSamples([]record.RefHistogramSample{ - {Ref: 0, T: last, H: cbh}, - {Ref: 1, T: last + 10000, H: cbh}, - {Ref: 2, T: last + 20000, H: cbh}, - {Ref: 3, T: last + 30000, H: cbh}, - }, nil) - require.NoError(t, w.Log(b)) - histogramsInWAL += 4 - fh := makeFloatHistogram(i) - b, _ = enc.FloatHistogramSamples([]record.RefFloatHistogramSample{ - {Ref: 0, T: last, FH: fh}, - {Ref: 1, T: last + 10000, FH: fh}, - {Ref: 2, T: last + 20000, FH: fh}, - {Ref: 3, T: last + 30000, FH: fh}, - }, nil) - require.NoError(t, w.Log(b)) - floatHistogramsInWAL += 4 - cbfh := makeCustomBucketFloatHistogram(i) - b = enc.CustomBucketsFloatHistogramSamples([]record.RefFloatHistogramSample{ - {Ref: 0, T: last, FH: cbfh}, - {Ref: 1, T: last + 10000, FH: cbfh}, - {Ref: 2, T: last + 20000, FH: cbfh}, - {Ref: 3, T: last + 30000, FH: cbfh}, - }, nil) - require.NoError(t, w.Log(b)) - floatHistogramsInWAL += 4 + require.NoError(t, w.Close()) - b = enc.Exemplars([]record.RefExemplar{ - {Ref: 1, T: last, V: float64(i), Labels: labels.FromStrings("trace_id", fmt.Sprintf("trace-%d", i))}, - }, nil) - require.NoError(t, w.Log(b)) + stats, err := Checkpoint(promslog.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool { + return x%2 == 0 + }, last/2, enableStStorage) + require.NoError(t, err) + require.NoError(t, w.Truncate(107)) + require.NoError(t, DeleteCheckpoints(w.Dir(), 106)) + require.Equal(t, histogramsInWAL+floatHistogramsInWAL+samplesInWAL, stats.TotalSamples) + require.Positive(t, stats.DroppedSamples) - // Write changing metadata for each series. In the end, only the latest - // version should end up in the checkpoint. - b = enc.Metadata([]record.RefMetadata{ - {Ref: 0, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, - {Ref: 1, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, - {Ref: 2, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, - {Ref: 3, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, - }, nil) - require.NoError(t, w.Log(b)) + // Only the new checkpoint should be left. 
+ files, err := os.ReadDir(dir) + require.NoError(t, err) + require.Len(t, files, 1) + require.Equal(t, "checkpoint.00000106", files[0].Name()) - last += 100 - } - require.NoError(t, w.Close()) + sr, err := NewSegmentsReader(filepath.Join(dir, "checkpoint.00000106")) + require.NoError(t, err) + defer sr.Close() - stats, err := Checkpoint(promslog.NewNopLogger(), w, 100, 106, func(x chunks.HeadSeriesRef) bool { - return x%2 == 0 - }, last/2) - require.NoError(t, err) - require.NoError(t, w.Truncate(107)) - require.NoError(t, DeleteCheckpoints(w.Dir(), 106)) - require.Equal(t, histogramsInWAL+floatHistogramsInWAL+samplesInWAL, stats.TotalSamples) - require.Positive(t, stats.DroppedSamples) + dec := record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger()) + var series []record.RefSeries + var metadata []record.RefMetadata + r := NewReader(sr) - // Only the new checkpoint should be left. - files, err := os.ReadDir(dir) - require.NoError(t, err) - require.Len(t, files, 1) - require.Equal(t, "checkpoint.00000106", files[0].Name()) + samplesInCheckpoint, histogramsInCheckpoint, floatHistogramsInCheckpoint := 0, 0, 0 + for r.Next() { + rec := r.Record() - sr, err := NewSegmentsReader(filepath.Join(dir, "checkpoint.00000106")) - require.NoError(t, err) - defer sr.Close() - - dec := record.NewDecoder(labels.NewSymbolTable(), promslog.NewNopLogger()) - var series []record.RefSeries - var metadata []record.RefMetadata - r := NewReader(sr) - - samplesInCheckpoint, histogramsInCheckpoint, floatHistogramsInCheckpoint := 0, 0, 0 - for r.Next() { - rec := r.Record() - - switch dec.Type(rec) { - case record.Series: - series, err = dec.Series(rec, series) - require.NoError(t, err) - case record.Samples: - samples, err := dec.Samples(rec, nil) - require.NoError(t, err) - for _, s := range samples { - require.GreaterOrEqual(t, s.T, last/2, "sample with wrong timestamp") + switch dec.Type(rec) { + case record.Series: + series, err = dec.Series(rec, series) + require.NoError(t, err) + case record.Samples, record.SamplesV2: + samples, err := dec.Samples(rec, nil) + require.NoError(t, err) + for _, s := range samples { + require.GreaterOrEqual(t, s.T, last/2, "sample with wrong timestamp") + } + samplesInCheckpoint += len(samples) + case record.HistogramSamples, record.CustomBucketsHistogramSamples: + histograms, err := dec.HistogramSamples(rec, nil) + require.NoError(t, err) + for _, h := range histograms { + require.GreaterOrEqual(t, h.T, last/2, "histogram with wrong timestamp") + } + histogramsInCheckpoint += len(histograms) + case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples: + floatHistograms, err := dec.FloatHistogramSamples(rec, nil) + require.NoError(t, err) + for _, h := range floatHistograms { + require.GreaterOrEqual(t, h.T, last/2, "float histogram with wrong timestamp") + } + floatHistogramsInCheckpoint += len(floatHistograms) + case record.Exemplars: + exemplars, err := dec.Exemplars(rec, nil) + require.NoError(t, err) + for _, e := range exemplars { + require.GreaterOrEqual(t, e.T, last/2, "exemplar with wrong timestamp") + } + case record.Metadata: + metadata, err = dec.Metadata(rec, metadata) + require.NoError(t, err) } - samplesInCheckpoint += len(samples) - case record.HistogramSamples, record.CustomBucketsHistogramSamples: - histograms, err := dec.HistogramSamples(rec, nil) - require.NoError(t, err) - for _, h := range histograms { - require.GreaterOrEqual(t, h.T, last/2, "histogram with wrong timestamp") - } - histogramsInCheckpoint += len(histograms) - 
case record.FloatHistogramSamples, record.CustomBucketsFloatHistogramSamples: - floatHistograms, err := dec.FloatHistogramSamples(rec, nil) - require.NoError(t, err) - for _, h := range floatHistograms { - require.GreaterOrEqual(t, h.T, last/2, "float histogram with wrong timestamp") - } - floatHistogramsInCheckpoint += len(floatHistograms) - case record.Exemplars: - exemplars, err := dec.Exemplars(rec, nil) - require.NoError(t, err) - for _, e := range exemplars { - require.GreaterOrEqual(t, e.T, last/2, "exemplar with wrong timestamp") - } - case record.Metadata: - metadata, err = dec.Metadata(rec, metadata) - require.NoError(t, err) } - } - require.NoError(t, r.Err()) - // Making sure we replayed some samples. We expect >50% samples to be still present. - require.Greater(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.5) - require.Less(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.8) - require.Greater(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.5) - require.Less(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.8) - require.Greater(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.5) - require.Less(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.8) + require.NoError(t, r.Err()) + // Making sure we replayed some samples. We expect >50% samples to be still present. + require.Greater(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.5) + require.Less(t, float64(samplesInCheckpoint)/float64(samplesInWAL), 0.8) + require.Greater(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.5) + require.Less(t, float64(histogramsInCheckpoint)/float64(histogramsInWAL), 0.8) + require.Greater(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.5) + require.Less(t, float64(floatHistogramsInCheckpoint)/float64(floatHistogramsInWAL), 0.8) - expectedRefSeries := []record.RefSeries{ - {Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")}, - {Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")}, - {Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")}, - } - testutil.RequireEqual(t, expectedRefSeries, series) + expectedRefSeries := []record.RefSeries{ + {Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")}, + {Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")}, + {Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")}, + } + testutil.RequireEqual(t, expectedRefSeries, series) - expectedRefMetadata := []record.RefMetadata{ - {Ref: 0, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)}, - {Ref: 2, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)}, - {Ref: 4, Unit: "unit", Help: "help"}, - } - sort.Slice(metadata, func(i, j int) bool { return metadata[i].Ref < metadata[j].Ref }) - require.Equal(t, expectedRefMetadata, metadata) - }) + expectedRefMetadata := []record.RefMetadata{ + {Ref: 0, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)}, + {Ref: 2, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)}, + {Ref: 4, Unit: "unit", Help: "help"}, + } + sort.Slice(metadata, func(i, j int) bool { return metadata[i].Ref < metadata[j].Ref }) + require.Equal(t, expectedRefMetadata, metadata) + }) + } } } func TestCheckpointNoTmpFolderAfterError(t *testing.T) { - // Create a new wlog with invalid data. 
- dir := t.TempDir() - w, err := NewSize(nil, nil, dir, 64*1024, compression.None) - require.NoError(t, err) - var enc record.Encoder - require.NoError(t, w.Log(enc.Series([]record.RefSeries{ - {Ref: 0, Labels: labels.FromStrings("a", "b", "c", "2")}, - }, nil))) - require.NoError(t, w.Close()) + for _, enableStStorage := range []bool{false, true} { + t.Run("enableStStorage="+strconv.FormatBool(enableStStorage), func(t *testing.T) { + // Create a new wlog with invalid data. + dir := t.TempDir() + w, err := NewSize(nil, nil, dir, 64*1024, compression.None) + require.NoError(t, err) + enc := record.Encoder{EnableSTStorage: enableStStorage} + require.NoError(t, w.Log(enc.Series([]record.RefSeries{ + {Ref: 0, Labels: labels.FromStrings("a", "b", "c", "2")}, + }, nil))) + require.NoError(t, w.Close()) - // Corrupt data. - f, err := os.OpenFile(filepath.Join(w.Dir(), "00000000"), os.O_WRONLY, 0o666) - require.NoError(t, err) - _, err = f.WriteAt([]byte{42}, 1) - require.NoError(t, err) - require.NoError(t, f.Close()) + // Corrupt data. + f, err := os.OpenFile(filepath.Join(w.Dir(), "00000000"), os.O_WRONLY, 0o666) + require.NoError(t, err) + _, err = f.WriteAt([]byte{42}, 1) + require.NoError(t, err) + require.NoError(t, f.Close()) - // Run the checkpoint and since the wlog contains corrupt data this should return an error. - _, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1, nil, 0) - require.Error(t, err) + // Run the checkpoint and since the wlog contains corrupt data this should return an error. + _, err = Checkpoint(promslog.NewNopLogger(), w, 0, 1, nil, 0, enableStStorage) + require.Error(t, err) - // Walk the wlog dir to make sure there are no tmp folder left behind after the error. - err = filepath.Walk(w.Dir(), func(path string, info os.FileInfo, err error) error { - if err != nil { - return fmt.Errorf("access err %q: %w", path, err) - } - if info.IsDir() && strings.HasSuffix(info.Name(), ".tmp") { - return fmt.Errorf("wlog dir contains temporary folder:%s", info.Name()) - } - return nil - }) - require.NoError(t, err) + // Walk the wlog dir to make sure there are no tmp folder left behind after the error. + err = filepath.Walk(w.Dir(), func(path string, info os.FileInfo, err error) error { + if err != nil { + return fmt.Errorf("access err %q: %w", path, err) + } + if info.IsDir() && strings.HasSuffix(info.Name(), ".tmp") { + return fmt.Errorf("wlog dir contains temporary folder:%s", info.Name()) + } + return nil + }) + require.NoError(t, err) + }) + } } diff --git a/tsdb/wlog/watcher.go b/tsdb/wlog/watcher.go index a841a44fc8..83453463eb 100644 --- a/tsdb/wlog/watcher.go +++ b/tsdb/wlog/watcher.go @@ -519,7 +519,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { } w.writer.StoreSeries(series, segmentNum) - case record.Samples: + case record.Samples, record.SamplesV2: // If we're not tailing a segment we can ignore any samples records we see. // This speeds up replay of the WAL by > 10x. 
if !tail { diff --git a/tsdb/wlog/watcher_test.go b/tsdb/wlog/watcher_test.go index b9a6504298..e29aac4d47 100644 --- a/tsdb/wlog/watcher_test.go +++ b/tsdb/wlog/watcher_test.go @@ -17,6 +17,7 @@ import ( "math/rand" "os" "path" + "path/filepath" "runtime" "sync" "testing" @@ -144,145 +145,147 @@ func TestTailSamples(t *testing.T) { const exemplarsCount = 25 const histogramsCount = 50 for _, compress := range compression.Types() { - t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { - now := time.Now() + for _, enableStStorage := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableStStorage), func(t *testing.T) { + now := time.Now() - dir := t.TempDir() + dir := t.TempDir() - wdir := path.Join(dir, "wal") - err := os.Mkdir(wdir, 0o777) - require.NoError(t, err) - - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) - require.NoError(t, err) - defer func() { - require.NoError(t, w.Close()) - }() - - // Write to the initial segment then checkpoint. - for i := range seriesCount { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - { - Ref: chunks.HeadSeriesRef(ref), - Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), - }, - }, nil) - require.NoError(t, w.Log(series)) - - for range samplesCount { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - { - Ref: chunks.HeadSeriesRef(inner), - T: now.UnixNano() + 1, - V: float64(i), - }, - }, nil) - require.NoError(t, w.Log(sample)) - } - - for range exemplarsCount { - inner := rand.Intn(ref + 1) - exemplar := enc.Exemplars([]record.RefExemplar{ - { - Ref: chunks.HeadSeriesRef(inner), - T: now.UnixNano() + 1, - V: float64(i), - Labels: labels.FromStrings("trace_id", fmt.Sprintf("trace-%d", inner)), - }, - }, nil) - require.NoError(t, w.Log(exemplar)) - } - - for range histogramsCount { - inner := rand.Intn(ref + 1) - hist := &histogram.Histogram{ - Schema: 2, - ZeroThreshold: 1e-128, - ZeroCount: 0, - Count: 2, - Sum: 0, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, - PositiveBuckets: []int64{int64(i) + 1}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, - NegativeBuckets: []int64{int64(-i) - 1}, - } - - histograms, _ := enc.HistogramSamples([]record.RefHistogramSample{{ - Ref: chunks.HeadSeriesRef(inner), - T: now.UnixNano() + 1, - H: hist, - }}, nil) - require.NoError(t, w.Log(histograms)) - - customBucketHist := &histogram.Histogram{ - Schema: -53, - ZeroThreshold: 1e-128, - ZeroCount: 0, - Count: 2, - Sum: 0, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, - CustomValues: []float64{float64(i) + 2}, - } - - customBucketHistograms := enc.CustomBucketsHistogramSamples([]record.RefHistogramSample{{ - Ref: chunks.HeadSeriesRef(inner), - T: now.UnixNano() + 1, - H: customBucketHist, - }}, nil) - require.NoError(t, w.Log(customBucketHistograms)) - - floatHistograms, _ := enc.FloatHistogramSamples([]record.RefFloatHistogramSample{{ - Ref: chunks.HeadSeriesRef(inner), - T: now.UnixNano() + 1, - FH: hist.ToFloat(nil), - }}, nil) - require.NoError(t, w.Log(floatHistograms)) - - customBucketFloatHistograms := enc.CustomBucketsFloatHistogramSamples([]record.RefFloatHistogramSample{{ - Ref: chunks.HeadSeriesRef(inner), - T: now.UnixNano() + 1, - FH: customBucketHist.ToFloat(nil), - }}, nil) - require.NoError(t, w.Log(customBucketFloatHistograms)) - } - } - - // Start read after checkpoint, no more data written. 
- first, last, err := Segments(w.Dir()) - require.NoError(t, err) - - wt := newWriteToMock(0) - watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, true, true, true) - watcher.SetStartTime(now) - - // Set the Watcher's metrics so they're not nil pointers. - watcher.SetMetrics() - for i := first; i <= last; i++ { - segment, err := OpenReadSegment(SegmentName(watcher.walDir, i)) + wdir := path.Join(dir, "wal") + err := os.Mkdir(wdir, 0o777) require.NoError(t, err) - reader := NewLiveReader(nil, NewLiveReaderMetrics(nil), segment) - // Use tail true so we can ensure we got the right number of samples. - watcher.readSegment(reader, i, true) - require.NoError(t, segment.Close()) - } + enc := record.Encoder{EnableSTStorage: enableStStorage} + w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) + require.NoError(t, err) + defer func() { + require.NoError(t, w.Close()) + }() - expectedSeries := seriesCount - expectedSamples := seriesCount * samplesCount - expectedExemplars := seriesCount * exemplarsCount - expectedHistograms := seriesCount * histogramsCount * 2 - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumSeries() >= expectedSeries + // Write to the initial segment then checkpoint. + for i := range seriesCount { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + { + Ref: chunks.HeadSeriesRef(ref), + Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), + }, + }, nil) + require.NoError(t, w.Log(series)) + + for range samplesCount { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + { + Ref: chunks.HeadSeriesRef(inner), + T: now.UnixNano() + 1, + V: float64(i), + }, + }, nil) + require.NoError(t, w.Log(sample)) + } + + for range exemplarsCount { + inner := rand.Intn(ref + 1) + exemplar := enc.Exemplars([]record.RefExemplar{ + { + Ref: chunks.HeadSeriesRef(inner), + T: now.UnixNano() + 1, + V: float64(i), + Labels: labels.FromStrings("trace_id", fmt.Sprintf("trace-%d", inner)), + }, + }, nil) + require.NoError(t, w.Log(exemplar)) + } + + for range histogramsCount { + inner := rand.Intn(ref + 1) + hist := &histogram.Histogram{ + Schema: 2, + ZeroThreshold: 1e-128, + ZeroCount: 0, + Count: 2, + Sum: 0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + PositiveBuckets: []int64{int64(i) + 1}, + NegativeSpans: []histogram.Span{{Offset: 0, Length: 1}}, + NegativeBuckets: []int64{int64(-i) - 1}, + } + + histograms, _ := enc.HistogramSamples([]record.RefHistogramSample{{ + Ref: chunks.HeadSeriesRef(inner), + T: now.UnixNano() + 1, + H: hist, + }}, nil) + require.NoError(t, w.Log(histograms)) + + customBucketHist := &histogram.Histogram{ + Schema: -53, + ZeroThreshold: 1e-128, + ZeroCount: 0, + Count: 2, + Sum: 0, + PositiveSpans: []histogram.Span{{Offset: 0, Length: 1}}, + CustomValues: []float64{float64(i) + 2}, + } + + customBucketHistograms := enc.CustomBucketsHistogramSamples([]record.RefHistogramSample{{ + Ref: chunks.HeadSeriesRef(inner), + T: now.UnixNano() + 1, + H: customBucketHist, + }}, nil) + require.NoError(t, w.Log(customBucketHistograms)) + + floatHistograms, _ := enc.FloatHistogramSamples([]record.RefFloatHistogramSample{{ + Ref: chunks.HeadSeriesRef(inner), + T: now.UnixNano() + 1, + FH: hist.ToFloat(nil), + }}, nil) + require.NoError(t, w.Log(floatHistograms)) + + customBucketFloatHistograms := enc.CustomBucketsFloatHistogramSamples([]record.RefFloatHistogramSample{{ + Ref: chunks.HeadSeriesRef(inner), + T: now.UnixNano() + 1, + FH: customBucketHist.ToFloat(nil), + }}, nil) + 
require.NoError(t, w.Log(customBucketFloatHistograms)) + } + } + + // Start read after checkpoint, no more data written. + first, last, err := Segments(w.Dir()) + require.NoError(t, err) + + wt := newWriteToMock(0) + watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, true, true, true) + watcher.SetStartTime(now) + + // Set the Watcher's metrics so they're not nil pointers. + watcher.SetMetrics() + for i := first; i <= last; i++ { + segment, err := OpenReadSegment(SegmentName(watcher.walDir, i)) + require.NoError(t, err) + + reader := NewLiveReader(nil, NewLiveReaderMetrics(nil), segment) + // Use tail true so we can ensure we got the right number of samples. + watcher.readSegment(reader, i, true) + require.NoError(t, segment.Close()) + } + + expectedSeries := seriesCount + expectedSamples := seriesCount * samplesCount + expectedExemplars := seriesCount * exemplarsCount + expectedHistograms := seriesCount * histogramsCount * 2 + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumSeries() >= expectedSeries + }) + require.Equal(t, expectedSeries, wt.checkNumSeries(), "did not receive the expected number of series") + require.Equal(t, expectedSamples, wt.samplesAppended, "did not receive the expected number of samples") + require.Equal(t, expectedExemplars, wt.exemplarsAppended, "did not receive the expected number of exemplars") + require.Equal(t, expectedHistograms, wt.histogramsAppended, "did not receive the expected number of histograms") + require.Equal(t, expectedHistograms, wt.floatHistogramsAppended, "did not receive the expected number of float histograms") }) - require.Equal(t, expectedSeries, wt.checkNumSeries(), "did not receive the expected number of series") - require.Equal(t, expectedSamples, wt.samplesAppended, "did not receive the expected number of samples") - require.Equal(t, expectedExemplars, wt.exemplarsAppended, "did not receive the expected number of exemplars") - require.Equal(t, expectedHistograms, wt.histogramsAppended, "did not receive the expected number of histograms") - require.Equal(t, expectedHistograms, wt.floatHistogramsAppended, "did not receive the expected number of float histograms") - }) + } } } @@ -291,64 +294,66 @@ func TestReadToEndNoCheckpoint(t *testing.T) { const seriesCount = 10 const samplesCount = 250 - for _, compress := range compression.Types() { - t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { - dir := t.TempDir() - wdir := path.Join(dir, "wal") - err := os.Mkdir(wdir, 0o777) - require.NoError(t, err) + for _, enableStStorage := range []bool{false, true} { + for _, compress := range compression.Types() { + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableStStorage), func(t *testing.T) { + dir := t.TempDir() + wdir := path.Join(dir, "wal") + err := os.Mkdir(wdir, 0o777) + require.NoError(t, err) - w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) - require.NoError(t, err) - defer func() { - require.NoError(t, w.Close()) - }() + w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) + require.NoError(t, err) + defer func() { + require.NoError(t, w.Close()) + }() - var recs [][]byte + var recs [][]byte - enc := record.Encoder{} + enc := record.Encoder{EnableSTStorage: enableStStorage} - for i := range seriesCount { - series := enc.Series([]record.RefSeries{ - { - Ref: chunks.HeadSeriesRef(i), - Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), - }, - }, nil) - recs = append(recs, series) - for j := range samplesCount { - sample := 
enc.Samples([]record.RefSample{ + for i := range seriesCount { + series := enc.Series([]record.RefSeries{ { - Ref: chunks.HeadSeriesRef(j), - T: int64(i), - V: float64(i), + Ref: chunks.HeadSeriesRef(i), + Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), }, }, nil) + recs = append(recs, series) + for j := range samplesCount { + sample := enc.Samples([]record.RefSample{ + { + Ref: chunks.HeadSeriesRef(j), + T: int64(i), + V: float64(i), + }, + }, nil) - recs = append(recs, sample) + recs = append(recs, sample) - // Randomly batch up records. - if rand.Intn(4) < 3 { - require.NoError(t, w.Log(recs...)) - recs = recs[:0] + // Randomly batch up records. + if rand.Intn(4) < 3 { + require.NoError(t, w.Log(recs...)) + recs = recs[:0] + } } } - } - require.NoError(t, w.Log(recs...)) - overwriteReadTimeout(t, time.Second) - _, _, err = Segments(w.Dir()) - require.NoError(t, err) + require.NoError(t, w.Log(recs...)) + overwriteReadTimeout(t, time.Second) + _, _, err = Segments(w.Dir()) + require.NoError(t, err) - wt := newWriteToMock(0) - watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) - go watcher.Start() + wt := newWriteToMock(0) + watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) + go watcher.Start() - expected := seriesCount - require.Eventually(t, func() bool { - return wt.checkNumSeries() == expected - }, 20*time.Second, 1*time.Second) - watcher.Stop() - }) + expected := seriesCount + require.Eventually(t, func() bool { + return wt.checkNumSeries() == expected + }, 20*time.Second, 1*time.Second) + watcher.Stop() + }) + } } } @@ -359,184 +364,119 @@ func TestReadToEndWithCheckpoint(t *testing.T) { const seriesCount = 10 const samplesCount = 250 - for _, compress := range compression.Types() { - t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { - dir := t.TempDir() + for _, enableStStorage := range []bool{false, true} { + for _, compress := range compression.Types() { + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableStStorage), func(t *testing.T) { + dir := t.TempDir() - wdir := path.Join(dir, "wal") - err := os.Mkdir(wdir, 0o777) - require.NoError(t, err) + wdir := path.Join(dir, "wal") + err := os.Mkdir(wdir, 0o777) + require.NoError(t, err) - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, segmentSize, compress) - require.NoError(t, err) - defer func() { - require.NoError(t, w.Close()) - }() + enc := record.Encoder{EnableSTStorage: enableStStorage} + w, err := NewSize(nil, nil, wdir, segmentSize, compress) + require.NoError(t, err) + defer func() { + require.NoError(t, w.Close()) + }() - // Write to the initial segment then checkpoint. - for i := range seriesCount { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - { - Ref: chunks.HeadSeriesRef(ref), - Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), - }, - }, nil) - require.NoError(t, w.Log(series)) - // Add in an unknown record type, which should be ignored. - require.NoError(t, w.Log([]byte{255})) - - for range samplesCount { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ + // Write to the initial segment then checkpoint. 
+ for i := range seriesCount { + ref := i + 100 + series := enc.Series([]record.RefSeries{ { - Ref: chunks.HeadSeriesRef(inner), - T: int64(i), - V: float64(i), + Ref: chunks.HeadSeriesRef(ref), + Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), }, }, nil) - require.NoError(t, w.Log(sample)) + require.NoError(t, w.Log(series)) + // Add in an unknown record type, which should be ignored. + require.NoError(t, w.Log([]byte{255})) + + for range samplesCount { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + { + Ref: chunks.HeadSeriesRef(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + require.NoError(t, w.Log(sample)) + } } - } - Checkpoint(promslog.NewNopLogger(), w, 0, 1, func(chunks.HeadSeriesRef) bool { return true }, 0) - w.Truncate(1) + Checkpoint(promslog.NewNopLogger(), w, 0, 1, func(chunks.HeadSeriesRef) bool { return true }, 0, enableStStorage) + w.Truncate(1) - // Write more records after checkpointing. - for i := range seriesCount { - series := enc.Series([]record.RefSeries{ - { - Ref: chunks.HeadSeriesRef(i), - Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), - }, - }, nil) - require.NoError(t, w.Log(series)) - - for j := range samplesCount { - sample := enc.Samples([]record.RefSample{ + // Write more records after checkpointing. + for i := range seriesCount { + series := enc.Series([]record.RefSeries{ { - Ref: chunks.HeadSeriesRef(j), - T: int64(i), - V: float64(i), + Ref: chunks.HeadSeriesRef(i), + Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), }, }, nil) - require.NoError(t, w.Log(sample)) + require.NoError(t, w.Log(series)) + + for j := range samplesCount { + sample := enc.Samples([]record.RefSample{ + { + Ref: chunks.HeadSeriesRef(j), + T: int64(i), + V: float64(i), + }, + }, nil) + require.NoError(t, w.Log(sample)) + } } - } - _, _, err = Segments(w.Dir()) - require.NoError(t, err) - overwriteReadTimeout(t, time.Second) - wt := newWriteToMock(0) - watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) - go watcher.Start() + _, _, err = Segments(w.Dir()) + require.NoError(t, err) + overwriteReadTimeout(t, time.Second) + wt := newWriteToMock(0) + watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) + go watcher.Start() - expected := seriesCount * 2 + expected := seriesCount * 2 - require.Eventually(t, func() bool { - return wt.checkNumSeries() == expected - }, 10*time.Second, 1*time.Second) - watcher.Stop() - }) + require.Eventually(t, func() bool { + return wt.checkNumSeries() == expected + }, 10*time.Second, 1*time.Second) + watcher.Stop() + }) + } } } func TestReadCheckpoint(t *testing.T) { - t.Parallel() pageSize := 32 * 1024 const seriesCount = 10 const samplesCount = 250 - for _, compress := range compression.Types() { - t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { - dir := t.TempDir() + for _, enableStStorage := range []bool{false, true} { + for _, compress := range compression.Types() { + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableStStorage), func(t *testing.T) { + dir := t.TempDir() - wdir := path.Join(dir, "wal") - err := os.Mkdir(wdir, 0o777) - require.NoError(t, err) + wdir := path.Join(dir, "wal") + err := os.Mkdir(wdir, 0o777) + require.NoError(t, err) - f, err := os.Create(SegmentName(wdir, 30)) - require.NoError(t, err) - require.NoError(t, f.Close()) + f, err := os.Create(SegmentName(wdir, 30)) + require.NoError(t, err) + require.NoError(t, f.Close()) - enc := record.Encoder{} - w, 
err := NewSize(nil, nil, wdir, 128*pageSize, compress) - require.NoError(t, err) - t.Cleanup(func() { - require.NoError(t, w.Close()) - }) + enc := record.Encoder{EnableSTStorage: enableStStorage} + w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, w.Close()) + }) - // Write to the initial segment then checkpoint. - for i := range seriesCount { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - { - Ref: chunks.HeadSeriesRef(ref), - Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), - }, - }, nil) - require.NoError(t, w.Log(series)) - - for range samplesCount { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - { - Ref: chunks.HeadSeriesRef(inner), - T: int64(i), - V: float64(i), - }, - }, nil) - require.NoError(t, w.Log(sample)) - } - } - _, err = w.NextSegmentSync() - require.NoError(t, err) - _, err = Checkpoint(promslog.NewNopLogger(), w, 30, 31, func(chunks.HeadSeriesRef) bool { return true }, 0) - require.NoError(t, err) - require.NoError(t, w.Truncate(32)) - - // Start read after checkpoint, no more data written. - _, _, err = Segments(w.Dir()) - require.NoError(t, err) - - wt := newWriteToMock(0) - watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) - go watcher.Start() - - expectedSeries := seriesCount - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumSeries() >= expectedSeries - }) - watcher.Stop() - require.Equal(t, expectedSeries, wt.checkNumSeries()) - }) - } -} - -func TestReadCheckpointMultipleSegments(t *testing.T) { - pageSize := 32 * 1024 - - const segments = 1 - const seriesCount = 20 - const samplesCount = 300 - - for _, compress := range compression.Types() { - t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { - dir := t.TempDir() - - wdir := path.Join(dir, "wal") - err := os.Mkdir(wdir, 0o777) - require.NoError(t, err) - - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, pageSize, compress) - require.NoError(t, err) - - // Write a bunch of data. - for i := range segments { - for j := range seriesCount { - ref := j + (i * 100) + // Write to the initial segment then checkpoint. + for i := range seriesCount { + ref := i + 100 series := enc.Series([]record.RefSeries{ { Ref: chunks.HeadSeriesRef(ref), @@ -557,57 +497,132 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { require.NoError(t, w.Log(sample)) } } - } - require.NoError(t, w.Close()) - - // At this point we should have at least 6 segments, lets create a checkpoint dir of the first 5. - checkpointDir := dir + "/wal/checkpoint.000004" - err = os.Mkdir(checkpointDir, 0o777) - require.NoError(t, err) - for i := 0; i <= 4; i++ { - err := os.Rename(SegmentName(dir+"/wal", i), SegmentName(checkpointDir, i)) + _, err = w.NextSegmentSync() require.NoError(t, err) - } + _, err = Checkpoint(promslog.NewNopLogger(), w, 30, 31, func(chunks.HeadSeriesRef) bool { return true }, 0, enableStStorage) + require.NoError(t, err) + require.NoError(t, w.Truncate(32)) - wt := newWriteToMock(0) - watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) - watcher.MaxSegment = -1 + // Start read after checkpoint, no more data written. + _, _, err = Segments(w.Dir()) + require.NoError(t, err) - // Set the Watcher's metrics so they're not nil pointers. 
- watcher.SetMetrics() + wt := newWriteToMock(0) + watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) + go watcher.Start() - lastCheckpoint, _, err := LastCheckpoint(watcher.walDir) - require.NoError(t, err) + expectedSeries := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumSeries() >= expectedSeries + }) + watcher.Stop() + require.Equal(t, expectedSeries, wt.checkNumSeries()) + }) + } + } +} - err = watcher.readCheckpoint(lastCheckpoint, (*Watcher).readSegment) - require.NoError(t, err) - }) +func TestReadCheckpointMultipleSegments(t *testing.T) { + pageSize := 32 * 1024 + + const segments = 1 + const seriesCount = 40 + const samplesCount = 500 + + for _, enableStStorage := range []bool{false, true} { + for _, compress := range compression.Types() { + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableStStorage), func(t *testing.T) { + dir := t.TempDir() + + wdir := path.Join(dir, "wal") + err := os.Mkdir(wdir, 0o777) + require.NoError(t, err) + + enc := record.Encoder{EnableSTStorage: enableStStorage} + w, err := NewSize(nil, nil, wdir, pageSize, compress) + require.NoError(t, err) + + // Write a bunch of data. + for i := range segments { + for j := range seriesCount { + ref := j + (i * 100) + series := enc.Series([]record.RefSeries{ + { + Ref: chunks.HeadSeriesRef(ref), + Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), + }, + }, nil) + require.NoError(t, w.Log(series)) + + for range samplesCount { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + { + Ref: chunks.HeadSeriesRef(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + require.NoError(t, w.Log(sample)) + } + } + } + require.NoError(t, w.Close()) + + // At this point we should have at least 6 segments, lets create a checkpoint dir of the first 5. + checkpointDir := dir + "/wal/checkpoint.000004" + err = os.Mkdir(checkpointDir, 0o777) + require.NoError(t, err) + for i := 0; i <= 4; i++ { + err := os.Rename(SegmentName(dir+"/wal", i), SegmentName(checkpointDir, i)) + require.NoError(t, err) + } + + wt := newWriteToMock(0) + watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) + watcher.MaxSegment = -1 + + // Set the Watcher's metrics so they're not nil pointers. + watcher.SetMetrics() + + lastCheckpoint, _, err := LastCheckpoint(watcher.walDir) + require.NoError(t, err) + + err = watcher.readCheckpoint(lastCheckpoint, (*Watcher).readSegment) + require.NoError(t, err) + }) + } } } func TestCheckpointSeriesReset(t *testing.T) { - segmentSize := 32 * 1024 + segmentSize := 64 * 1024 // We need something similar to this # of series and samples // in order to get enough segments for us to checkpoint. 
- const seriesCount = 20 - const samplesCount = 350 + const seriesCount = 30 + const samplesCount = 700 testCases := []struct { - compress compression.Type - segments int + compress compression.Type + enableStStorage bool + segments int }{ - {compress: compression.None, segments: 14}, - {compress: compression.Snappy, segments: 13}, + {compress: compression.None, enableStStorage: false, segments: 24}, + {compress: compression.Snappy, enableStStorage: false, segments: 23}, + {compress: compression.None, enableStStorage: true, segments: 20}, + {compress: compression.Snappy, enableStStorage: true, segments: 20}, } + dir := t.TempDir() for _, tc := range testCases { - t.Run(fmt.Sprintf("compress=%s", tc.compress), func(t *testing.T) { - dir := t.TempDir() - - wdir := path.Join(dir, "wal") - err := os.Mkdir(wdir, 0o777) + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", tc.compress, tc.enableStStorage), func(t *testing.T) { + subdir := filepath.Join(dir, fmt.Sprintf("%s-%v", tc.compress, tc.enableStStorage)) + err := os.MkdirAll(subdir, 0o777) + require.NoError(t, err) + wdir := filepath.Join(subdir, "wal") + err = os.MkdirAll(wdir, 0o777) require.NoError(t, err) - enc := record.Encoder{} + enc := record.Encoder{EnableSTStorage: tc.enableStStorage} w, err := NewSize(nil, nil, wdir, segmentSize, tc.compress) require.NoError(t, err) defer func() { @@ -643,7 +658,7 @@ func TestCheckpointSeriesReset(t *testing.T) { overwriteReadTimeout(t, time.Second) wt := newWriteToMock(0) - watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) + watcher := NewWatcher(wMetrics, nil, nil, "", wt, subdir, false, false, false) watcher.MaxSegment = -1 go watcher.Start() @@ -655,13 +670,13 @@ func TestCheckpointSeriesReset(t *testing.T) { return wt.checkNumSeries() == seriesCount }, 10*time.Second, 1*time.Second) - _, err = Checkpoint(promslog.NewNopLogger(), w, 2, 4, func(chunks.HeadSeriesRef) bool { return true }, 0) + _, err = Checkpoint(promslog.NewNopLogger(), w, 2, 4, func(chunks.HeadSeriesRef) bool { return true }, 0, true) require.NoError(t, err) err = w.Truncate(5) require.NoError(t, err) - _, cpi, err := LastCheckpoint(path.Join(dir, "wal")) + _, cpi, err := LastCheckpoint(wdir) require.NoError(t, err) err = watcher.garbageCollectSeries(cpi + 1) require.NoError(t, err) @@ -678,66 +693,67 @@ func TestCheckpointSeriesReset(t *testing.T) { } func TestRun_StartupTime(t *testing.T) { - t.Parallel() const pageSize = 32 * 1024 - const segments = 10 - const seriesCount = 20 - const samplesCount = 300 + const segments = 20 + const seriesCount = 40 + const samplesCount = 500 - for _, compress := range compression.Types() { - t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { - dir := t.TempDir() + for _, enableStStorage := range []bool{false, true} { + for _, compress := range compression.Types() { + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableStStorage), func(t *testing.T) { + dir := t.TempDir() - wdir := path.Join(dir, "wal") - err := os.Mkdir(wdir, 0o777) - require.NoError(t, err) + wdir := path.Join(dir, "wal") + err := os.Mkdir(wdir, 0o777) + require.NoError(t, err) - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, pageSize, compress) - require.NoError(t, err) + enc := record.Encoder{EnableSTStorage: enableStStorage} + w, err := NewSize(nil, nil, wdir, pageSize, compress) + require.NoError(t, err) - for i := range segments { - for j := range seriesCount { - ref := j + (i * 100) - series := enc.Series([]record.RefSeries{ - { - Ref: 
chunks.HeadSeriesRef(ref), - Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), - }, - }, nil) - require.NoError(t, w.Log(series)) - - for range samplesCount { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ + for i := range segments { + for j := range seriesCount { + ref := j + (i * 100) + series := enc.Series([]record.RefSeries{ { - Ref: chunks.HeadSeriesRef(inner), - T: int64(i), - V: float64(i), + Ref: chunks.HeadSeriesRef(ref), + Labels: labels.FromStrings("__name__", fmt.Sprintf("metric_%d", i)), }, }, nil) - require.NoError(t, w.Log(sample)) + require.NoError(t, w.Log(series)) + + for range samplesCount { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + { + Ref: chunks.HeadSeriesRef(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + require.NoError(t, w.Log(sample)) + } } } - } - require.NoError(t, w.Close()) + require.NoError(t, w.Close()) - wt := newWriteToMock(0) - watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) - watcher.MaxSegment = segments + wt := newWriteToMock(0) + watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) + watcher.MaxSegment = segments - watcher.SetMetrics() - startTime := time.Now() + watcher.SetMetrics() + startTime := time.Now() - err = watcher.Run() - require.Less(t, time.Since(startTime), readTimeout) - require.NoError(t, err) - }) + err = watcher.Run() + require.Less(t, time.Since(startTime), readTimeout) + require.NoError(t, err) + }) + } } } -func generateWALRecords(w *WL, segment, seriesCount, samplesCount int) error { - enc := record.Encoder{} +func generateWALRecords(w *WL, segment, seriesCount, samplesCount int, enableStStorage bool) error { + enc := record.Encoder{EnableSTStorage: enableStStorage} for j := range seriesCount { ref := j + (segment * 100) series := enc.Series([]record.RefSeries{ @@ -777,61 +793,63 @@ func TestRun_AvoidNotifyWhenBehind(t *testing.T) { const seriesCount = 10 const samplesCount = 50 - for _, compress := range compression.Types() { - t.Run(fmt.Sprintf("compress=%s", compress), func(t *testing.T) { - dir := t.TempDir() + for _, enableStStorage := range []bool{false, true} { + for _, compress := range compression.Types() { + t.Run(fmt.Sprintf("compress=%s,stStorage=%v", compress, enableStStorage), func(t *testing.T) { + dir := t.TempDir() - wdir := path.Join(dir, "wal") - err := os.Mkdir(wdir, 0o777) - require.NoError(t, err) + wdir := path.Join(dir, "wal") + err := os.Mkdir(wdir, 0o777) + require.NoError(t, err) - w, err := NewSize(nil, nil, wdir, segmentSize, compress) - require.NoError(t, err) - // Write to 00000000, the watcher will read series from it. - require.NoError(t, generateWALRecords(w, 0, seriesCount, samplesCount)) - // Create 00000001, the watcher will tail it once started. - w.NextSegment() - - // Set up the watcher and run it in the background. - wt := newWriteToMock(time.Millisecond) - watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) - watcher.SetMetrics() - watcher.MaxSegment = segmentsToRead - - var g errgroup.Group - g.Go(func() error { - startTime := time.Now() - err = watcher.Run() - if err != nil { - return err - } - // If the watcher was to wait for readTicker to read every new segment, it would need readTimeout * segmentsToRead. 
- d := time.Since(startTime) - if d > readTimeout { - return fmt.Errorf("watcher ran for %s, it shouldn't rely on readTicker=%s to read the new segments", d, readTimeout) - } - return nil - }) - - // The watcher went through 00000000 and is tailing the next one. - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumSeries() == seriesCount - }) - - // In the meantime, add some new segments in bulk. - // We should end up with segmentsToWrite + 1 segments now. - for i := 1; i < segmentsToWrite; i++ { - require.NoError(t, generateWALRecords(w, i, seriesCount, samplesCount)) + w, err := NewSize(nil, nil, wdir, segmentSize, compress) + require.NoError(t, err) + // Write to 00000000, the watcher will read series from it. + require.NoError(t, generateWALRecords(w, 0, seriesCount, samplesCount, enableStStorage)) + // Create 00000001, the watcher will tail it once started. w.NextSegment() - } - // Wait for the watcher. - require.NoError(t, g.Wait()) + // Set up the watcher and run it in the background. + wt := newWriteToMock(time.Millisecond) + watcher := NewWatcher(wMetrics, nil, nil, "", wt, dir, false, false, false) + watcher.SetMetrics() + watcher.MaxSegment = segmentsToRead - // All series and samples were read. - require.Equal(t, (segmentsToRead+1)*seriesCount, wt.checkNumSeries()) // Series from 00000000 are also read. - require.Equal(t, segmentsToRead*seriesCount*samplesCount, wt.samplesAppended) - require.NoError(t, w.Close()) - }) + var g errgroup.Group + g.Go(func() error { + startTime := time.Now() + err = watcher.Run() + if err != nil { + return err + } + // If the watcher was to wait for readTicker to read every new segment, it would need readTimeout * segmentsToRead. + d := time.Since(startTime) + if d > readTimeout { + return fmt.Errorf("watcher ran for %s, it shouldn't rely on readTicker=%s to read the new segments", d, readTimeout) + } + return nil + }) + + // The watcher went through 00000000 and is tailing the next one. + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumSeries() == seriesCount + }) + + // In the meantime, add some new segments in bulk. + // We should end up with segmentsToWrite + 1 segments now. + for i := 1; i < segmentsToWrite; i++ { + require.NoError(t, generateWALRecords(w, i, seriesCount, samplesCount, enableStStorage)) + w.NextSegment() + } + + // Wait for the watcher. + require.NoError(t, g.Wait()) + + // All series and samples were read. + require.Equal(t, (segmentsToRead+1)*seriesCount, wt.checkNumSeries()) // Series from 00000000 are also read. + require.Equal(t, segmentsToRead*seriesCount*samplesCount, wt.samplesAppended) + require.NoError(t, w.Close()) + }) + } } } diff --git a/util/testrecord/record.go b/util/testrecord/record.go new file mode 100644 index 0000000000..e5071d42c8 --- /dev/null +++ b/util/testrecord/record.go @@ -0,0 +1,96 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package testrecord + +import ( + "math" + "testing" + + "github.com/prometheus/prometheus/tsdb/chunks" + "github.com/prometheus/prometheus/tsdb/record" +) + +type RefSamplesCase string + +const ( + Realistic1000Samples RefSamplesCase = "real1000" + Realistic1000WithVariableSTSamples RefSamplesCase = "real1000-vst" + Realistic1000WithConstSTSamples RefSamplesCase = "real1000-cst" + WorstCase1000 RefSamplesCase = "worst1000" + WorstCase1000WithSTSamples RefSamplesCase = "worst1000-st" +) + +func GenTestRefSamplesCase(t testing.TB, c RefSamplesCase) []record.RefSample { + t.Helper() + + ret := make([]record.RefSample, 1e3) + switch c { + // Samples are across series, so likely all have the same timestamp. + case Realistic1000Samples: + for i := range ret { + ret[i].Ref = chunks.HeadSeriesRef(i) + ret[i].T = int64(12423423) + ret[i].V = highVarianceFloat(i) + } + // Likely the start times will all be the same with deltas. + case Realistic1000WithConstSTSamples: + for i := range ret { + ret[i].Ref = chunks.HeadSeriesRef(i) + ret[i].ST = int64(12423423) + ret[i].T = int64(12423423 + 15) + ret[i].V = highVarianceFloat(i) + } + // Maybe series have different start times though + case Realistic1000WithVariableSTSamples: + for i := range ret { + ret[i].Ref = chunks.HeadSeriesRef(i) + ret[i].ST = int64((12423423 / 9) * (i % 10)) + ret[i].T = int64(12423423) + ret[i].V = highVarianceFloat(i) + } + case WorstCase1000: + for i := range ret { + ret[i].Ref = chunks.HeadSeriesRef(i) + ret[i].T = highVarianceInt(i) + ret[i].V = highVarianceFloat(i) + } + case WorstCase1000WithSTSamples: + for i := range ret { + ret[i].Ref = chunks.HeadSeriesRef(i) + + // Worst case is when the values are significantly different + // to each other which breaks delta encoding. + ret[i].ST = highVarianceInt(i+1) / 1024 // Make sure ST is not comparable to T + ret[i].T = highVarianceInt(i) + ret[i].V = highVarianceFloat(i) + } + default: + t.Fatal("unknown case", c) + } + return ret +} + +func highVarianceInt(i int) int64 { + if i%2 == 0 { + return math.MinInt32 + } + return math.MaxInt32 +} + +func highVarianceFloat(i int) float64 { + if i%2 == 0 { + return math.SmallestNonzeroFloat32 + } + return math.MaxFloat32 +}
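
The testrecord helper above is intended to feed encoder tests and benchmarks with sample shapes that stress delta encoding in different ways (identical timestamps, constant vs. variable start times, and alternating extreme values). Below is a minimal benchmark sketch of how it could be consumed; the benchmark name, sub-test labels, and the bytes-per-record metric are illustrative assumptions and not part of this change, while record.Encoder{EnableSTStorage: ...} and Encoder.Samples match the API used elsewhere in this diff.

package testrecord_test

import (
	"fmt"
	"testing"

	"github.com/prometheus/prometheus/tsdb/record"
	"github.com/prometheus/prometheus/util/testrecord"
)

// BenchmarkSamplesEncoding encodes each generated sample shape with and
// without ST storage enabled and reports the resulting record size.
func BenchmarkSamplesEncoding(b *testing.B) {
	cases := []testrecord.RefSamplesCase{
		testrecord.Realistic1000Samples,
		testrecord.Realistic1000WithConstSTSamples,
		testrecord.Realistic1000WithVariableSTSamples,
		testrecord.WorstCase1000,
		testrecord.WorstCase1000WithSTSamples,
	}
	for _, c := range cases {
		samples := testrecord.GenTestRefSamplesCase(b, c)
		for _, enableST := range []bool{false, true} {
			b.Run(fmt.Sprintf("%s/st=%v", c, enableST), func(b *testing.B) {
				enc := record.Encoder{EnableSTStorage: enableST}
				b.ReportAllocs()
				var buf []byte
				for i := 0; i < b.N; i++ {
					// Reuse the buffer so the benchmark measures encoding, not allocation.
					buf = enc.Samples(samples, buf[:0])
				}
				// Report the encoded size of one full 1000-sample record.
				b.ReportMetric(float64(len(buf)), "bytes/record")
			})
		}
	}
}

Keeping WorstCase1000WithSTSamples in the matrix documents the overhead ceiling of the ST column: when start times do not delta-compress, the encoded record should grow noticeably compared to WorstCase1000.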