promtool: Add --lint flag to check metrics command to allow disabling linting (#17669)

* promtool: allow cardinality with metrics linting and add --lint to check metrics Signed-off-by: ADITYA TIWARI <adityatiwari342005@gmail.com> * fix/ci: Simplify test case variable declaration. Remove unnecessary variable declaration in test cases. Signed-off-by: ADITYA TIWARI <142050150+ADITYATIWARI342005@users.noreply.github.com> * promtool: avoid Tee for --lint=none Signed-off-by: ADITYA TIWARI <adityatiwari342005@gmail.com> * promtool: validate at least one feature enabled in check metrics. Addresses feedback to ensure the command does something useful: it now fails with a clear error when --lint=none is given without --extended. Signed-off-by: ADITYA TIWARI <adityatiwari342005@gmail.com> --------- Signed-off-by: ADITYA TIWARI <adityatiwari342005@gmail.com> Signed-off-by: ADITYA TIWARI <142050150+ADITYATIWARI342005@users.noreply.github.com>
2025-12-18 21:26:32 -05:00 · 2025-12-18 15:49:06 +05:30 · 2025-12-18 15:49:06 +05:30 · 146080186d
commit 146080186d
parent 962341f621
3 changed files with 150 additions and 20 deletions
--- a/cmd/promtool/main.go
+++ b/cmd/promtool/main.go
@ -162,7 +162,11 @@ func main() {
 	checkRulesIgnoreUnknownFields := checkRulesCmd.Flag("ignore-unknown-fields", "Ignore unknown fields in the rule files. This is useful when you want to extend rule files with custom metadata. Ensure that those fields are removed before loading them into the Prometheus server as it performs strict checks by default.").Default("false").Bool()

 	checkMetricsCmd := checkCmd.Command("metrics", checkMetricsUsage)
-	checkMetricsExtended := checkCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
+	checkMetricsExtended := checkMetricsCmd.Flag("extended", "Print extended information related to the cardinality of the metrics.").Bool()
+	checkMetricsLint := checkMetricsCmd.Flag(
+		"lint",
+		"Linting checks to apply for metrics. Available options are: all, none. Use --lint=none to disable metrics linting.",
+	).Default(lintOptionAll).String()
 	agentMode := checkConfigCmd.Flag("agent", "Check config file for Prometheus in Agent mode.").Bool()

 	queryCmd := app.Command("query", "Run query against a Prometheus server.")
@ -375,7 +379,7 @@ func main() {
 		os.Exit(CheckRules(newRulesLintConfig(*checkRulesLint, *checkRulesLintFatal, *checkRulesIgnoreUnknownFields, model.UTF8Validation), *ruleFiles...))

 	case checkMetricsCmd.FullCommand():
-		os.Exit(CheckMetrics(*checkMetricsExtended))
+		os.Exit(CheckMetrics(*checkMetricsExtended, *checkMetricsLint))

 	case pushMetricsCmd.FullCommand():
 		os.Exit(PushMetrics(remoteWriteURL, httpRoundTripper, *pushMetricsHeaders, *pushMetricsTimeout, *pushMetricsProtoMsg, *pushMetricsLabels, *metricFiles...))
@ -1018,36 +1022,53 @@ func ruleMetric(rule rulefmt.Rule) string {
 }

 var checkMetricsUsage = strings.TrimSpace(`
-Pass Prometheus metrics over stdin to lint them for consistency and correctness.
+Pass Prometheus metrics over stdin to lint them for consistency and correctness, and optionally perform cardinality analysis.

 examples:

 $ cat metrics.prom | promtool check metrics

-$ curl -s http://localhost:9090/metrics | promtool check metrics
+$ curl -s http://localhost:9090/metrics | promtool check metrics --extended
+
+$ curl -s http://localhost:9100/metrics | promtool check metrics --extended --lint=none
 `)

 // CheckMetrics performs a linting pass on input metrics.
-func CheckMetrics(extended bool) int {
-	var buf bytes.Buffer
-	tee := io.TeeReader(os.Stdin, &buf)
-	l := promlint.New(tee)
-	problems, err := l.Lint()
-	if err != nil {
-		fmt.Fprintln(os.Stderr, "error while linting:", err)
+func CheckMetrics(extended bool, lint string) int {
+	// Validate that at least one feature is enabled.
+	if !extended && lint == lintOptionNone {
+		fmt.Fprintln(os.Stderr, "error: at least one of --extended or linting must be enabled")
+		fmt.Fprintln(os.Stderr, "Use --extended for cardinality analysis, or remove --lint=none to enable linting")
 		return failureExitCode
 	}

-	for _, p := range problems {
-		fmt.Fprintln(os.Stderr, p.Metric, p.Text)
+	var buf bytes.Buffer
+	var (
+		problems []promlint.Problem
+		reader   io.Reader
+		err      error
+	)
+
+	if lint != lintOptionNone {
+		tee := io.TeeReader(os.Stdin, &buf)
+		l := promlint.New(tee)
+		problems, err = l.Lint()
+		if err != nil {
+			fmt.Fprintln(os.Stderr, "error while linting:", err)
+			return failureExitCode
+		}
+		for _, p := range problems {
+			fmt.Fprintln(os.Stderr, p.Metric, p.Text)
+		}
+		reader = &buf
+	} else {
+		reader = os.Stdin
 	}

-	if len(problems) > 0 {
-		return lintErrExitCode
-	}
+	hasLintProblems := len(problems) > 0

 	if extended {
-		stats, total, err := checkMetricsExtended(&buf)
+		stats, total, err := checkMetricsExtended(reader)
 		if err != nil {
 			fmt.Fprintln(os.Stderr, err)
 			return failureExitCode
@ -1061,6 +1082,10 @@ func CheckMetrics(extended bool) int {
 		w.Flush()
 	}

+	if hasLintProblems {
+		return lintErrExitCode
+	}
+
 	return successExitCode
 }

--- a/cmd/promtool/main_test.go
+++ b/cmd/promtool/main_test.go
@ -18,6 +18,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"io"
 	"net/http"
 	"net/http/httptest"
 	"net/url"
@ -402,6 +403,99 @@ func TestCheckMetricsExtended(t *testing.T) {
 	}, stats)
 }

+func TestCheckMetricsLintOptions(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("Skipping on windows")
+	}
+
+	const testMetrics = `
+# HELP testMetric_CamelCase A test metric with camelCase
+# TYPE testMetric_CamelCase gauge
+testMetric_CamelCase{label="value1"} 1
+`
+
+	tests := []struct {
+		name        string
+		lint        string
+		extended    bool
+		wantErrCode int
+		wantLint    bool
+		wantCard    bool
+	}{
+		{
+			name:        "default_all_with_extended",
+			lint:        lintOptionAll,
+			extended:    true,
+			wantErrCode: lintErrExitCode,
+			wantLint:    true,
+			wantCard:    true,
+		},
+		{
+			name:        "lint_none_with_extended",
+			lint:        lintOptionNone,
+			extended:    true,
+			wantErrCode: successExitCode,
+			wantLint:    false,
+			wantCard:    true,
+		},
+		{
+			name:        "both_disabled_fails",
+			lint:        lintOptionNone,
+			extended:    false,
+			wantErrCode: failureExitCode,
+			wantLint:    false,
+			wantCard:    false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			r, w, err := os.Pipe()
+			require.NoError(t, err)
+			_, err = w.WriteString(testMetrics)
+			require.NoError(t, err)
+			w.Close()
+
+			oldStdin := os.Stdin
+			os.Stdin = r
+			defer func() { os.Stdin = oldStdin }()
+
+			oldStdout := os.Stdout
+			oldStderr := os.Stderr
+			rOut, wOut, err := os.Pipe()
+			require.NoError(t, err)
+			rErr, wErr, err := os.Pipe()
+			require.NoError(t, err)
+			os.Stdout = wOut
+			os.Stderr = wErr
+
+			code := CheckMetrics(tt.extended, tt.lint)
+
+			wOut.Close()
+			wErr.Close()
+			os.Stdout = oldStdout
+			os.Stderr = oldStderr
+
+			var outBuf, errBuf bytes.Buffer
+			_, _ = io.Copy(&outBuf, rOut)
+			_, _ = io.Copy(&errBuf, rErr)
+
+			require.Equal(t, tt.wantErrCode, code)
+			if tt.wantLint {
+				require.Contains(t, errBuf.String(), "testMetric_CamelCase")
+			} else {
+				require.NotContains(t, errBuf.String(), "testMetric_CamelCase")
+			}
+
+			if tt.wantCard {
+				require.Contains(t, outBuf.String(), "Cardinality")
+			} else {
+				require.NotContains(t, outBuf.String(), "Cardinality")
+			}
+		})
+	}
+}
+
 func TestExitCodes(t *testing.T) {
 	if testing.Short() {
 		t.Skip("skipping test in short mode.")
--- a/docs/command-line/promtool.md
+++ b/docs/command-line/promtool.md
@ -59,7 +59,6 @@ Check the resources for validity.
 | Flag | Description | Default |
 | --- | --- | --- |
 | <code class="text-nowrap">--query.lookback-delta</code> | The server's maximum query lookback duration. | `5m` |
-| <code class="text-nowrap">--extended</code> | Print extended information related to the cardinality of the metrics. |  |



@ -192,13 +191,25 @@ Check if the rule files are valid or not.

 ##### `promtool check metrics`

-Pass Prometheus metrics over stdin to lint them for consistency and correctness.
+Pass Prometheus metrics over stdin to lint them for consistency and correctness, and optionally perform cardinality analysis.

 examples:

 $ cat metrics.prom | promtool check metrics

-$ curl -s http://localhost:9090/metrics | promtool check metrics
+$ curl -s http://localhost:9090/metrics | promtool check metrics `--extended`
+
+$ curl -s http://localhost:9100/metrics | promtool check metrics `--extended` `--lint`=none
+
+
+
+###### Flags
+
+| Flag | Description | Default |
+| --- | --- | --- |
+| <code class="text-nowrap">--extended</code> | Print extended information related to the cardinality of the metrics. |  |
+| <code class="text-nowrap">--lint</code> | Linting checks to apply for metrics. Available options are: all, none. Use --lint=none to disable metrics linting. | `all` |
+