Add unauthenticated probe listener for health endpoints

Signed-off-by: Jainil Rana <jainil@Jainils-MacBook-Air.local>
This commit is contained in:
Jainil Rana 2026-01-28 20:26:11 -05:00
parent 5399eb9526
commit 8ca9dc102c
4 changed files with 176 additions and 1 deletions

View file

@ -375,6 +375,8 @@ func main() {
a.Flag("web.listen-address", "Address to listen on for UI, API, and telemetry. Can be repeated.").
Default("0.0.0.0:9090").StringsVar(&cfg.web.ListenAddresses)
a.Flag("web.probe-listen-address", "Address to listen on for unauthenticated health probes (/-/healthy, /-/ready). Can be repeated.").
Default("").StringsVar(&cfg.web.ProbeListenAddresses)
a.Flag("auto-gomaxprocs", "Automatically set GOMAXPROCS to match Linux container CPU quota").
Default("true").BoolVar(&cfg.maxprocsEnable)
@ -1104,6 +1106,12 @@ func main() {
os.Exit(1)
}
probeListeners, err := webHandler.ProbeListeners()
if err != nil {
logger.Error("Unable to start probe listener", "err", err)
os.Exit(1)
}
err = toolkit_web.Validate(*webConfig)
if err != nil {
logger.Error("Unable to validate web configuration file", "err", err)
@ -1165,6 +1173,21 @@ func main() {
},
)
}
{
if len(probeListeners) > 0 {
g.Add(
func() error {
if err := webHandler.RunProbes(ctxWeb, probeListeners); err != nil {
return fmt.Errorf("error starting the probe server: %w", err)
}
return nil
},
func(error) {
cancelWeb()
},
)
}
}
if !agentMode {
// Rule manager.
g.Add(

View file

@ -14,6 +14,7 @@ The Prometheus monitoring server
| <code class="text-nowrap">--config.file</code> | Prometheus configuration file path. | `prometheus.yml` |
| <code class="text-nowrap">--config.auto-reload-interval</code> | Specifies the interval for checking and automatically reloading the Prometheus configuration file upon detecting changes. | `30s` |
| <code class="text-nowrap">--web.listen-address</code> <code class="text-nowrap">...</code> | Address to listen on for UI, API, and telemetry. Can be repeated. | `0.0.0.0:9090` |
| <code class="text-nowrap">--web.probe-listen-address</code> <code class="text-nowrap">...</code> | Address to listen on for unauthenticated health probes (/-/healthy, /-/ready). Can be repeated. | |
| <code class="text-nowrap">--auto-gomaxprocs</code> | Automatically set GOMAXPROCS to match Linux container CPU quota | `true` |
| <code class="text-nowrap">--auto-gomemlimit</code> | Automatically set GOMEMLIMIT to match Linux container or system memory limit | `true` |
| <code class="text-nowrap">--auto-gomemlimit.ratio</code> | The ratio of reserved GOMEMLIMIT memory to the detected maximum container or system memory | `0.9` |

View file

@ -17,6 +17,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
@ -272,7 +273,9 @@ type Options struct {
NotificationsSub func() (<-chan notifications.Notification, func(), bool)
Flags map[string]string
ListenAddresses []string
ListenAddresses []string
ProbeListenAddresses []string
CORSOrigin *regexp.Regexp
ReadTimeout time.Duration
MaxConnections int
@ -672,6 +675,22 @@ func (h *Handler) Listeners() ([]net.Listener, error) {
return listeners, nil
}
// ProbeListeners creates the TCP listeners for the unauthenticated probe
// endpoints, one per non-empty entry in Options.ProbeListenAddresses.
//
// Empty entries are skipped: the command-line flag defaults to an empty
// string, and handing "" on to Listener would (assuming it ends up in
// net.Listen) bind an arbitrary port instead of leaving the probe server
// disabled.
//
// It returns nil, nil when no usable address is configured. If any listener
// cannot be created, the listeners opened so far are closed and a nil slice
// is returned together with the error.
func (h *Handler) ProbeListeners() ([]net.Listener, error) {
	addresses := make([]string, 0, len(h.options.ProbeListenAddresses))
	for _, address := range h.options.ProbeListenAddresses {
		if address != "" {
			addresses = append(addresses, address)
		}
	}
	if len(addresses) == 0 {
		return nil, nil
	}

	listeners := make([]net.Listener, 0, len(addresses))
	// All probe listeners share one semaphore built from Options.MaxConnections.
	sem := netconnlimit.NewSharedSemaphore(h.options.MaxConnections)
	for _, address := range addresses {
		listener, err := h.Listener(address, sem)
		if err != nil {
			// Best-effort cleanup so a partial failure does not leak bound
			// sockets; the listen error is the one worth reporting.
			for _, opened := range listeners {
				_ = opened.Close()
			}
			return nil, err
		}
		listeners = append(listeners, listener)
	}
	return listeners, nil
}
// Listener creates the TCP listener for web requests.
func (h *Handler) Listener(address string, sem chan struct{}) (net.Listener, error) {
h.logger.Info("Start listening for connections", "address", address)
@ -741,6 +760,58 @@ func (h *Handler) Run(ctx context.Context, listeners []net.Listener, webConfig s
}
}
// RunProbes serves the unauthenticated /-/healthy and /-/ready endpoints on
// the given listeners and blocks until ctx is cancelled or one of the
// listeners stops serving.
//
// If listeners is empty they are created via ProbeListeners; when that yields
// no listeners either, RunProbes returns immediately with whatever error
// ProbeListeners reported (nil when no probe address is configured).
//
// Only GET and HEAD are accepted; other methods receive 405 with an Allow
// header. /-/ready is additionally wrapped in h.testReady.
//
// It returns nil on context cancellation or when the server was closed, and
// the first Serve error otherwise.
func (h *Handler) RunProbes(ctx context.Context, listeners []net.Listener) error {
	if len(listeners) == 0 {
		var err error
		listeners, err = h.ProbeListeners()
		if err != nil || len(listeners) == 0 {
			return err
		}
	}

	// getOrHead rejects every method except GET and HEAD before calling next.
	getOrHead := func(next http.HandlerFunc) http.HandlerFunc {
		return func(w http.ResponseWriter, r *http.Request) {
			if r.Method != http.MethodGet && r.Method != http.MethodHead {
				w.Header().Set("Allow", "GET, HEAD")
				w.WriteHeader(http.StatusMethodNotAllowed)
				return
			}
			next(w, r)
		}
	}

	mux := http.NewServeMux()
	mux.HandleFunc("/-/healthy", getOrHead(func(w http.ResponseWriter, _ *http.Request) {
		fmt.Fprintf(w, "%s is Healthy.\n", h.options.AppName)
	}))
	// testReady stays outermost so the readiness check runs before the
	// method check, exactly as when the check lived inside the handler.
	mux.Handle("/-/ready", h.testReady(getOrHead(func(w http.ResponseWriter, _ *http.Request) {
		fmt.Fprintf(w, "%s is Ready.\n", h.options.AppName)
	})))

	httpSrv := &http.Server{
		Handler:     mux,
		ErrorLog:    slog.NewLogLogger(h.logger.Handler(), slog.LevelError),
		ReadTimeout: h.options.ReadTimeout,
	}

	// Buffered for every listener so no Serve goroutine blocks on send after
	// this function has returned.
	errCh := make(chan error, len(listeners))
	for _, l := range listeners {
		go func(ln net.Listener) {
			errCh <- httpSrv.Serve(ln)
		}(l)
	}

	select {
	case err := <-errCh:
		// One listener stopped serving. Close the server so the remaining
		// listeners and their goroutines are not left running after we
		// return; the Serve error is the one worth reporting.
		_ = httpSrv.Close()
		if errors.Is(err, http.ErrServerClosed) {
			return nil
		}
		return err
	case <-ctx.Done():
		// Shutdown errors are ignored: we are stopping regardless.
		_ = httpSrv.Shutdown(ctx)
		return nil
	}
}
func (h *Handler) consoles(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
name := route.Param(ctx, "filepath")

View file

@ -32,6 +32,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/config"
@ -39,6 +40,7 @@ import (
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/scrape"
"github.com/prometheus/prometheus/tsdb"
"github.com/prometheus/prometheus/util/features"
"github.com/prometheus/prometheus/util/testutil"
)
@ -209,6 +211,84 @@ func TestReadyAndHealthy(t *testing.T) {
cleanupTestResponse(t, resp)
}
// TestProbeServer checks that the probe server exposes only the health
// endpoints: /-/healthy always answers 200, /-/ready follows the handler's
// readiness state, non-GET/HEAD methods are rejected, other paths are 404,
// and RunProbes returns without error once its context is cancelled.
func TestProbeServer(t *testing.T) {
	logger := promslog.NewNopLogger()
	reg := prometheus.NewRegistry()

	dbDir := t.TempDir()
	db, err := tsdb.Open(dbDir, nil, nil, nil, nil)
	require.NoError(t, err)
	t.Cleanup(func() {
		require.NoError(t, db.Close())
	})

	port := fmt.Sprintf(":%d", testutil.RandomUnprivilegedPort(t))
	opts := &Options{
		ListenAddresses: []string{port},
		RoutePrefix:     "/",
		ExternalURL: &url.URL{
			Scheme: "http",
			Host:   "localhost" + port,
			Path:   "/",
		},
		LocalStorage:    &dbAdapter{db},
		TSDBDir:         dbDir,
		ScrapeManager:   &scrape.Manager{},
		RuleManager:     &rules.Manager{},
		Registerer:      reg,
		Gatherer:        reg,
		FeatureRegistry: features.DefaultRegistry,
		AppName:         "Prometheus",
		Flags:           map[string]string{},
	}
	h := New(logger, opts)

	// The probe server gets its own ephemeral listener, independent of the
	// main web listen address configured above.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	defer ln.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	errCh := make(chan error, 1)
	go func() {
		errCh <- h.RunProbes(ctx, []net.Listener{ln})
	}()

	baseURL := "http://" + ln.Addr().String()

	// statusOf issues a request and returns the status code, always closing
	// the response body, even when a later assertion fails.
	statusOf := func(method, path string) int {
		t.Helper()
		req, err := http.NewRequest(method, baseURL+path, nil)
		require.NoError(t, err)
		resp, err := http.DefaultClient.Do(req)
		require.NoError(t, err)
		defer resp.Body.Close()
		return resp.StatusCode
	}

	require.Equal(t, http.StatusOK, statusOf(http.MethodGet, "/-/healthy"))
	require.Equal(t, http.StatusMethodNotAllowed, statusOf(http.MethodPost, "/-/healthy"))

	require.Equal(t, http.StatusServiceUnavailable, statusOf(http.MethodGet, "/-/ready"))
	h.SetReady(Ready)
	require.Equal(t, http.StatusOK, statusOf(http.MethodGet, "/-/ready"))

	// Anything besides the probe endpoints must not be served here.
	require.Equal(t, http.StatusNotFound, statusOf(http.MethodGet, "/metrics"))

	cancel()
	select {
	case err := <-errCh:
		require.NoError(t, err)
	case <-time.After(2 * time.Second):
		t.Fatal("probe server did not stop")
	}
}
func TestRoutePrefix(t *testing.T) {
t.Parallel()
dbDir := t.TempDir()