Merge pull request #133844 from pohly/invariants

E2E: check log output to detect DATA RACEs
Kubernetes Prow Robot 2026-02-04 06:30:27 +05:30 committed by GitHub
commit cde1fe6a40
7 changed files with 1995 additions and 50 deletions

View file

@@ -213,6 +213,13 @@ fi
# is not used.
suite_args+=(--report-complete-ginkgo --report-complete-junit)
# Additional e2e.test arguments. Split into individual arguments at spaces.
# For more complex arguments pass additional arguments to the script.
if [[ -n "${KUBE_E2E_TEST_ARGS:-}" ]]; then
# shellcheck disable=SC2206 # Splitting at word boundaries is intentional here.
suite_args+=(${KUBE_E2E_TEST_ARGS})
fi
# When SIGTERM doesn't reach the E2E test suite binaries, ginkgo will exit
# without collecting information about the currently running and
# potentially stuck tests. This seems to happen when Prow shuts down a test

View file

@@ -12,6 +12,11 @@ nodes:
- role: worker
- role: worker
kubeadmConfigPatches:
- |
  kind: KubeletConfiguration
  apiVersion: kubelet.config.k8s.io/v1beta1
  enableSystemLogHandler: true
  enableSystemLogQuery: true
# v1beta4 for the future (v1.35.0+ ?)
# https://github.com/kubernetes-sigs/kind/issues/3847
# TODO: drop v1beta3 when kind makes the switch
@@ -78,3 +83,4 @@ kubeadmConfigPatches:
# --config <(cat test/e2e/dra/kind.yaml; echo " <some feature>: true")
featureGates:
  DynamicResourceAllocation: true
  NodeLogQuery: true

View file

@@ -55,6 +55,7 @@ import (
_ "k8s.io/kubernetes/test/e2e/dra"
_ "k8s.io/kubernetes/test/e2e/instrumentation"
_ "k8s.io/kubernetes/test/e2e/invariants"
"k8s.io/kubernetes/test/e2e/invariants/logcheck"
_ "k8s.io/kubernetes/test/e2e/kubectl"
_ "k8s.io/kubernetes/test/e2e/lifecycle"
_ "k8s.io/kubernetes/test/e2e/lifecycle/bootstrap"
@@ -78,6 +79,7 @@ func handleFlags() {
config.CopyFlags(config.Flags, flag.CommandLine)
framework.RegisterCommonFlags(flag.CommandLine)
framework.RegisterClusterFlags(flag.CommandLine)
logcheck.RegisterFlags(flag.CommandLine)
flag.Parse()
}

View file

@@ -0,0 +1,667 @@
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package logcheck extends a Ginkgo test suite with optional monitoring
// of system log files. Right now it can detect DATA RACE problems.
//
// This may get expanded in the future, for example to check also
// for unhandled and potentially unexpected errors (https://github.com/kubernetes/kubernetes/issues/122005)
// or log spam (https://github.com/kubernetes/kubernetes/issues/109297).
//
// Because it grabs log output as it is produced, it is possible to dump *all*
// data into $ARTIFACTS/system-logs, in contrast to other approaches which
// potentially only capture the tail of the output (log rotation).
//
// The additional check(s) must be opt-in because they are not portable.
// The package registers additional command line flags for that.
// Dedicated jobs are necessary which enable them.
//
// For DATA RACE, the binaries in the cluster must be compiled with race
// detection (https://github.com/kubernetes/kubernetes/pull/133834/files).
// It's also necessary to specify which containers to check. Other output
// (e.g. kubelet in a kind cluster) currently cannot be checked.
//
// In ginkgo.ReportBeforeSuite, the main Ginkgo worker process starts
// monitoring log output:
//
// - The podlogs package watches pods in namespaces of interest (by default,
// any that have "system" in their name). For each container in those pods,
// a logOutputChecker instance is created and is passed log output lines
// via Write. podlogs streams output, similar to "kubectl logs -f".
//
// - Kubelet log output is retrieved periodically via the node log query
// feature. Start and end times of each chunk are chosen so that there
// are no gaps. The output is stripped of additional journald line
// headers and then passed to one logOutputChecker per node.
//
// - logOutputChecker detects DATA RACE reports based on their prefix and suffix lines.
//
// - Log entries emitted during monitoring are buffered.
//
// In ginkgo.ReportAfterSuite, the log output is emitted as stdout of the check
// together with a summary of what was checked without detecting problems.
// DATA RACEs and unexpected log errors are turned into a failure.
package logcheck
import (
"bufio"
"context"
"errors"
"flag"
"fmt"
"io"
"maps"
"os"
"path"
"regexp"
"slices"
"strconv"
"strings"
"sync"
"time"
"github.com/onsi/ginkgo/v2"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"k8s.io/klog/v2/textlogger"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/storage/podlogs"
)
const (
// defaultNamespacesRE is the default regular expression for which namespaces need to be checked,
// if checking is enabled: anything which contains the word "system".
//
// If we had an API for identifying "system" namespaces, we could use that, but we
// don't have that yet (https://github.com/kubernetes/enhancements/issues/5708).
defaultNamespacesRE = `system`
// defaultNodesRE is the default regular expression for which nodes
// should have their kubelet log output checked via the log query
// feature (https://kubernetes.io/docs/concepts/cluster-administration/system-logs/#log-query).
defaultNodesRE = `.*`
dataRaceStart = "WARNING: DATA RACE\n"
dataRaceEnd = "==================\n"
maxBacktraceLines = 20
)
var (
enabledLogChecks = logCheck{
namespaces: regexpValue{
re: regexp.MustCompile(defaultNamespacesRE),
},
nodes: regexpValue{
re: regexp.MustCompile(defaultNodesRE),
},
}
lc *logChecker
lcLogger klog.Logger
)
// logCheck determines what gets checked in log output.
type logCheck struct {
// namespaces contains a regular expression which determines which namespaces need to be checked.
// All containers in those namespaces are checked.
// If the regular expression is nil, the default is used.
namespaces regexpValue
// nodes contains a regular expression which determines which nodes need to be checked
// by retrieving the kubelet log via the log query feature (https://kubernetes.io/docs/concepts/cluster-administration/system-logs/#log-query).
nodes regexpValue
// dataRaces enables checking for "DATA RACE" reports.
dataRaces bool
// More checks may get added in the future.
}
// any returns true if any log output check is enabled.
func (c logCheck) any() bool {
// Needs to be extended when adding new checks.
return c.dataRaces
}
// regexpValue implements flag.Value for a regular expression.
type regexpValue struct {
re *regexp.Regexp
}
var _ flag.Value = &regexpValue{}
func (r *regexpValue) String() string { return r.re.String() }
func (r *regexpValue) Set(expr string) error {
re, err := regexp.Compile(expr)
if err != nil {
// This already starts with "error parsing regexp" and
// the caller adds the expression string,
// so no need to wrap the error here.
return err
}
r.re = re
return nil
}
// RegisterFlags adds command line flags for configuring the package to the given flag set.
// They have "logcheck" as prefix.
func RegisterFlags(fs *flag.FlagSet) {
fs.BoolVar(&enabledLogChecks.dataRaces, "logcheck-data-races", false, "enables checking logs for DATA RACE warnings")
fs.Var(&enabledLogChecks.namespaces, "logcheck-namespaces-regexp", "all namespaces matching this regular expression get checked")
fs.Var(&enabledLogChecks.nodes, "logcheck-nodes-regexp", "all kubelets on nodes matching this regular expression get checked")
}
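For illustration, a minimal sketch of how a harness could register and enable these checks on its own flag set; the flag names come from RegisterFlags above, the sample values are made up, and by default none of the checks run:

    package main

    import (
        "flag"
        "fmt"

        "k8s.io/kubernetes/test/e2e/invariants/logcheck"
    )

    func main() {
        fs := flag.NewFlagSet("e2e.test", flag.ExitOnError)
        logcheck.RegisterFlags(fs)
        // Hypothetical invocation: enable the DATA RACE check and narrow down what gets monitored.
        _ = fs.Parse([]string{
            "-logcheck-data-races",
            "-logcheck-namespaces-regexp", "kube-system",
            "-logcheck-nodes-regexp", "worker",
        })
        fs.VisitAll(func(f *flag.Flag) { fmt.Printf("%s=%s\n", f.Name, f.Value) })
    }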
var _ = ginkgo.ReportBeforeSuite(func(ctx ginkgo.SpecContext, report ginkgo.Report) {
if report.SuiteConfig.DryRun {
return
}
// This is only reached in the main process.
initialize()
})
// SIG Testing runs this as a service for the SIGs which own the code.
var _ = ginkgo.ReportAfterSuite("[sig-testing] Log Check", func(ctx ginkgo.SpecContext, report ginkgo.Report) {
if report.SuiteConfig.DryRun {
return
}
// This is reached only in the main process after the test run has completed.
finalize()
})
// initialize gets called once before tests start to run.
// It sets up monitoring of system component log output, if requested.
func initialize() {
if !enabledLogChecks.any() {
return
}
config, err := framework.LoadConfig()
framework.ExpectNoError(err, "loading client config")
client, err := kubernetes.NewForConfig(config)
framework.ExpectNoError(err, "creating client")
ctx, l, err := newLogChecker(context.Background(), client, enabledLogChecks, framework.TestContext.ReportDir)
framework.ExpectNoError(err, "set up log checker")
lc = l
lcLogger = klog.FromContext(ctx)
lc.start(ctx, podlogs.CopyAllLogs, kubeletLogQuery)
}
// finalize gets called once after tests have run, in the process where initialize was also called.
func finalize() {
if lc == nil {
return
}
failure, stdout := lc.stop(lcLogger)
_, _ = ginkgo.GinkgoWriter.Write([]byte(stdout))
if failure != "" {
// Reports as post-suite failure.
ginkgo.Fail(failure)
}
}
func newLogChecker(ctx context.Context, client kubernetes.Interface, check logCheck, logDir string) (context.Context, *logChecker, error) {
var logFile string
if logDir != "" {
// Redirect log output. Also, check for errors when we stop, those mustn't go unnoticed in a CI job.
logFile = path.Join(logDir, "system-logs.log")
output, err := os.Create(logFile)
if err != nil {
return ctx, nil, fmt.Errorf("create log file: %w", err)
}
// Allow increasing verbosity via the command line, but always use at
// least 5 when writing into a file - we can afford that.
vflag := flag.CommandLine.Lookup("v")
v, _ := strconv.Atoi(vflag.Value.String())
if v < 5 {
v = 5
}
logger := textlogger.NewLogger(textlogger.NewConfig(textlogger.Output(output), textlogger.Verbosity(v)))
ctx = klog.NewContext(ctx, logger)
}
if check.namespaces.re == nil {
check.namespaces.re = regexp.MustCompile(defaultNamespacesRE)
}
if check.nodes.re == nil {
check.nodes.re = regexp.MustCompile(defaultNodesRE)
}
ctx, cancel := context.WithCancelCause(ctx)
return ctx, &logChecker{
wg: newWaitGroup(),
client: client,
cancel: cancel,
check: check,
logDir: logDir,
logFile: logFile,
dataRaces: make(map[string][][]string),
}, nil
}
type logChecker struct {
client kubernetes.Interface
cancel func(err error)
check logCheck
logDir string
logFile string
// wg tracks background activity.
wg *waitGroup
// mutex protects all following fields.
mutex sync.Mutex
// All data races detected so far, indexed by "<namespace>/<pod name>/<container name>"
// or "kubelet/<node name>".
//
// The last entry is initially empty while waiting for the next data race.
//
// Only entities for which at least some output was received get added here,
// therefore also the "<entity>: okay" part of the report only appears for those.
dataRaces map[string][][]string
}
// stop cancels pod monitoring, waits until that is shut down, and then produces text for a failure message (ideally empty) and stdout.
func (l *logChecker) stop(logger klog.Logger) (failure, stdout string) {
logger.V(4).Info("Asking log monitors to stop")
l.cancel(errors.New("asked to stop"))
// Wait for completion.
l.wg.wait()
// Now we can proceed and produce the report.
l.mutex.Lock()
defer l.mutex.Unlock()
var failureBuffer strings.Builder
var stdoutBuffer strings.Builder
if l.logFile != "" {
logData, err := os.ReadFile(l.logFile)
if err != nil {
stdoutBuffer.WriteString(fmt.Sprintf("Reading %s failed: %v", l.logFile, err))
} else {
stdoutBuffer.Write(logData)
}
// Find all error log lines. A bit crude (doesn't handle multi-line output), but good enough
// because the full log is available.
errorLogs := regexp.MustCompile(`(?m)^E.*\n`).FindAllString(string(logData), -1)
if len(errorLogs) > 0 {
failureBuffer.WriteString("Unexpected errors during log data collection (see stdout for full log):\n\n")
for _, errorLog := range errorLogs {
// Indented to make it a verbatim block in Markdown.
failureBuffer.WriteString(" ")
failureBuffer.WriteString(errorLog)
}
}
}
keys := slices.AppendSeq([]string(nil), maps.Keys(l.dataRaces))
slices.Sort(keys)
for _, k := range keys {
races := l.dataRaces[k]
buffer := &failureBuffer
if len(races) == 0 {
buffer = &stdoutBuffer
}
if buffer.Len() > 0 {
buffer.WriteString("\n")
}
buffer.WriteString("#### " + k + "\n")
if len(races) == 0 {
buffer.WriteString("\nOkay.\n")
continue
}
for _, race := range races {
indent := " "
buffer.WriteString("\n")
if len(races) > 1 {
// Format as bullet-point list.
buffer.WriteString("- DATA RACE:\n \n")
// This also shifts the text block to the right.
indent += " "
} else {
// Single line of intro text, then the text block.
buffer.WriteString("DATA RACE:\n\n")
}
// Indented lines are stack backtraces. Those can be long,
// so collect them as we iterate over lines and truncate in the middle.
var backtrace []string
dumpBacktrace := func() {
if len(backtrace) == 0 {
return
}
if len(backtrace) > maxBacktraceLines {
head := backtrace[0 : maxBacktraceLines/2]
tail := backtrace[len(backtrace)-maxBacktraceLines/2:]
backtrace = slices.Clone(head)
backtrace = append(backtrace, " ...\n")
backtrace = append(backtrace, tail...)
}
for _, line := range backtrace {
buffer.WriteString(indent)
buffer.WriteString(line)
}
backtrace = nil
}
for _, line := range race {
if !strings.HasPrefix(line, " ") {
// Non-backtrace line => flush and write the line.
dumpBacktrace()
buffer.WriteString(indent)
buffer.WriteString(line)
continue
}
backtrace = append(backtrace, line)
}
dumpBacktrace()
}
}
return failureBuffer.String(), stdoutBuffer.String()
}
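The middle-truncation applied to long backtraces above can be shown in isolation; this is a standalone sketch of the same idea, not the exact code used by stop():

    package main

    import (
        "fmt"
        "slices"
    )

    // truncateMiddle keeps the first and last maxLines/2 entries and marks the omitted middle.
    func truncateMiddle(lines []string, maxLines int) []string {
        if len(lines) <= maxLines {
            return lines
        }
        out := slices.Clone(lines[:maxLines/2])
        out = append(out, "  ...")
        return append(out, lines[len(lines)-maxLines/2:]...)
    }

    func main() {
        var backtrace []string
        for i := 0; i < 30; i++ {
            backtrace = append(backtrace, fmt.Sprintf("  frame %d", i))
        }
        for _, line := range truncateMiddle(backtrace, 20) {
            fmt.Println(line)
        }
    }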
func (l *logChecker) start(ctx context.Context, startCopyAllLogs func(ctx context.Context, cs kubernetes.Interface, ns string, to podlogs.LogOutput) error, startNodeLog func(ctx context.Context, cs kubernetes.Interface, wg *waitGroup, nodeName string) io.Reader) {
if !l.check.any() {
return
}
logger := klog.FromContext(ctx)
// Figure out which namespace(s) to watch.
namespaces, err := l.client.CoreV1().Namespaces().List(ctx, metav1.ListOptions{})
if err != nil {
logger.Error(err, "Failed to list namespaces")
return
}
// Same for nodes.
nodes, err := l.client.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
if err != nil {
logger.Error(err, "Failed to list nodes")
return
}
for _, namespace := range namespaces.Items {
if !l.check.namespaces.re.MatchString(namespace.Name) {
continue
}
to := podlogs.LogOutput{
StatusWriter: &statusWriter{logger: logger, namespace: &namespace},
LogPathPrefix: path.Join(l.logDir, namespace.Name) + "/",
LogOpen: func(podName, containerName string) io.Writer {
return l.logOpen(logger, namespace.Name, podName, containerName)
},
}
logger.Info("Watching", "namespace", klog.KObj(&namespace))
if err := startCopyAllLogs(ctx, l.client, namespace.Name, to); err != nil {
logger.Error(err, "Log output collection failed", "namespace", klog.KObj(&namespace))
}
}
for _, node := range nodes.Items {
if !l.check.nodes.re.MatchString(node.Name) {
continue
}
logger.Info("Watching", "node", klog.KObj(&node))
kubeletLog := startNodeLog(ctx, l.client, l.wg, node.Name)
l.wg.goIfNotShuttingDown(nil, func() {
scanner := bufio.NewScanner(kubeletLog)
writer := logOutputChecker{
logger: logger,
k: "kubelet/" + node.Name,
l: l,
}
for scanner.Scan() {
// We need to strip whatever headers might have been added by the log storage
// that was queried via the log query feature. We don't know exactly what that might be,
// so let's use a regexp that matches all known line headers.
//
// Unknown lines are passed through, which is okayish (they get treated like
// unknown, raw output from the kubelet).
line := scanner.Text()
line = logQueryLineHeaderRE.ReplaceAllString(line, "")
_, _ = writer.Write([]byte(line + "\n"))
}
if err := scanner.Err(); err != nil {
logger.Error(err, "Reading kubelet log failed", "node", klog.KObj(&node))
}
})
}
}
// journaldHeader matches journald lines containing output from the kubelet:
//
// Jan 06 15:20:26.641748 kind-worker2 kubelet[311]: I0106 15:20:26.641743 311 labels.go:289] ...
//
// We also get messages from systemd itself. Those are not matched:
//
// Jan 07 08:35:52.139136 kind-worker3 systemd[1]: Started kubelet.service - kubelet: The Kubernetes Node Agent.
const journaldHeader = `... .. ..:..:......... \S+ kubelet\[\d+\]: `
// logQueryLineHeaderRE combines all supported log query data formats.
// Currently only journald is supported.
var logQueryLineHeaderRE = regexp.MustCompile(`^(?:` + journaldHeader + `)`)
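Applied to the sample line from the comment above, the expression strips the journald prefix and leaves the kubelet's own klog header (standalone sketch):

    package main

    import (
        "fmt"
        "regexp"
    )

    func main() {
        // Same pattern as journaldHeader/logQueryLineHeaderRE above.
        re := regexp.MustCompile(`^... .. ..:..:......... \S+ kubelet\[\d+\]: `)
        line := "Jan 06 15:20:26.641748 kind-worker2 kubelet[311]: I0106 15:20:26.641743 311 labels.go:289] ..."
        fmt.Println(re.ReplaceAllString(line, ""))
        // Prints: I0106 15:20:26.641743 311 labels.go:289] ...
    }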
func (l *logChecker) logOpen(logger klog.Logger, names ...string) io.Writer {
if !l.check.dataRaces {
return nil
}
if !l.wg.add(1) {
return io.Discard
}
k := strings.Join(names, "/")
logger.Info("Starting to check for data races", "container", k)
return &logOutputChecker{logger: logger, k: k, l: l}
}
type statusWriter struct {
logger klog.Logger
namespace *v1.Namespace
}
// Write gets called with text that describes problems encountered while monitoring pods and their output.
func (s *statusWriter) Write(data []byte) (int, error) {
msg := string(data)
msg = strings.TrimRight(msg, "\n")
if strings.HasPrefix(msg, "WARNING:") {
// Warnings are informational, podlogs recovers from problems (like "connection refused"
// under load, "pod not found" when a pod terminates while trying to read log output).
// They don't need to be treated as a logcheck failure when looking at logs later.
s.logger.Info("PodLogs status", "namespace", klog.KObj(s.namespace), "msg", msg)
} else {
s.logger.Error(nil, "PodLogs status", "namespace", klog.KObj(s.namespace), "msg", msg)
}
return len(msg), nil
}
// logOutputChecker receives log output for one component and detects embedded DATA RACE reports.
type logOutputChecker struct {
logger klog.Logger
k string
l *logChecker
inDataRace bool
}
var (
_ io.Writer = &logOutputChecker{}
_ io.Closer = &logOutputChecker{}
)
// Write gets called for each line of output received from a container or kubelet instance.
// The line ends with a newline.
func (p *logOutputChecker) Write(l []byte) (int, error) {
line := string(l)
p.l.mutex.Lock()
defer p.l.mutex.Unlock()
races := p.l.dataRaces[p.k]
switch {
case p.inDataRace && line != dataRaceEnd:
races[len(races)-1] = append(races[len(races)-1], line)
case p.inDataRace && line == dataRaceEnd:
// Stop collecting data race lines.
p.inDataRace = false
p.logger.Info("Completed data race", "container", p.k, "count", len(races), "dataRace", strings.Join(races[len(races)-1], ""))
case !p.inDataRace && line == dataRaceStart:
// Start collecting data race lines.
p.inDataRace = true
races = append(races, nil)
p.logger.Info("Started new data race", "container", p.k, "count", len(races))
default:
// Some other log output.
}
p.l.dataRaces[p.k] = races
return len(l), nil
}
// Close gets called once all output is processed.
func (p *logOutputChecker) Close() error {
p.logger.Info("Done checking for data races", "container", p.k)
p.l.wg.done()
return nil
}
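A standalone sketch of the same start/end-marker state machine, fed a few synthetic lines; only the two marker strings come from the constants above, the report content is made up:

    package main

    import (
        "fmt"
        "strings"
    )

    func main() {
        const (
            start = "WARNING: DATA RACE\n"
            end   = "==================\n"
        )
        log := []string{
            "I0106 15:20:26.641743 ordinary output\n",
            start,
            "Read at 0x00c000123456 by goroutine 42:\n",
            "  k8s.io/kubernetes/pkg/foo.(*Bar).Baz()\n",
            end,
            "more ordinary output\n",
        }
        var races [][]string
        inRace := false
        for _, line := range log {
            switch {
            case inRace && line == end:
                inRace = false
            case inRace:
                races[len(races)-1] = append(races[len(races)-1], line)
            case line == start:
                inRace = true
                races = append(races, nil)
            }
        }
        fmt.Printf("found %d race report(s):\n%s", len(races), strings.Join(races[0], ""))
    }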
// kubeletLogQuery sets up repeatedly querying the log of kubelet on the given node
// via the log query feature.
//
// All background goroutines stop when the context gets canceled and are added
// to the given WaitGroup before kubeletLogQuery returns. The reader will
// stop providing data when that happens.
//
// The lines in the data written to the reader may contain journald headers or other,
// platform specific headers (this is not specified for the log query feature).
func kubeletLogQuery(ctx context.Context, cs kubernetes.Interface, wg *waitGroup, nodeName string) io.Reader {
logger := klog.FromContext(ctx)
logger = klog.LoggerWithName(logger, "KubeletLogQuery")
logger = klog.LoggerWithValues(logger, "node", klog.KRef("", nodeName))
reader, writer := io.Pipe()
wg.goIfNotShuttingDown(func() {
_ = writer.Close()
}, func() {
logger.V(4).Info("Started")
defer func() {
logger.V(4).Info("Stopped", "reason", context.Cause(ctx))
}()
// How long to wait between queries is a compromise between "too long" (= too much data)
// and "too short" (= too much overhead because of frequent queries). It's not clear
// where the sweet spot is. With the current default, there were 12 calls for one worker node
// during a ~1h pull-kubernetes-e2e-kind-alpha-beta-features run, with an average result
// size of ~16MB.
//
// All log checking activities share the same client instance. It's created specifically
// for that purpose, so client-side throttling does not affect other tests.
ticker := time.NewTicker(300 * time.Second)
defer ticker.Stop()
var since time.Time
for {
select {
case <-ctx.Done():
logger.V(4).Info("Asked to stop, will query log one last time", "reason", context.Cause(ctx))
case <-ticker.C:
logger.V(6).Info("Starting periodic log query")
}
// Query once also when asked to stop via cancelation, to get the tail of the output.
// We use a timeout to prevent blocking forever here.
err := func() error {
ctx := context.WithoutCancel(ctx)
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
now := time.Now()
untilTime := now.Format(time.RFC3339)
sinceTime := ""
if !since.IsZero() {
sinceTime = since.Format(time.RFC3339)
}
// The next query will use the current end time as start time. Both start and end
// are inclusive, so unfortunately there is a risk that we get the same log
// output for that time stamp twice. There's no good way to avoid that.
// Let's hope it's rare and doesn't matter when it occurs. Missing
// output would be worse because then the end marker of a DATA RACE
// report could be missed.
since = now
req := cs.CoreV1().RESTClient().Post().
Resource("nodes").
Name(nodeName).
SubResource("proxy").
Suffix("logs").
Param("query", "kubelet").
Param("untilTime", untilTime)
if sinceTime != "" {
req = req.Param("sinceTime", sinceTime)
}
data, err := req.DoRaw(ctx)
if loggerV := logger.V(4); loggerV.Enabled() {
head := string(data)
if len(head) > 30 {
head = head[:30]
head += "..."
}
loggerV.Info("Queried log", "sinceTime", sinceTime, "endTime", now, "len", len(data), "data", head, "err", err)
}
// Let's process whatever data we have. The exact result of the query is a bit
// underspecified. This is based on observed behavior in Kubernetes 1.35.
// HTML seems to be what the kubelet responds when the feature is disabled?!
// May or may not be intentional according to a Slack discussion,
// to be clarified in https://github.com/kubernetes/kubernetes/issues/136275.
if strings.HasPrefix(string(data), "<!doctype html>") {
return fmt.Errorf("unexpected result of log query (feature disabled?): %q", string(data))
}
// Skip the special string that is used for "no data available".
if string(data) != "-- No entries --\n" {
_, _ = writer.Write(data)
}
return err
}()
if err != nil {
logger.Error(err, "Log query failed")
return
}
if ctx.Err() != nil {
return
}
}
})
return reader
}
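Condensed into a single call, the request built above looks roughly like this (a sketch; node name and time window are parameters, imports as in this file):

    func queryKubeletLog(ctx context.Context, cs kubernetes.Interface, nodeName string, since, until time.Time) ([]byte, error) {
        req := cs.CoreV1().RESTClient().Post().
            Resource("nodes").
            Name(nodeName).
            SubResource("proxy").
            Suffix("logs").
            Param("query", "kubelet").
            Param("untilTime", until.Format(time.RFC3339))
        if !since.IsZero() {
            // Both bounds are inclusive, so reusing the previous untilTime as
            // sinceTime may repeat one timestamp but never leaves a gap.
            req = req.Param("sinceTime", since.Format(time.RFC3339))
        }
        return req.DoRaw(ctx)
    }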

File diff suppressed because it is too large.

View file

@@ -0,0 +1,91 @@
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package logcheck
import (
"sync"
)
// waitGroup, in contrast to sync.WaitGroup, supports races between
// go and wait. Once wait is called, no new goroutines are started.
type waitGroup struct {
mutex sync.Mutex
cond *sync.Cond
shuttingDown bool
running int
}
func (wg *waitGroup) wait() {
wg.mutex.Lock()
defer wg.mutex.Unlock()
wg.shuttingDown = true
for wg.running > 0 {
wg.cond.Wait()
}
}
func (wg *waitGroup) add(n int) bool {
wg.mutex.Lock()
defer wg.mutex.Unlock()
if wg.shuttingDown {
return false
}
wg.running += n
return true
}
func (wg *waitGroup) done() {
wg.mutex.Lock()
defer wg.mutex.Unlock()
wg.running--
wg.cond.Broadcast()
}
// goIfNotShuttingDown executes the callback in a goroutine if the wait group
// is not already shutting down. It always calls the cleanup callback once, either way.
func (wg *waitGroup) goIfNotShuttingDown(cleanup, cb func()) {
wg.mutex.Lock()
defer wg.mutex.Unlock()
if cleanup == nil {
cleanup = func() {}
}
if wg.shuttingDown {
// Clean up directly.
cleanup()
return
}
wg.running++
go func() {
defer wg.done()
defer cleanup()
cb()
}()
}
func newWaitGroup() *waitGroup {
var wg waitGroup
wg.cond = sync.NewCond(&wg.mutex)
return &wg
}
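A usage sketch (a hypothetical helper inside this package, assuming a "time" import): work started via goIfNotShuttingDown is awaited by wait, and anything attempted afterwards only gets its cleanup callback:

    func exampleWaitGroupUsage() {
        wg := newWaitGroup()
        wg.goIfNotShuttingDown(
            func() {}, // cleanup, called exactly once either way
            func() { time.Sleep(10 * time.Millisecond) }, // the actual work
        )
        wg.wait() // returns once the goroutine above has finished
        // After wait, no new goroutine is started; only the cleanup runs.
        wg.goIfNotShuttingDown(func() { /* runs inline */ }, func() { panic("never called") })
    }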

View file

@@ -41,20 +41,36 @@ import (
)
// LogOutput determines where output from CopyAllLogs goes.
//
// Error messages about receiving log output are kept
// separate from the log output and optionally go to StatusWriter.
//
// The log output can go to one or more possible destinations.
type LogOutput struct {
// If not nil, errors encountered will be logged here.
StatusWriter io.Writer
// If not nil, all container output goes to this writer with "<pod>/<container>:" as prefix.
LogWriter io.Writer
// Base directory for one log file per container.
// The full path of each log file will be <log path prefix><pod>-<container>.log,
// if not empty.
LogPathPrefix string
// LogOpen, if not nil, gets called whenever log output watching starts for
// a certain container. Returning nil means that the output can be discarded
// unless it gets written elsewhere.
//
// The container's stdout and stderr output get written to the returned writer.
// The writer gets closed once all output is processed if the writer implements io.Closer.
// Each write is a single line, including a newline.
// Write errors are ignored.
LogOpen func(podName, containerName string) io.Writer
}
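A sketch of how a caller might use the new LogOpen hook; the helper name and per-container temp files are made up, the nil and io.Closer semantics are the ones documented above:

    func watchSystemPodLogs(ctx context.Context, cs clientset.Interface) error {
        to := LogOutput{
            LogOpen: func(podName, containerName string) io.Writer {
                f, err := os.Create(path.Join(os.TempDir(), podName+"-"+containerName+".log"))
                if err != nil {
                    return nil // nil: output for this container may be discarded
                }
                return f // an io.Closer, so it is closed once all output is processed
            },
        }
        return CopyAllLogs(ctx, cs, "kube-system", to)
    }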
// Matches harmless errors from pkg/kubelet/kubelet_pods.go.
var expectedErrors = regexp.MustCompile(`container .* in pod .* is (terminated|waiting to start|not available)|the server could not find the requested resource|context canceled`)
// CopyAllLogs is basically CopyPodLogs for all current or future pods in the given namespace ns.
func CopyAllLogs(ctx context.Context, cs clientset.Interface, ns string, to LogOutput) error {
@@ -92,7 +108,7 @@ func CopyPodLogs(ctx context.Context, cs clientset.Interface, ns, podName string
FieldSelector: fmt.Sprintf("metadata.name=%s", podName),
}
}
watcher, err := cs.CoreV1().Pods(ns).Watch(ctx, options)
if err != nil {
return fmt.Errorf("cannot create Pod event watcher: %w", err)
@@ -104,15 +120,17 @@ func CopyPodLogs(ctx context.Context, cs clientset.Interface, ns, podName string
active := map[string]bool{}
// Key is pod/container/container-id, true if we have ever started to capture its output.
started := map[string]bool{}
// Key is pod/container/container-id, value the time stamp of the last log line that has been seen.
latest := map[string]*meta.Time{}
check := func() {
m.Lock()
defer m.Unlock()
pods, err := cs.CoreV1().Pods(ns).List(ctx, options)
if err != nil {
if ctx.Err() == nil && to.StatusWriter != nil {
_, _ = fmt.Fprintf(to.StatusWriter, "ERROR: get pod list in %s: %s\n", ns, err)
}
return
}
@@ -139,41 +157,28 @@ func CopyPodLogs(ctx context.Context, cs clientset.Interface, ns, podName string
pod.Status.ContainerStatuses[i].State.Terminated == nil) {
continue
}
readCloser, err := logsForPod(ctx, cs, ns, pod.ObjectMeta.Name,
&v1.PodLogOptions{
Container: c.Name,
Follow: true,
})
if err != nil {
// We do get "normal" errors here, like trying to read too early.
// We can ignore those.
if to.StatusWriter != nil &&
expectedErrors.FindStringIndex(err.Error()) == nil {
fmt.Fprintf(to.StatusWriter, "WARNING: pod log: %s: %s\n", name, err)
}
continue
}
// Determine where we write. If this fails, we intentionally return without clearing
// the active[name] flag, which prevents trying over and over again to
// create the output file.
var logWithPrefix, logWithoutPrefix, output io.Writer
var closer io.Closer
var prefix string
podHandled := false
if to.LogWriter != nil {
podHandled = true
logWithPrefix = to.LogWriter
nodeName := pod.Spec.NodeName
if len(nodeName) > 10 {
nodeName = nodeName[0:4] + ".." + nodeName[len(nodeName)-4:]
}
prefix = name + "@" + nodeName + ": "
}
if to.LogPathPrefix != "" {
podHandled = true
filename := to.LogPathPrefix + pod.ObjectMeta.Name + "-" + c.Name + ".log"
if err := os.MkdirAll(path.Dir(filename), 0755); err != nil {
if err != nil {
if to.StatusWriter != nil {
_, _ = fmt.Fprintf(to.StatusWriter, "ERROR: pod log: create directory for %s: %s\n", filename, err)
}
return
}
@@ -182,26 +187,66 @@ func CopyPodLogs(ctx context.Context, cs clientset.Interface, ns, podName string
file, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
if to.StatusWriter != nil {
_, _ = fmt.Fprintf(to.StatusWriter, "ERROR: pod log: create file %s: %s\n", filename, err)
}
return
}
logWithoutPrefix = file
out = file
}
if to.LogOpen != nil {
if writer := to.LogOpen(pod.Name, c.Name); writer != nil {
podHandled = true
output = writer
}
}
if !podHandled {
// No-one is interested in this pod, don't bother...
continue
}
closeOutput := func() {
// Execute all of these, even if one of them panics.
defer maybeClose(logWithPrefix)
defer maybeClose(logWithoutPrefix)
defer maybeClose(output)
}
sinceTime := latest[id]
readCloser, err := logsForPod(ctx, cs, ns, pod.ObjectMeta.Name,
&v1.PodLogOptions{
Container: c.Name,
Follow: true,
Timestamps: true,
SinceTime: sinceTime,
})
if err != nil {
closeOutput()
// We do get "normal" errors here, like trying to read too early.
// We can ignore those.
if to.StatusWriter != nil &&
expectedErrors.FindStringIndex(err.Error()) == nil {
_, _ = fmt.Fprintf(to.StatusWriter, "WARNING: pod log: %s: %s\n", name, err)
}
continue
}
active[name] = true
started[id] = true
go func() {
defer closeOutput()
first := true
defer func() {
m.Lock()
// If we never printed anything, then also skip the final message.
if !first {
if logWithPrefix != nil {
_, _ = fmt.Fprintf(logWithPrefix, "%s==== end of pod log at %s ====\n", prefix, latest[id])
}
if logWithoutPrefix != nil {
_, _ = fmt.Fprintf(logWithoutPrefix, "==== end of pod log for container %s at %s ====\n", name, latest[id])
}
}
active[name] = false
@@ -209,6 +254,7 @@ func CopyPodLogs(ctx context.Context, cs clientset.Interface, ns, podName string
readCloser.Close()
}()
scanner := bufio.NewScanner(readCloser)
var latestTS time.Time
for scanner.Scan() {
line := scanner.Text()
// Filter out the expected "end of stream" error message,
@@ -222,30 +268,62 @@ func CopyPodLogs(ctx context.Context, cs clientset.Interface, ns, podName string
// Because the same log might be written to multiple times
// in different test instances, log an extra line to separate them.
// Also provides some useful extra information.
since := "(initial part)"
if sinceTime != nil {
since = fmt.Sprintf("since %s", sinceTime)
}
if logWithPrefix != nil {
_, _ = fmt.Fprintf(logWithPrefix, "%s==== start of pod log %s ====\n", prefix, since)
}
if logWithoutPrefix != nil {
_, _ = fmt.Fprintf(logWithoutPrefix, "==== start of pod log for container %s %s ====\n", name, since)
}
first = false
}
index := strings.Index(line, " ")
if index > 0 {
ts, err := time.Parse(time.RFC3339, line[:index])
if err == nil {
latestTS = ts
// Log output typically has its own log header with a time stamp,
// so let's strip the PodLogOptions time stamp.
line = line[index+1:]
}
}
if logWithPrefix != nil {
_, _ = fmt.Fprintf(logWithPrefix, "%s%s\n", prefix, line)
}
if logWithoutPrefix != nil {
_, _ = fmt.Fprintln(logWithoutPrefix, line)
}
if output != nil {
_, _ = output.Write([]byte(line + "\n"))
}
}
}
if !latestTS.IsZero() {
m.Lock()
defer m.Unlock()
latest[id] = &meta.Time{Time: latestTS}
}
}()
active[name] = true
started[id] = true
}
}
}
// Watch events to see whether we can start logging
// and log interesting ones. Also check periodically,
// in case of failures which are not followed by
// some pod change.
ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop()
check()
for {
select {
case <-watcher.ResultChan():
check()
case <-ticker.C:
case <-ctx.Done():
return
}
@@ -255,6 +333,12 @@ func CopyPodLogs(ctx context.Context, cs clientset.Interface, ns, podName string
return nil
}
func maybeClose(writer io.Writer) {
if closer, ok := writer.(io.Closer); ok {
_ = closer.Close()
}
}
// logsForPod starts reading the logs for a certain pod. If the pod has more than one
// container, opts.Container must be set. Reading stops when the context is done.
// The stream includes formatted error messages and ends with
@@ -276,7 +360,7 @@ func WatchPods(ctx context.Context, cs clientset.Interface, ns string, to io.Wri
}
}()
pods, err := cs.CoreV1().Pods(ns).Watch(ctx, meta.ListOptions{})
if err != nil {
return fmt.Errorf("cannot create Pod watcher: %w", err)
}
@@ -286,7 +370,7 @@ func WatchPods(ctx context.Context, cs clientset.Interface, ns string, to io.Wri
}
}()
events, err := cs.CoreV1().Events(ns).Watch(ctx, meta.ListOptions{})
if err != nil {
return fmt.Errorf("cannot create Event watcher: %w", err)
}