Alima Azamat 2026-02-04 02:09:08 +01:00 committed by GitHub
commit e7b7d72d51
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 666 additions and 77 deletions

View file

@@ -640,6 +640,7 @@ func NewMainKubelet(ctx context.Context,
nodeStatusMaxImages: nodeStatusMaxImages,
tracer: tracer,
nodeStartupLatencyTracker: kubeDeps.NodeStartupLatencyTracker,
podStartupLatencyTracker: kubeDeps.PodStartupLatencyTracker,
healthChecker: kubeDeps.HealthChecker,
flagz: kubeDeps.Flagz,
}
@@ -795,10 +796,11 @@ func NewMainKubelet(ctx context.Context,
kubeCfg.MemorySwap.SwapBehavior,
kubeDeps.ContainerManager.GetNodeAllocatableAbsolute,
*kubeCfg.MemoryThrottlingFactor,
kubeDeps.PodStartupLatencyTracker,
klet.podStartupLatencyTracker,
kubeDeps.TracerProvider,
tokenManager,
getServiceAccount,
klet.podStartupLatencyTracker,
)
if err != nil {
return nil, err
@@ -1519,6 +1521,9 @@ type Kubelet struct {
// Track node startup latencies
nodeStartupLatencyTracker util.NodeStartupLatencyTracker
// Track pod startup latencies
podStartupLatencyTracker util.PodStartupLatencyTracker
// Health checker for the kubelet
healthChecker watchdog.HealthChecker

View file

@@ -302,8 +302,8 @@ func newTestKubeletWithImageList(
kubelet.configMapManager = configMapManager
kubelet.mirrorPodClient = fakeMirrorClient
kubelet.podManager = kubepod.NewBasicPodManager()
podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker)
kubelet.podStartupLatencyTracker = kubeletutil.NewPodStartupLatencyTracker()
kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, kubelet.podStartupLatencyTracker)
kubelet.nodeStartupLatencyTracker = kubeletutil.NewNodeStartupLatencyTracker()
kubelet.podCertificateManager = &podcertificate.NoOpManager{}
@@ -3474,10 +3474,11 @@ func TestSyncPodSpans(t *testing.T) {
kubeCfg.MemorySwap.SwapBehavior,
kubelet.containerManager.GetNodeAllocatableAbsolute,
*kubeCfg.MemoryThrottlingFactor,
kubeletutil.NewPodStartupLatencyTracker(),
kubelet.podStartupLatencyTracker,
tp,
token.NewManager(kubelet.kubeClient),
func(string, string) (*v1.ServiceAccount, error) { return nil, nil },
kubelet.podStartupLatencyTracker,
)
assert.NoError(t, err)
kubelet.allocationManager.SetContainerRuntime(kubelet.containerRuntime)

View file

@@ -106,6 +106,11 @@ type podStateProvider interface {
ShouldPodRuntimeBeRemoved(kubetypes.UID) bool
}
type PodInitContainerTimeRecorder interface {
RecordInitContainerStarted(podUID kubetypes.UID, startedAt time.Time)
RecordInitContainerFinished(podUID kubetypes.UID, finishedAt time.Time)
}
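The runtime manager guards every use of this recorder with a nil check (see SyncPod below), so wiring one in is optional. A minimal no-op sketch that satisfies the interface (the type name is illustrative, not part of this change):
// noopInitContainerTimeRecorder discards all observations; useful in
// tests or in configurations without pod startup latency tracking.
type noopInitContainerTimeRecorder struct{}

func (noopInitContainerTimeRecorder) RecordInitContainerStarted(podUID kubetypes.UID, startedAt time.Time) {
}

func (noopInitContainerTimeRecorder) RecordInitContainerFinished(podUID kubetypes.UID, finishedAt time.Time) {
}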
type kubeGenericRuntimeManager struct {
runtimeName string
recorder record.EventRecorder
@@ -191,6 +196,9 @@ type kubeGenericRuntimeManager struct {
// Swap controller availability check function (Linux only)
// Uses sync.OnceValue for lazy initialization
getSwapControllerAvailable func() bool
// Records first initContainer start time and last initContainer finish time
podInitContainerTimeRecorder PodInitContainerTimeRecorder
}
// KubeGenericRuntime is an interface that contains the interfaces for the container runtime and command.
@@ -240,6 +248,7 @@ func NewKubeGenericRuntimeManager(
tracerProvider trace.TracerProvider,
tokenManager *token.Manager,
getServiceAccount plugin.GetServiceAccountFunc,
podInitContainerTimeRecorder PodInitContainerTimeRecorder,
) (KubeGenericRuntime, []images.PostImageGCHook, error) {
logger := klog.FromContext(ctx)
@@ -247,29 +256,30 @@ func NewKubeGenericRuntimeManager(
imageService = newInstrumentedImageManagerService(imageService)
tracer := tracerProvider.Tracer(instrumentationScope)
kubeRuntimeManager := &kubeGenericRuntimeManager{
recorder: recorder,
singleProcessOOMKill: singleProcessOOMKill,
cpuCFSQuota: cpuCFSQuota,
cpuCFSQuotaPeriod: cpuCFSQuotaPeriod,
seccompProfileRoot: filepath.Join(rootDirectory, "seccomp"),
livenessManager: livenessManager,
readinessManager: readinessManager,
startupManager: startupManager,
machineInfo: machineInfo,
osInterface: osInterface,
runtimeHelper: runtimeHelper,
runtimeService: runtimeService,
imageService: imageService,
containerManager: containerManager,
internalLifecycle: containerManager.InternalContainerLifecycle(),
logManager: logManager,
runtimeClassManager: runtimeClassManager,
logReduction: logreduction.NewLogReduction(identicalErrorDelay),
seccompDefault: seccompDefault,
memorySwapBehavior: memorySwapBehavior,
getNodeAllocatable: getNodeAllocatable,
memoryThrottlingFactor: memoryThrottlingFactor,
podLogsDirectory: podLogsDirectory,
recorder: recorder,
singleProcessOOMKill: singleProcessOOMKill,
cpuCFSQuota: cpuCFSQuota,
cpuCFSQuotaPeriod: cpuCFSQuotaPeriod,
seccompProfileRoot: filepath.Join(rootDirectory, "seccomp"),
livenessManager: livenessManager,
readinessManager: readinessManager,
startupManager: startupManager,
machineInfo: machineInfo,
osInterface: osInterface,
runtimeHelper: runtimeHelper,
runtimeService: runtimeService,
imageService: imageService,
containerManager: containerManager,
internalLifecycle: containerManager.InternalContainerLifecycle(),
logManager: logManager,
runtimeClassManager: runtimeClassManager,
logReduction: logreduction.NewLogReduction(identicalErrorDelay),
seccompDefault: seccompDefault,
memorySwapBehavior: memorySwapBehavior,
getNodeAllocatable: getNodeAllocatable,
memoryThrottlingFactor: memoryThrottlingFactor,
podLogsDirectory: podLogsDirectory,
podInitContainerTimeRecorder: podInitContainerTimeRecorder,
}
// Initialize swap controller availability check with lazy evaluation
@@ -1665,6 +1675,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
}
return err
}
if typeName == "init container" {
// Don't measure restartable init containers (sidecars)
if !podutil.IsRestartableInitContainer(spec.container) {
if m.podInitContainerTimeRecorder != nil {
m.podInitContainerTimeRecorder.RecordInitContainerStarted(pod.UID, time.Now())
}
}
}
return nil
}
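For context, podutil.IsRestartableInitContainer treats an init container as a sidecar when it declares RestartPolicy: Always; a simplified sketch of the check relied on above:
// A sidecar is an init container whose restart policy is Always; it keeps
// running alongside the main containers, so its lifetime is not part of
// the init phase and must not be subtracted from pod startup.
func isRestartableInitContainer(c *v1.Container) bool {
	return c != nil && c.RestartPolicy != nil && *c.RestartPolicy == v1.ContainerRestartPolicyAlways
}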
@@ -1694,6 +1712,21 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
logger.V(4).Info("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod))
}
for _, cs := range podStatus.ContainerStatuses {
// Check if this is an init container
for _, init := range pod.Spec.InitContainers {
// Don't measure restartable init containers (sidecars)
if podutil.IsRestartableInitContainer(&init) {
continue
}
if cs.Name == init.Name && cs.State == kubecontainer.ContainerStateExited && !cs.FinishedAt.IsZero() {
if m.podInitContainerTimeRecorder != nil {
m.podInitContainerTimeRecorder.RecordInitContainerFinished(pod.UID, cs.FinishedAt)
}
}
}
}
// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
if resizable, _, _ := allocation.IsInPlacePodVerticalScalingAllowed(pod); resizable {
if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources || podContainerChanges.UpdatePodLevelResources {

View file

@@ -34,6 +34,8 @@ type PodStartupLatencyTracker interface {
ObservedPodOnWatch(pod *v1.Pod, when time.Time)
RecordImageStartedPulling(podUID types.UID)
RecordImageFinishedPulling(podUID types.UID)
RecordInitContainerStarted(podUID types.UID, startedAt time.Time)
RecordInitContainerFinished(podUID types.UID, finishedAt time.Time)
RecordStatusUpdated(pod *v1.Pod)
DeletePodStartupState(podUID types.UID)
}
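Read as a whole, the interface implies an ordering of callbacks across one pod's startup. A hedged walkthrough (tracker, pod, and the timestamps t0..t3 are assumed for illustration):
tracker.ObservedPodOnWatch(pod, t0)              // first watch event; tracking state created
tracker.RecordImageStartedPulling(pod.UID)       // each pull start opens a session
tracker.RecordImageFinishedPulling(pod.UID)      // each finish closes the oldest open session
tracker.RecordInitContainerStarted(pod.UID, t1)  // a non-sidecar init container begins
tracker.RecordInitContainerFinished(pod.UID, t2) // its runtime joins the exclusion total
tracker.RecordStatusUpdated(pod)                 // kubelet reports the pod Running
tracker.ObservedPodOnWatch(pod, t3)              // Running observed on watch; metric emitted once
tracker.DeletePodStartupState(pod.UID)           // cleanup when the pod is deleted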
@@ -42,15 +44,26 @@ type basicPodStartupLatencyTracker struct {
// protect against concurrent read and write on pods map
lock sync.Mutex
pods map[types.UID]*perPodState
// Track pods that were excluded from SLI due to unschedulability
// These pods should never be re-added even if they later become schedulable
excludedPods map[types.UID]bool
// metrics for the first network pod only
firstNetworkPodSeen bool
// For testability
clock clock.Clock
}
type imagePullSession struct {
start time.Time
end time.Time
}
type perPodState struct {
firstStartedPulling time.Time
lastFinishedPulling time.Time
// Session-based image pulling tracking for accurate overlap handling
imagePullSessions []imagePullSession
imagePullSessionsStarts []time.Time // Track multiple concurrent pull starts
// Init container tracking
totalInitContainerRuntime time.Duration
currentInitContainerStart time.Time
// first time the pod status changed to Running
observedRunningTime time.Time
// records whether the pod startup latency was already observed
@@ -60,8 +73,9 @@ type perPodState struct {
// NewPodStartupLatencyTracker creates an instance of PodStartupLatencyTracker
func NewPodStartupLatencyTracker() PodStartupLatencyTracker {
return &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
clock: clock.RealClock{},
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
clock: clock.RealClock{},
}
}
@@ -77,13 +91,29 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim
state := p.pods[pod.UID]
if state == nil {
// create a new record for pod, only if it was not yet acknowledged by the Kubelet
// this is required, as we want to log the metric only for those pods that were scheduled
// after Kubelet started
if pod.Status.StartTime.IsZero() {
p.pods[pod.UID] = &perPodState{}
// if pod was previously unschedulable, don't track it again
if p.excludedPods[pod.UID] {
return
}
// create a new record for pod
if pod.Status.StartTime.IsZero() {
if isPodUnschedulable(pod) {
p.excludedPods[pod.UID] = true
return
}
// if pod is schedulable then track it
state = &perPodState{}
p.pods[pod.UID] = state
}
return
}
// remove an already-tracked pod if it turns out to be unschedulable (its scheduling state may only become known on a later watch event)
if isPodUnschedulable(pod) {
delete(p.pods, pod.UID)
p.excludedPods[pod.UID] = true
return
}
@@ -102,31 +132,76 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim
ctx := context.TODO()
logger := klog.FromContext(ctx)
podStartingDuration := when.Sub(pod.CreationTimestamp.Time)
imagePullingDuration := state.lastFinishedPulling.Sub(state.firstStartedPulling)
podStartSLOduration := (podStartingDuration - imagePullingDuration).Seconds()
podStartSLOduration := podStartingDuration
totalImagesPullingTime := calculateImagePullingTime(state.imagePullSessions)
if totalImagesPullingTime > 0 {
podStartSLOduration -= totalImagesPullingTime
}
if state.totalInitContainerRuntime > 0 {
podStartSLOduration -= state.totalInitContainerRuntime
}
podIsStateful := isStatefulPod(pod)
logger.Info("Observed pod startup duration",
"pod", klog.KObj(pod),
"podStartSLOduration", podStartSLOduration,
"podStartSLOduration", podStartSLOduration.Seconds(),
"podStartE2EDuration", podStartingDuration,
"totalImagesPullingTime", totalImagesPullingTime,
"totalInitContainerRuntime", state.totalInitContainerRuntime,
"isStatefulPod", podIsStateful,
"podCreationTimestamp", pod.CreationTimestamp.Time,
"firstStartedPulling", state.firstStartedPulling,
"lastFinishedPulling", state.lastFinishedPulling,
"imagePullSessionsCount", len(state.imagePullSessions),
"imagePullSessionsStartsCount", len(state.imagePullSessionsStarts),
"observedRunningTime", state.observedRunningTime,
"watchObservedRunningTime", when)
metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration)
metrics.PodStartTotalDuration.WithLabelValues().Observe(podStartingDuration.Seconds())
state.metricRecorded = true
// if this is the first Pod with network, track the start values
// these metrics will help to identify problems with the CNI plugin
if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen {
metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration)
p.firstNetworkPodSeen = true
if !podIsStateful {
metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration.Seconds())
// if this is the first Pod with network, track the start values
// these metrics will help to identify problems with the CNI plugin
if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen {
metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration.Seconds())
p.firstNetworkPodSeen = true
}
}
state.metricRecorded = true
}
}
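As a worked example of the arithmetic above (illustrative numbers): a pod created at t=0 and observed running on the watch at t=30s, with 10s of merged image pulling and 5s of non-sidecar init container runtime, gives podStartE2EDuration = 30s and podStartSLOduration = 30s - 10s - 5s = 15s; if the pod is stateful, the SLI histogram is not observed at all.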
// calculateImagePullingTime computes the total time spent pulling images,
// accounting for overlapping pull sessions properly
func calculateImagePullingTime(sessions []imagePullSession) time.Duration {
if len(sessions) == 0 {
return 0
}
var totalTime time.Duration
var currentEnd time.Time
for i, session := range sessions {
if session.end.IsZero() {
continue
}
if i == 0 || session.start.After(currentEnd) {
// First session or no overlap with previous session
totalTime += session.end.Sub(session.start)
currentEnd = session.end
} else if session.end.After(currentEnd) {
// Partial overlap - add only the non-overlapping part
totalTime += session.end.Sub(currentEnd)
currentEnd = session.end
}
// If session.end <= currentEnd, it's completely overlapped
}
return totalTime
}
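Tracing the sweep on three sessions [0s,5s], [3s,8s], [12s,14s] (illustrative values): the first contributes 5s and sets currentEnd to 5s; the second overlaps partially and adds only 8s-5s = 3s; the third starts after currentEnd and adds 2s. The total is 10s of wall-clock pulling, rather than the naive per-session sum of 5s+5s+2s = 12s.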
func (p *basicPodStartupLatencyTracker) RecordImageStartedPulling(podUID types.UID) {
p.lock.Lock()
defer p.lock.Unlock()
@@ -136,9 +211,8 @@ func (p *basicPodStartupLatencyTracker) RecordImageStartedPulling(podUID types.U
return
}
if state.firstStartedPulling.IsZero() {
state.firstStartedPulling = p.clock.Now()
}
now := p.clock.Now()
state.imagePullSessionsStarts = append(state.imagePullSessionsStarts, now)
}
func (p *basicPodStartupLatencyTracker) RecordImageFinishedPulling(podUID types.UID) {
@@ -150,8 +224,48 @@ func (p *basicPodStartupLatencyTracker) RecordImageFinishedPulling(podUID types.
return
}
if !state.firstStartedPulling.IsZero() {
state.lastFinishedPulling = p.clock.Now() // Now is always greater than values from the past.
now := p.clock.Now()
// Complete the oldest pull session if we have active starts
if len(state.imagePullSessionsStarts) > 0 {
// Take the first (oldest) start and create a session
startTime := state.imagePullSessionsStarts[0]
session := imagePullSession{
start: startTime,
end: now,
}
state.imagePullSessions = append(state.imagePullSessions, session)
state.imagePullSessionsStarts = state.imagePullSessionsStarts[1:]
}
}
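Note that under concurrent pulls this pairs each finish with the oldest outstanding start, so individual sessions may be attributed to the wrong image (starts at 10s and 11s with finishes at 16s and 18s become sessions [10s,16s] and [11s,18s]). The merged total from calculateImagePullingTime is unaffected, since only the union of the covered intervals matters for the metric.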
func (p *basicPodStartupLatencyTracker) RecordInitContainerStarted(podUID types.UID, startedAt time.Time) {
p.lock.Lock()
defer p.lock.Unlock()
state := p.pods[podUID]
if state == nil {
return
}
state.currentInitContainerStart = startedAt
}
func (p *basicPodStartupLatencyTracker) RecordInitContainerFinished(podUID types.UID, finishedAt time.Time) {
p.lock.Lock()
defer p.lock.Unlock()
state := p.pods[podUID]
if state == nil {
return
}
if !state.currentInitContainerStart.IsZero() {
initDuration := finishedAt.Sub(state.currentInitContainerStart)
if initDuration > 0 {
state.totalInitContainerRuntime += initDuration
}
state.currentInitContainerStart = time.Time{}
}
}
@@ -197,9 +311,42 @@ func hasPodStartedSLO(pod *v1.Pod) bool {
return true
}
// isStatefulPod determines if a pod is stateful according to the SLI documentation:
// "A stateful pod is defined as a pod that mounts at least one volume with sources
// other than secrets, config maps, downward API and empty dir."
// This should reflect the "stateful pod" definition
// ref: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md
func isStatefulPod(pod *v1.Pod) bool {
for _, volume := range pod.Spec.Volumes {
// Check if this volume is NOT a stateless/ephemeral type
if volume.Secret == nil &&
volume.ConfigMap == nil &&
volume.DownwardAPI == nil &&
volume.EmptyDir == nil {
return true
}
}
return false
}
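A minimal sketch of pod specs on each side of this predicate (volume and claim names are illustrative):
// Stateful: mounts a PersistentVolumeClaim, so it is excluded from the SLI.
stateful := &v1.Pod{Spec: v1.PodSpec{Volumes: []v1.Volume{{
	Name: "data",
	VolumeSource: v1.VolumeSource{
		PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ClaimName: "data-pvc"},
	},
}}}}

// Stateless: only ephemeral volume sources, so it stays in the SLI.
stateless := &v1.Pod{Spec: v1.PodSpec{Volumes: []v1.Volume{{
	Name:         "scratch",
	VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}},
}}}}

// isStatefulPod(stateful) == true, isStatefulPod(stateless) == false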
// isPodUnschedulable determines if a pod should be excluded from SLI tracking
// according to the SLI definition: "By schedulable pod we mean a pod that has to be
// immediately (without actions from any other components) schedulable in the cluster
// without causing any preemption."
// Any pod with PodScheduled=False is not immediately schedulable and should be excluded.
func isPodUnschedulable(pod *v1.Pod) bool {
for _, condition := range pod.Status.Conditions {
if condition.Type == v1.PodScheduled && condition.Status == v1.ConditionFalse {
return true
}
}
return false
}
func (p *basicPodStartupLatencyTracker) DeletePodStartupState(podUID types.UID) {
p.lock.Lock()
defer p.lock.Unlock()
delete(p.pods, podUID)
delete(p.excludedPods, podUID)
}

View file

@@ -47,7 +47,8 @@ func TestNoEvents(t *testing.T) {
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
}
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
@@ -68,7 +69,8 @@ func TestPodsRunningBeforeKubeletStarted(t *testing.T) {
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
}
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
@@ -129,7 +131,7 @@ kubelet_pod_start_sli_duration_seconds_count 1
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
clock: fakeClock,
}
@@ -145,9 +147,9 @@ kubelet_pod_start_sli_duration_seconds_count 1
if !ok {
t.Errorf("expected to track pod: %s, but pod not found", podInit.UID)
}
if !podState.lastFinishedPulling.Equal(podState.firstStartedPulling.Add(time.Millisecond * 100)) {
t.Errorf("expected pod firstStartedPulling: %s and lastFinishedPulling: %s but got firstStartedPulling: %s and lastFinishedPulling: %s",
podState.firstStartedPulling, podState.firstStartedPulling.Add(time.Millisecond*100), podState.firstStartedPulling, podState.lastFinishedPulling)
if len(podState.imagePullSessions) != 1 {
t.Errorf("expected one image pull session to be recorded")
}
podStarted := buildRunningPod()
@@ -170,7 +172,7 @@ kubelet_pod_start_sli_duration_seconds_count 1
func TestSinglePodMultipleDownloadsAndRestartsRecorded(t *testing.T) {
t.Run("single pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {
t.Run("single pod; started in 30s, overlapping image pulling between 10th and 20th seconds", func(t *testing.T) {
wants := `
# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
@@ -210,27 +212,25 @@ kubelet_pod_start_sli_duration_seconds_count 1
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
clock: fakeClock,
}
podInitializing := buildInitializingPod()
tracker.ObservedPodOnWatch(podInitializing, frozenTime)
// image pulling started at 10s and the last one finished at 30s
// first image starts pulling at 10s
// Image 1: 10-16s
fakeClock.SetTime(frozenTime.Add(time.Second * 10))
tracker.RecordImageStartedPulling(podInitializing.UID)
// second image starts pulling at 11s
// Image 2: 11-18s
fakeClock.SetTime(frozenTime.Add(time.Second * 11))
tracker.RecordImageStartedPulling(podInitializing.UID)
// third image starts pulling at 14s
// Image 3: 14-20s
fakeClock.SetTime(frozenTime.Add(time.Second * 14))
tracker.RecordImageStartedPulling(podInitializing.UID)
// first image finished pulling at 18s
fakeClock.SetTime(frozenTime.Add(time.Second * 16))
tracker.RecordImageFinishedPulling(podInitializing.UID)
fakeClock.SetTime(frozenTime.Add(time.Second * 18))
tracker.RecordImageFinishedPulling(podInitializing.UID)
// second and third finished pulling at 20s
fakeClock.SetTime(frozenTime.Add(time.Second * 20))
tracker.RecordImageFinishedPulling(podInitializing.UID)
@@ -238,13 +238,13 @@ kubelet_pod_start_sli_duration_seconds_count 1
if !ok {
t.Errorf("expected to track pod: %s, but pod not found", podInitializing.UID)
}
if !podState.firstStartedPulling.Equal(frozenTime.Add(time.Second * 10)) { // second and third image start pulling should not affect pod firstStartedPulling
t.Errorf("expected pod firstStartedPulling: %s but got firstStartedPulling: %s",
podState.firstStartedPulling.Add(time.Second*10), podState.firstStartedPulling)
if len(podState.imagePullSessions) != 3 {
t.Errorf("expected 3 image pull sessions but got %d", len(podState.imagePullSessions))
}
if !podState.lastFinishedPulling.Equal(frozenTime.Add(time.Second * 20)) { // should be updated when the pod's last image finished pulling
t.Errorf("expected pod lastFinishedPulling: %s but got lastFinishedPulling: %s",
podState.lastFinishedPulling.Add(time.Second*20), podState.lastFinishedPulling)
totalTime := calculateImagePullingTime(podState.imagePullSessions)
expectedTime := time.Second * 10
if totalTime != expectedTime {
t.Errorf("expected total pulling time: %v but got %v", expectedTime, totalTime)
}
// pod started
@@ -275,6 +275,324 @@ kubelet_pod_start_sli_duration_seconds_count 1
})
}
func TestPodWithInitContainersAndMainContainers(t *testing.T) {
t.Run("single pod with multiple init containers and main containers", func(t *testing.T) {
wants := `
# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
# TYPE kubelet_pod_start_sli_duration_seconds histogram
kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 0
kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
kubelet_pod_start_sli_duration_seconds_sum 4.2
kubelet_pod_start_sli_duration_seconds_count 1
`
fakeClock := testingclock.NewFakeClock(frozenTime)
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
clock: fakeClock,
}
podInit := buildInitializingPod("init-1", "init-2")
tracker.ObservedPodOnWatch(podInit, frozenTime)
// Init container 1 image pull: 0.5s-1.5s
fakeClock.SetTime(frozenTime.Add(time.Millisecond * 500))
tracker.RecordImageStartedPulling(podInit.UID)
fakeClock.SetTime(frozenTime.Add(time.Millisecond * 1500))
tracker.RecordImageFinishedPulling(podInit.UID)
// Init container 1 runtime: 2s-4s
fakeClock.SetTime(frozenTime.Add(time.Second * 2))
tracker.RecordInitContainerStarted(types.UID(uid), fakeClock.Now())
fakeClock.SetTime(frozenTime.Add(time.Second * 4))
tracker.RecordInitContainerFinished(types.UID(uid), fakeClock.Now())
// Init container 2 image pull: 4.2s-5.2s
fakeClock.SetTime(frozenTime.Add(time.Millisecond * 4200))
tracker.RecordImageStartedPulling(podInit.UID)
fakeClock.SetTime(frozenTime.Add(time.Millisecond * 5200))
tracker.RecordImageFinishedPulling(podInit.UID)
// Init container 2 runtime: 5.5s-7s
fakeClock.SetTime(frozenTime.Add(time.Millisecond * 5500))
tracker.RecordInitContainerStarted(types.UID(uid), fakeClock.Now())
fakeClock.SetTime(frozenTime.Add(time.Second * 7))
tracker.RecordInitContainerFinished(types.UID(uid), fakeClock.Now())
// Main container 1 image pull: 7.2s-8.2s
fakeClock.SetTime(frozenTime.Add(time.Millisecond * 7200))
tracker.RecordImageStartedPulling(podInit.UID)
fakeClock.SetTime(frozenTime.Add(time.Millisecond * 8200))
tracker.RecordImageFinishedPulling(podInit.UID)
// Main container 2 image pull: 7.3s-8.5s
fakeClock.SetTime(frozenTime.Add(time.Millisecond * 7300))
tracker.RecordImageStartedPulling(podInit.UID)
fakeClock.SetTime(frozenTime.Add(time.Millisecond * 8500))
tracker.RecordImageFinishedPulling(podInit.UID)
// Pod becomes running at 11s
podStarted := buildRunningPod()
tracker.RecordStatusUpdated(podStarted)
tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*11))
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
t.Fatal(err)
}
state := tracker.pods[types.UID(uid)]
assert.NotNil(t, state, "Pod state should exist")
expectedInitRuntime := 2*time.Second + 1500*time.Millisecond
assert.Equal(t, expectedInitRuntime, state.totalInitContainerRuntime,
"Total init container runtime should be 3.5s (2s + 1.5s)")
totalImageTime := calculateImagePullingTime(state.imagePullSessions)
expectedImageTime := 3300 * time.Millisecond // 1s + 1s + 1.3s concurrent overlap
assert.Equal(t, expectedImageTime, totalImageTime,
"Image pulling time should be 3.3s: init-1(1s) + init-2(1s) + concurrent-main(1.3s)")
assert.Len(t, state.imagePullSessions, 4, "Should have 4 image pull sessions")
// cleanup
tracker.DeletePodStartupState(podStarted.UID)
assert.Empty(t, tracker.pods)
metrics.PodStartSLIDuration.Reset()
})
}
func TestImmediatelySchedulablePods(t *testing.T) {
t.Run("pods not immediately schedulable should be excluded from tracking", func(t *testing.T) {
wants := ""
fakeClock := testingclock.NewFakeClock(frozenTime)
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
clock: fakeClock,
}
// pod that is not immediately schedulable (PodScheduled=False)
podNotSchedulable := buildInitializingPod()
podNotSchedulable.UID = "not-schedulable-pod"
podNotSchedulable.Status.Conditions = []corev1.PodCondition{
{
Type: corev1.PodScheduled,
Status: corev1.ConditionFalse,
Reason: "Unschedulable",
},
}
tracker.ObservedPodOnWatch(podNotSchedulable, frozenTime)
assert.Empty(t, tracker.pods, "Pod with PodScheduled=False should not be tracked")
// pod that is immediately schedulable (PodScheduled=True)
podSchedulable := buildInitializingPod()
podSchedulable.UID = "schedulable-pod"
podSchedulable.Status.Conditions = []corev1.PodCondition{
{
Type: corev1.PodScheduled,
Status: corev1.ConditionTrue,
Reason: "Scheduled",
},
}
tracker.ObservedPodOnWatch(podSchedulable, frozenTime)
assert.Len(t, tracker.pods, 1, "Pod with PodScheduled=True should be tracked")
// pod without PodScheduled condition
podNoCondition := buildInitializingPod()
podNoCondition.UID = "no-condition-pod"
tracker.ObservedPodOnWatch(podNoCondition, frozenTime)
assert.Len(t, tracker.pods, 2, "Pod without PodScheduled condition should be tracked by default")
// Verify metrics are empty
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
t.Fatal(err)
}
// cleanup
tracker.DeletePodStartupState(podSchedulable.UID)
tracker.DeletePodStartupState(podNoCondition.UID)
assert.Empty(t, tracker.pods)
metrics.PodStartSLIDuration.Reset()
})
t.Run("pod observed as schedulable first, then becomes unschedulable should not be tracked", func(t *testing.T) {
wants := ""
fakeClock := testingclock.NewFakeClock(frozenTime)
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
clock: fakeClock,
}
// First observe pod as schedulable
podSchedulable := buildInitializingPod()
podSchedulable.UID = "becomes-unschedulable"
tracker.ObservedPodOnWatch(podSchedulable, frozenTime)
assert.Len(t, tracker.pods, 1, "Pod should be tracked initially")
// Later observe the same pod as unschedulable
podUnschedulable := buildInitializingPod()
podUnschedulable.UID = "becomes-unschedulable"
podUnschedulable.Status.Conditions = []corev1.PodCondition{
{
Type: corev1.PodScheduled,
Status: corev1.ConditionFalse,
Reason: "Unschedulable",
},
}
tracker.ObservedPodOnWatch(podUnschedulable, frozenTime.Add(time.Second))
assert.Empty(t, tracker.pods, "Pod should be removed when it becomes unschedulable")
// Verify no metrics
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
t.Fatal(err)
}
metrics.PodStartSLIDuration.Reset()
})
t.Run("pod observed as unschedulable first, then becomes schedulable should remain not tracked", func(t *testing.T) {
wants := ""
fakeClock := testingclock.NewFakeClock(frozenTime)
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
clock: fakeClock,
}
// First observe pod as unschedulable
podUnschedulable := buildInitializingPod()
podUnschedulable.UID = "unschedulable-first"
podUnschedulable.Status.Conditions = []corev1.PodCondition{
{
Type: corev1.PodScheduled,
Status: corev1.ConditionFalse,
Reason: "Unschedulable",
},
}
tracker.ObservedPodOnWatch(podUnschedulable, frozenTime)
assert.Empty(t, tracker.pods, "Pod should not be tracked when first observed as unschedulable")
assert.True(t, tracker.excludedPods[podUnschedulable.UID], "Pod should be in excludedPods map")
// Later observe the same pod as schedulable (e.g., after cluster autoscaling)
podSchedulable := buildInitializingPod()
podSchedulable.UID = "unschedulable-first"
podSchedulable.Status.Conditions = []corev1.PodCondition{
{
Type: corev1.PodScheduled,
Status: corev1.ConditionTrue,
Reason: "Scheduled",
},
}
tracker.ObservedPodOnWatch(podSchedulable, frozenTime.Add(time.Second*5))
assert.Empty(t, tracker.pods, "Pod should remain excluded even after becoming schedulable")
assert.True(t, tracker.excludedPods[podSchedulable.UID], "Pod should remain in excludedPods map")
// Complete the startup process - should not record metrics
podRunning := buildRunningPod()
podRunning.UID = "unschedulable-first"
tracker.RecordStatusUpdated(podRunning)
tracker.ObservedPodOnWatch(podRunning, frozenTime.Add(time.Second*10))
// Verify no SLI metrics recorded
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
t.Fatal(err)
}
// cleanup
tracker.DeletePodStartupState(podRunning.UID)
assert.Empty(t, tracker.pods)
assert.False(t, tracker.excludedPods[podRunning.UID], "Pod should be removed from excludedPods on cleanup")
metrics.PodStartSLIDuration.Reset()
})
}
func TestStatefulPodExclusion(t *testing.T) {
t.Run("stateful pods should be excluded from SLI metrics", func(t *testing.T) {
wantsSLI := ""
fakeClock := testingclock.NewFakeClock(frozenTime)
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
clock: fakeClock,
}
// Create a stateful pod (with PVC volume)
statefulPod := buildInitializingPod()
statefulPod.UID = "stateful-pod"
statefulPod.Spec.Volumes = []corev1.Volume{
{
Name: "persistent-storage",
VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: "test-pvc",
},
},
},
}
tracker.ObservedPodOnWatch(statefulPod, frozenTime)
statefulPodRunning := buildRunningPod()
statefulPodRunning.UID = "stateful-pod"
statefulPodRunning.Spec.Volumes = statefulPod.Spec.Volumes
tracker.RecordStatusUpdated(statefulPodRunning)
// Observe pod as running after 3 seconds
tracker.ObservedPodOnWatch(statefulPodRunning, frozenTime.Add(time.Second*3))
// Verify no SLI metrics for the stateful pod
if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wantsSLI), metricsName); err != nil {
t.Fatal(err)
}
// cleanup
tracker.DeletePodStartupState(statefulPod.UID)
assert.Empty(t, tracker.pods)
metrics.PodStartSLIDuration.Reset()
})
}
func TestFirstNetworkPodMetrics(t *testing.T) {
t.Run("first network pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {
@@ -290,7 +608,7 @@ kubelet_first_network_pod_start_sli_duration_seconds 30
metrics.Register()
tracker := &basicPodStartupLatencyTracker{
pods: map[types.UID]*perPodState{},
pods: map[types.UID]*perPodState{},
excludedPods: map[types.UID]bool{},
clock: fakeClock,
}
@@ -338,10 +656,20 @@ kubelet_first_network_pod_start_sli_duration_seconds 30
})
}
func buildInitializingPod() *corev1.Pod {
return buildPodWithStatus([]corev1.ContainerStatus{
func buildInitializingPod(initContainerNames ...string) *corev1.Pod {
pod := buildPodWithStatus([]corev1.ContainerStatus{
{State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}},
})
// Add init containers if specified
if len(initContainerNames) > 0 {
pod.Spec.InitContainers = make([]corev1.Container, len(initContainerNames))
for i, name := range initContainerNames {
pod.Spec.InitContainers[i] = corev1.Container{Name: name}
}
}
return pod
}
func buildRunningPod() *corev1.Pod {
@@ -361,3 +689,78 @@ func buildPodWithStatus(cs []corev1.ContainerStatus) *corev1.Pod {
},
}
}
func TestCalculateImagePullingTime(t *testing.T) {
baseTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
t.Run("empty sessions should return zero", func(t *testing.T) {
sessions := []imagePullSession{}
result := calculateImagePullingTime(sessions)
assert.Equal(t, time.Duration(0), result)
})
t.Run("incomplete sessions should be ignored (for example: kubelet restart)", func(t *testing.T) {
sessions := []imagePullSession{
{start: baseTime, end: baseTime.Add(3 * time.Second)}, // valid: 3s
{start: baseTime.Add(5 * time.Second), end: time.Time{}}, // incomplete, ignored
{start: baseTime.Add(10 * time.Second), end: baseTime.Add(12 * time.Second)}, // valid: 2s
}
result := calculateImagePullingTime(sessions)
assert.Equal(t, 5*time.Second, result) // 3s + 2s
})
t.Run("non-overlapping sessions", func(t *testing.T) {
sessions := []imagePullSession{
{start: baseTime, end: baseTime.Add(2 * time.Second)}, // 2s
{start: baseTime.Add(5 * time.Second), end: baseTime.Add(8 * time.Second)}, // 3s
{start: baseTime.Add(10 * time.Second), end: baseTime.Add(15 * time.Second)}, // 5s
}
result := calculateImagePullingTime(sessions)
assert.Equal(t, 10*time.Second, result) // 2s + 3s + 5s
})
t.Run("partially overlapping sessions", func(t *testing.T) {
sessions := []imagePullSession{
{start: baseTime, end: baseTime.Add(5 * time.Second)}, // 0-5s
{start: baseTime.Add(3 * time.Second), end: baseTime.Add(8 * time.Second)}, // 3-8s (overlap 3-5s)
{start: baseTime.Add(7 * time.Second), end: baseTime.Add(12 * time.Second)}, // 7-12s (overlap 7-8s)
}
result := calculateImagePullingTime(sessions)
assert.Equal(t, 12*time.Second, result) // 12s
})
t.Run("completely overlapped sessions", func(t *testing.T) {
sessions := []imagePullSession{
{start: baseTime, end: baseTime.Add(10 * time.Second)}, // 0-10s
{start: baseTime.Add(2 * time.Second), end: baseTime.Add(8 * time.Second)}, // 2-8s (completely inside)
{start: baseTime.Add(3 * time.Second), end: baseTime.Add(5 * time.Second)}, // 3-5s (completely inside)
}
result := calculateImagePullingTime(sessions)
assert.Equal(t, 10*time.Second, result) // Only outer session: 10s
})
t.Run("partially and completely overlapping sessions", func(t *testing.T) {
sessions := []imagePullSession{
{start: baseTime, end: baseTime.Add(10 * time.Second)}, // 0-10s
{start: baseTime.Add(2 * time.Second), end: baseTime.Add(5 * time.Second)}, // 2-5s (completely inside first)
{start: baseTime.Add(3 * time.Second), end: baseTime.Add(7 * time.Second)}, // 3-7s (completely inside first and partially overlapping second)
}
result := calculateImagePullingTime(sessions)
assert.Equal(t, 10*time.Second, result) // 10s
})
t.Run("completely overlapped and partially overlapped sessions with gap", func(t *testing.T) {
// Test mixing completely overlapped sessions with separate sessions
sessions := []imagePullSession{
{start: baseTime, end: baseTime.Add(8 * time.Second)}, // 0-8s
{start: baseTime.Add(2 * time.Second), end: baseTime.Add(6 * time.Second)}, // 2-6s (completely inside first)
{start: baseTime.Add(3 * time.Second), end: baseTime.Add(5 * time.Second)}, // 3-5s (completely inside first)
{start: baseTime.Add(15 * time.Second), end: baseTime.Add(20 * time.Second)}, // 15-20s (separate)
{start: baseTime.Add(16 * time.Second), end: baseTime.Add(18 * time.Second)}, // 16-18s (completely inside fourth)
{start: baseTime.Add(17 * time.Second), end: baseTime.Add(21 * time.Second)}, // 17-21s (partially overlapping fourth and fifth)
}
result := calculateImagePullingTime(sessions)
// 8s (first group) + 6s (second group) = 14s
assert.Equal(t, 14*time.Second, result)
})
}