Merge 914230c9c9 into 44ff6085cd
This commit is contained in: commit e7b7d72d51

5 changed files with 666 additions and 77 deletions
@@ -640,6 +640,7 @@ func NewMainKubelet(ctx context.Context,
nodeStatusMaxImages: nodeStatusMaxImages,
tracer: tracer,
nodeStartupLatencyTracker: kubeDeps.NodeStartupLatencyTracker,
+podStartupLatencyTracker: kubeDeps.PodStartupLatencyTracker,
healthChecker: kubeDeps.HealthChecker,
flagz: kubeDeps.Flagz,
}
@@ -795,10 +796,11 @@ func NewMainKubelet(ctx context.Context,
kubeCfg.MemorySwap.SwapBehavior,
kubeDeps.ContainerManager.GetNodeAllocatableAbsolute,
*kubeCfg.MemoryThrottlingFactor,
-kubeDeps.PodStartupLatencyTracker,
+klet.podStartupLatencyTracker,
kubeDeps.TracerProvider,
tokenManager,
getServiceAccount,
+klet.podStartupLatencyTracker,
)
if err != nil {
return nil, err
@@ -1519,6 +1521,9 @@ type Kubelet struct {
// Track node startup latencies
nodeStartupLatencyTracker util.NodeStartupLatencyTracker

+// Track pod startup latencies
+podStartupLatencyTracker util.PodStartupLatencyTracker
+
// Health check kubelet
healthChecker watchdog.HealthChecker
@@ -302,8 +302,8 @@ func newTestKubeletWithImageList(
kubelet.configMapManager = configMapManager
kubelet.mirrorPodClient = fakeMirrorClient
kubelet.podManager = kubepod.NewBasicPodManager()
-podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
-kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker)
+kubelet.podStartupLatencyTracker = kubeletutil.NewPodStartupLatencyTracker()
+kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, kubelet.podStartupLatencyTracker)
kubelet.nodeStartupLatencyTracker = kubeletutil.NewNodeStartupLatencyTracker()
kubelet.podCertificateManager = &podcertificate.NoOpManager{}
@@ -3474,10 +3474,11 @@ func TestSyncPodSpans(t *testing.T) {
kubeCfg.MemorySwap.SwapBehavior,
kubelet.containerManager.GetNodeAllocatableAbsolute,
*kubeCfg.MemoryThrottlingFactor,
-kubeletutil.NewPodStartupLatencyTracker(),
+kubelet.podStartupLatencyTracker,
tp,
token.NewManager(kubelet.kubeClient),
func(string, string) (*v1.ServiceAccount, error) { return nil, nil },
+kubelet.podStartupLatencyTracker,
)
assert.NoError(t, err)
kubelet.allocationManager.SetContainerRuntime(kubelet.containerRuntime)
@@ -106,6 +106,11 @@ type podStateProvider interface {
ShouldPodRuntimeBeRemoved(kubetypes.UID) bool
}

+type PodInitContainerTimeRecorder interface {
+RecordInitContainerStarted(podUID kubetypes.UID, startedAt time.Time)
+RecordInitContainerFinished(podUID kubetypes.UID, finishedAt time.Time)
+}
+
type kubeGenericRuntimeManager struct {
runtimeName string
recorder record.EventRecorder
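The new PodInitContainerTimeRecorder interface is what lets the runtime manager report init-container timing back to the kubelet's pod startup latency tracker without importing it directly. Callers that do not care about this signal can satisfy the constructor with a do-nothing implementation. The sketch below is illustrative only and not part of this change: the type and package names are made up, kubetypes.UID is assumed to be k8s.io/apimachinery/pkg/types.UID, and the interface is restated locally so the snippet compiles on its own.

package example

import (
	"time"

	"k8s.io/apimachinery/pkg/types"
)

// Restated locally for a self-contained example; the real interface lives in
// the kubelet runtime manager file shown in the hunk above.
type PodInitContainerTimeRecorder interface {
	RecordInitContainerStarted(podUID types.UID, startedAt time.Time)
	RecordInitContainerFinished(podUID types.UID, finishedAt time.Time)
}

// noopInitContainerTimeRecorder satisfies the interface while recording nothing,
// which is enough for tests or callers that do not track startup latency.
// This type is hypothetical and does not appear in the diff.
type noopInitContainerTimeRecorder struct{}

func (noopInitContainerTimeRecorder) RecordInitContainerStarted(types.UID, time.Time)  {}
func (noopInitContainerTimeRecorder) RecordInitContainerFinished(types.UID, time.Time) {}

// Compile-time check that the no-op type implements the interface.
var _ PodInitContainerTimeRecorder = noopInitContainerTimeRecorder{}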
@@ -191,6 +196,9 @@ type kubeGenericRuntimeManager struct {
// Swap controller availability check function (Linux only)
// Uses sync.OnceValue for lazy initialization
getSwapControllerAvailable func() bool
+
+// Records first initContainer start time and last initContainer finish time
+podInitContainerTimeRecorder PodInitContainerTimeRecorder
}

// KubeGenericRuntime is a interface contains interfaces for container runtime and command.
@@ -240,6 +248,7 @@ func NewKubeGenericRuntimeManager(
tracerProvider trace.TracerProvider,
tokenManager *token.Manager,
getServiceAccount plugin.GetServiceAccountFunc,
+podInitContainerTimeRecorder PodInitContainerTimeRecorder,
) (KubeGenericRuntime, []images.PostImageGCHook, error) {
logger := klog.FromContext(ctx)
@@ -247,29 +256,30 @@ func NewKubeGenericRuntimeManager(
imageService = newInstrumentedImageManagerService(imageService)
tracer := tracerProvider.Tracer(instrumentationScope)
kubeRuntimeManager := &kubeGenericRuntimeManager{
-recorder: recorder,
-singleProcessOOMKill: singleProcessOOMKill,
-cpuCFSQuota: cpuCFSQuota,
-cpuCFSQuotaPeriod: cpuCFSQuotaPeriod,
-seccompProfileRoot: filepath.Join(rootDirectory, "seccomp"),
-livenessManager: livenessManager,
-readinessManager: readinessManager,
-startupManager: startupManager,
-machineInfo: machineInfo,
-osInterface: osInterface,
-runtimeHelper: runtimeHelper,
-runtimeService: runtimeService,
-imageService: imageService,
-containerManager: containerManager,
-internalLifecycle: containerManager.InternalContainerLifecycle(),
-logManager: logManager,
-runtimeClassManager: runtimeClassManager,
-logReduction: logreduction.NewLogReduction(identicalErrorDelay),
-seccompDefault: seccompDefault,
-memorySwapBehavior: memorySwapBehavior,
-getNodeAllocatable: getNodeAllocatable,
-memoryThrottlingFactor: memoryThrottlingFactor,
-podLogsDirectory: podLogsDirectory,
+recorder: recorder,
+singleProcessOOMKill: singleProcessOOMKill,
+cpuCFSQuota: cpuCFSQuota,
+cpuCFSQuotaPeriod: cpuCFSQuotaPeriod,
+seccompProfileRoot: filepath.Join(rootDirectory, "seccomp"),
+livenessManager: livenessManager,
+readinessManager: readinessManager,
+startupManager: startupManager,
+machineInfo: machineInfo,
+osInterface: osInterface,
+runtimeHelper: runtimeHelper,
+runtimeService: runtimeService,
+imageService: imageService,
+containerManager: containerManager,
+internalLifecycle: containerManager.InternalContainerLifecycle(),
+logManager: logManager,
+runtimeClassManager: runtimeClassManager,
+logReduction: logreduction.NewLogReduction(identicalErrorDelay),
+seccompDefault: seccompDefault,
+memorySwapBehavior: memorySwapBehavior,
+getNodeAllocatable: getNodeAllocatable,
+memoryThrottlingFactor: memoryThrottlingFactor,
+podLogsDirectory: podLogsDirectory,
+podInitContainerTimeRecorder: podInitContainerTimeRecorder,
}

// Initialize swap controller availability check with lazy evaluation
@@ -1665,6 +1675,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
}
return err
}
+if typeName == "init container" {
+// Don't measure restartable init containers (sidecars)
+if !podutil.IsRestartableInitContainer(spec.container) {
+if m.podInitContainerTimeRecorder != nil {
+m.podInitContainerTimeRecorder.RecordInitContainerStarted(pod.UID, time.Now())
+}
+}
+}

return nil
}
@@ -1694,6 +1712,21 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
logger.V(4).Info("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod))
}

+for _, cs := range podStatus.ContainerStatuses {
+// Check if this is an init container
+for _, init := range pod.Spec.InitContainers {
+// Don't measure restartable init containers (sidecars)
+if podutil.IsRestartableInitContainer(&init) {
+continue
+}
+if cs.Name == init.Name && cs.State == kubecontainer.ContainerStateExited && !cs.FinishedAt.IsZero() {
+if m.podInitContainerTimeRecorder != nil {
+m.podInitContainerTimeRecorder.RecordInitContainerFinished(pod.UID, cs.FinishedAt)
+}
+}
+}
+}
+
// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
if resizable, _, _ := allocation.IsInPlacePodVerticalScalingAllowed(pod); resizable {
if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources || podContainerChanges.UpdatePodLevelResources {
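Both recording sites above deliberately skip restartable init containers (sidecars): a sidecar has restartPolicy: Always, does not terminate during startup, and would otherwise inflate the measured init-container runtime. A minimal standalone sketch of that distinction follows; it is not taken from the diff, and the local helpers only mirror what podutil.IsRestartableInitContainer checks, as far as this summary understands it.

package example

import v1 "k8s.io/api/core/v1"

// isSidecar mirrors the check behind podutil.IsRestartableInitContainer:
// an init container with restartPolicy: Always is a sidecar and is excluded
// from init-container runtime measurement.
func isSidecar(c *v1.Container) bool {
	return c != nil && c.RestartPolicy != nil && *c.RestartPolicy == v1.ContainerRestartPolicyAlways
}

// measuredInitContainers returns the init containers whose start/finish times
// would be recorded by the SyncPod hooks above (hypothetical helper, for illustration).
func measuredInitContainers(pod *v1.Pod) []string {
	var names []string
	for i := range pod.Spec.InitContainers {
		if !isSidecar(&pod.Spec.InitContainers[i]) {
			names = append(names, pod.Spec.InitContainers[i].Name)
		}
	}
	return names
}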
@@ -34,6 +34,8 @@ type PodStartupLatencyTracker interface {
ObservedPodOnWatch(pod *v1.Pod, when time.Time)
RecordImageStartedPulling(podUID types.UID)
RecordImageFinishedPulling(podUID types.UID)
+RecordInitContainerStarted(podUID types.UID, startedAt time.Time)
+RecordInitContainerFinished(podUID types.UID, finishedAt time.Time)
RecordStatusUpdated(pod *v1.Pod)
DeletePodStartupState(podUID types.UID)
}
@@ -42,15 +44,26 @@ type basicPodStartupLatencyTracker struct {
// protect against concurrent read and write on pods map
lock sync.Mutex
pods map[types.UID]*perPodState
+// Track pods that were excluded from SLI due to unschedulability
+// These pods should never be re-added even if they later become schedulable
+excludedPods map[types.UID]bool
// metrics for the first network pod only
firstNetworkPodSeen bool
// For testability
clock clock.Clock
}

+type imagePullSession struct {
+start time.Time
+end time.Time
+}
+
type perPodState struct {
firstStartedPulling time.Time
lastFinishedPulling time.Time
+// Session-based image pulling tracking for accurate overlap handling
+imagePullSessions []imagePullSession
+imagePullSessionsStarts []time.Time // Track multiple concurrent pull starts
+// Init container tracking
+totalInitContainerRuntime time.Duration
+currentInitContainerStart time.Time
// first time, when pod status changed into Running
observedRunningTime time.Time
// log, if pod latency was already Observed
@@ -60,8 +73,9 @@ type perPodState struct {
// NewPodStartupLatencyTracker creates an instance of PodStartupLatencyTracker
func NewPodStartupLatencyTracker() PodStartupLatencyTracker {
return &basicPodStartupLatencyTracker{
-pods: map[types.UID]*perPodState{},
-clock: clock.RealClock{},
+pods: map[types.UID]*perPodState{},
+excludedPods: map[types.UID]bool{},
+clock: clock.RealClock{},
}
}
@@ -77,13 +91,29 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim
state := p.pods[pod.UID]
if state == nil {
-// create a new record for pod, only if it was not yet acknowledged by the Kubelet
-// this is required, as we want to log metric only for those pods, that where scheduled
-// after Kubelet started
+// if pod was previously unschedulable, don't track it again
+if p.excludedPods[pod.UID] {
+return
+}
+
+// create a new record for pod
if pod.Status.StartTime.IsZero() {
-p.pods[pod.UID] = &perPodState{}
+if isPodUnschedulable(pod) {
+p.excludedPods[pod.UID] = true
+return
+}
+
+// if pod is schedulable then track it
+state = &perPodState{}
+p.pods[pod.UID] = state
}
return
}
+
+// remove existing pods from tracking (this handles cases where scheduling state becomes known later)
+if isPodUnschedulable(pod) {
+delete(p.pods, pod.UID)
+p.excludedPods[pod.UID] = true
+return
+}
@@ -102,31 +132,76 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim
ctx := context.TODO()
logger := klog.FromContext(ctx)
podStartingDuration := when.Sub(pod.CreationTimestamp.Time)
-imagePullingDuration := state.lastFinishedPulling.Sub(state.firstStartedPulling)
-podStartSLOduration := (podStartingDuration - imagePullingDuration).Seconds()
+podStartSLOduration := podStartingDuration
+
+totalImagesPullingTime := calculateImagePullingTime(state.imagePullSessions)
+if totalImagesPullingTime > 0 {
+podStartSLOduration -= totalImagesPullingTime
+}
+
+if state.totalInitContainerRuntime > 0 {
+podStartSLOduration -= state.totalInitContainerRuntime
+}
+
+podIsStateful := isStatefulPod(pod)

logger.Info("Observed pod startup duration",
"pod", klog.KObj(pod),
-"podStartSLOduration", podStartSLOduration,
+"podStartSLOduration", podStartSLOduration.Seconds(),
"podStartE2EDuration", podStartingDuration,
+"totalImagesPullingTime", totalImagesPullingTime,
+"totalInitContainerRuntime", state.totalInitContainerRuntime,
+"isStatefulPod", podIsStateful,
"podCreationTimestamp", pod.CreationTimestamp.Time,
"firstStartedPulling", state.firstStartedPulling,
"lastFinishedPulling", state.lastFinishedPulling,
+"imagePullSessionsCount", len(state.imagePullSessions),
+"imagePullSessionsStartsCount", len(state.imagePullSessionsStarts),
"observedRunningTime", state.observedRunningTime,
"watchObservedRunningTime", when)

-metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration)
metrics.PodStartTotalDuration.WithLabelValues().Observe(podStartingDuration.Seconds())
-state.metricRecorded = true
-// if is the first Pod with network track the start values
-// these metrics will help to identify problems with the CNI plugin
-if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen {
-metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration)
-p.firstNetworkPodSeen = true
+if !podIsStateful {
+metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration.Seconds())
+// if is the first Pod with network track the start values
+// these metrics will help to identify problems with the CNI plugin
+if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen {
+metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration.Seconds())
+p.firstNetworkPodSeen = true
+}
+}
+state.metricRecorded = true
}
}

+// calculateImagePullingTime computes the total time spent pulling images,
+// accounting for overlapping pull sessions properly
+func calculateImagePullingTime(sessions []imagePullSession) time.Duration {
+if len(sessions) == 0 {
+return 0
+}
+
+var totalTime time.Duration
+var currentEnd time.Time
+
+for i, session := range sessions {
+if session.end.IsZero() {
+continue
+}
+
+if i == 0 || session.start.After(currentEnd) {
+// First session or no overlap with previous session
+totalTime += session.end.Sub(session.start)
+currentEnd = session.end
+} else if session.end.After(currentEnd) {
+// Partial overlap - add only the non-overlapping part
+totalTime += session.end.Sub(currentEnd)
+currentEnd = session.end
+}
+// If session.end <= currentEnd, it's completely overlapped
+}
+
+return totalTime
+}
+
func (p *basicPodStartupLatencyTracker) RecordImageStartedPulling(podUID types.UID) {
p.lock.Lock()
defer p.lock.Unlock()
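To make the overlap handling concrete, the standalone sketch below (not part of the diff) feeds three finished sessions into a local copy of the merge logic: pulls at 0-5s and 3-8s merge into a single 8s span, and a disjoint 10-12s pull adds 2s, so the counted time is 10s rather than the naive 5+5+2 = 12s.

package main

import (
	"fmt"
	"time"
)

// Local copies for a self-contained illustration; the real type and merge
// logic live in the kubelet util package shown in the hunk above.
type imagePullSession struct{ start, end time.Time }

func calculateImagePullingTime(sessions []imagePullSession) time.Duration {
	var total time.Duration
	var currentEnd time.Time
	for i, s := range sessions {
		if s.end.IsZero() {
			continue // incomplete session, e.g. the pull never finished
		}
		switch {
		case i == 0 || s.start.After(currentEnd):
			total += s.end.Sub(s.start) // no overlap with the previous span
			currentEnd = s.end
		case s.end.After(currentEnd):
			total += s.end.Sub(currentEnd) // partial overlap: count only the tail
			currentEnd = s.end
		}
		// fully contained sessions add nothing
	}
	return total
}

func main() {
	t0 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
	sessions := []imagePullSession{
		{t0, t0.Add(5 * time.Second)},                        // 0-5s
		{t0.Add(3 * time.Second), t0.Add(8 * time.Second)},   // 3-8s, overlaps 3-5s
		{t0.Add(10 * time.Second), t0.Add(12 * time.Second)}, // 10-12s, disjoint
	}
	fmt.Println(calculateImagePullingTime(sessions)) // 10s: union of [0,8] plus [10,12]
}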
@@ -136,9 +211,8 @@ func (p *basicPodStartupLatencyTracker) RecordImageStartedPulling(podUID types.U
return
}

-if state.firstStartedPulling.IsZero() {
-state.firstStartedPulling = p.clock.Now()
-}
+now := p.clock.Now()
+state.imagePullSessionsStarts = append(state.imagePullSessionsStarts, now)
}

func (p *basicPodStartupLatencyTracker) RecordImageFinishedPulling(podUID types.UID) {
@@ -150,8 +224,48 @@ func (p *basicPodStartupLatencyTracker) RecordImageFinishedPulling(podUID types.
return
}

-if !state.firstStartedPulling.IsZero() {
-state.lastFinishedPulling = p.clock.Now() // Now is always grater than values from the past.
+now := p.clock.Now()
+
+// Complete the oldest pull session if we have active starts
+if len(state.imagePullSessionsStarts) > 0 {
+// Take the first (oldest) start and create a session
+startTime := state.imagePullSessionsStarts[0]
+session := imagePullSession{
+start: startTime,
+end: now,
+}
+state.imagePullSessions = append(state.imagePullSessions, session)
+state.imagePullSessionsStarts = state.imagePullSessionsStarts[1:]
}
}

+func (p *basicPodStartupLatencyTracker) RecordInitContainerStarted(podUID types.UID, startedAt time.Time) {
+p.lock.Lock()
+defer p.lock.Unlock()
+
+state := p.pods[podUID]
+if state == nil {
+return
+}
+
+state.currentInitContainerStart = startedAt
+}
+
+func (p *basicPodStartupLatencyTracker) RecordInitContainerFinished(podUID types.UID, finishedAt time.Time) {
+p.lock.Lock()
+defer p.lock.Unlock()
+
+state := p.pods[podUID]
+if state == nil {
+return
+}
+
+if !state.currentInitContainerStart.IsZero() {
+initDuration := finishedAt.Sub(state.currentInitContainerStart)
+if initDuration > 0 {
+state.totalInitContainerRuntime += initDuration
+}
+state.currentInitContainerStart = time.Time{}
+}
+}
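With image-pull sessions merged and non-sidecar init-container runtime accumulated, ObservedPodOnWatch reports the SLI as the end-to-end startup time minus both exclusions. A small numeric sketch follows, using the same figures the new TestPodWithInitContainersAndMainContainers below expects to sum to 4.2s.

package main

import (
	"fmt"
	"time"
)

func main() {
	// Numbers from the init-container test scenario below:
	// the pod is observed Running 11s after creation.
	podStartE2E := 11 * time.Second

	// Image pull sessions after overlap merging: 1s + 1s + 1.3s = 3.3s.
	totalImagesPulling := 3300 * time.Millisecond

	// Non-sidecar init containers ran for 2s and 1.5s.
	totalInitRuntime := 3500 * time.Millisecond

	// Mirrors the subtraction performed in ObservedPodOnWatch.
	podStartSLO := podStartE2E - totalImagesPulling - totalInitRuntime
	fmt.Println(podStartSLO.Seconds()) // 4.2
}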
@@ -197,9 +311,42 @@ func hasPodStartedSLO(pod *v1.Pod) bool {
return true
}

+// isStatefulPod determines if a pod is stateful according to the SLI documentation:
+// "A stateful pod is defined as a pod that mounts at least one volume with sources
+// other than secrets, config maps, downward API and empty dir."
+// This should reflect the "stateful pod" definition
+// ref: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md
+func isStatefulPod(pod *v1.Pod) bool {
+for _, volume := range pod.Spec.Volumes {
+// Check if this volume is NOT a stateless/ephemeral type
+if volume.Secret == nil &&
+volume.ConfigMap == nil &&
+volume.DownwardAPI == nil &&
+volume.EmptyDir == nil {
+return true
+}
+}
+return false
+}
+
+// isPodUnschedulable determines if a pod should be excluded from SLI tracking
+// according to the SLI definition: "By schedulable pod we mean a pod that has to be
+// immediately (without actions from any other components) schedulable in the cluster
+// without causing any preemption."
+// Any pod with PodScheduled=False is not immediately schedulable and should be excluded.
+func isPodUnschedulable(pod *v1.Pod) bool {
+for _, condition := range pod.Status.Conditions {
+if condition.Type == v1.PodScheduled && condition.Status == v1.ConditionFalse {
+return true
+}
+}
+return false
+}
+
func (p *basicPodStartupLatencyTracker) DeletePodStartupState(podUID types.UID) {
p.lock.Lock()
defer p.lock.Unlock()

delete(p.pods, podUID)
+delete(p.excludedPods, podUID)
}
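As a quick illustration of the two new predicates, the standalone sketch below (not from the diff; the predicate bodies are restated locally so it compiles on its own) classifies three pods: configMap and emptyDir volumes do not make a pod stateful, a persistentVolumeClaim volume does, and a PodScheduled=False condition marks a pod as not immediately schedulable.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// Local copies of the two predicates above, restated so this sketch is self-contained.
func isStatefulPod(pod *v1.Pod) bool {
	for _, volume := range pod.Spec.Volumes {
		if volume.Secret == nil && volume.ConfigMap == nil &&
			volume.DownwardAPI == nil && volume.EmptyDir == nil {
			return true
		}
	}
	return false
}

func isPodUnschedulable(pod *v1.Pod) bool {
	for _, c := range pod.Status.Conditions {
		if c.Type == v1.PodScheduled && c.Status == v1.ConditionFalse {
			return true
		}
	}
	return false
}

func main() {
	// ConfigMap + emptyDir only: not stateful.
	stateless := &v1.Pod{Spec: v1.PodSpec{Volumes: []v1.Volume{
		{Name: "cfg", VolumeSource: v1.VolumeSource{ConfigMap: &v1.ConfigMapVolumeSource{}}},
		{Name: "tmp", VolumeSource: v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}},
	}}}
	// A PVC volume makes the pod stateful.
	stateful := &v1.Pod{Spec: v1.PodSpec{Volumes: []v1.Volume{
		{Name: "data", VolumeSource: v1.VolumeSource{
			PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ClaimName: "data"},
		}},
	}}}
	// PodScheduled=False: excluded from SLI tracking.
	unschedulable := &v1.Pod{Status: v1.PodStatus{Conditions: []v1.PodCondition{
		{Type: v1.PodScheduled, Status: v1.ConditionFalse, Reason: "Unschedulable"},
	}}}

	fmt.Println(isStatefulPod(stateless))          // false
	fmt.Println(isStatefulPod(stateful))           // true
	fmt.Println(isPodUnschedulable(unschedulable)) // true
}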
@@ -47,7 +47,8 @@ func TestNoEvents(t *testing.T) {
metrics.Register()

tracker := &basicPodStartupLatencyTracker{
-pods: map[types.UID]*perPodState{},
+pods: map[types.UID]*perPodState{},
+excludedPods: map[types.UID]bool{},
}

if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
@@ -68,7 +69,8 @@ func TestPodsRunningBeforeKubeletStarted(t *testing.T) {
metrics.Register()

tracker := &basicPodStartupLatencyTracker{
-pods: map[types.UID]*perPodState{},
+pods: map[types.UID]*perPodState{},
+excludedPods: map[types.UID]bool{},
}

if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
@@ -129,7 +131,7 @@ kubelet_pod_start_sli_duration_seconds_count 1
metrics.Register()

tracker := &basicPodStartupLatencyTracker{
-pods: map[types.UID]*perPodState{},
+pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{},
clock: fakeClock,
}
@@ -145,9 +147,9 @@ kubelet_pod_start_sli_duration_seconds_count 1
if !ok {
t.Errorf("expected to track pod: %s, but pod not found", podInit.UID)
}
-if !podState.lastFinishedPulling.Equal(podState.firstStartedPulling.Add(time.Millisecond * 100)) {
-t.Errorf("expected pod firstStartedPulling: %s and lastFinishedPulling: %s but got firstStartedPulling: %s and lastFinishedPulling: %s",
-podState.firstStartedPulling, podState.firstStartedPulling.Add(time.Millisecond*100), podState.firstStartedPulling, podState.lastFinishedPulling)
+if len(podState.imagePullSessions) != 1 {
+t.Errorf("expected one image pull session to be recorded")
}

podStarted := buildRunningPod()
@@ -170,7 +172,7 @@ kubelet_pod_start_sli_duration_seconds_count 1

func TestSinglePodMultipleDownloadsAndRestartsRecorded(t *testing.T) {

-t.Run("single pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {
+t.Run("single pod; started in 30s, overlapping image pulling between 10th and 20th seconds", func(t *testing.T) {

wants := `
# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
@@ -210,27 +212,25 @@ kubelet_pod_start_sli_duration_seconds_count 1
metrics.Register()

tracker := &basicPodStartupLatencyTracker{
-pods: map[types.UID]*perPodState{},
+pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{},
clock: fakeClock,
}

podInitializing := buildInitializingPod()
tracker.ObservedPodOnWatch(podInitializing, frozenTime)

-// image pulling started at 10s and the last one finished at 30s
-// first image starts pulling at 10s
+// Image 1: 10-16s
fakeClock.SetTime(frozenTime.Add(time.Second * 10))
tracker.RecordImageStartedPulling(podInitializing.UID)
-// second image starts pulling at 11s
+// Image 2: 11-18s
fakeClock.SetTime(frozenTime.Add(time.Second * 11))
tracker.RecordImageStartedPulling(podInitializing.UID)
-// third image starts pulling at 14s
+// Image 3: 14-20s
fakeClock.SetTime(frozenTime.Add(time.Second * 14))
tracker.RecordImageStartedPulling(podInitializing.UID)
-// first image finished pulling at 18s
+fakeClock.SetTime(frozenTime.Add(time.Second * 16))
+tracker.RecordImageFinishedPulling(podInitializing.UID)
fakeClock.SetTime(frozenTime.Add(time.Second * 18))
tracker.RecordImageFinishedPulling(podInitializing.UID)
-// second and third finished pulling at 20s
fakeClock.SetTime(frozenTime.Add(time.Second * 20))
tracker.RecordImageFinishedPulling(podInitializing.UID)
@@ -238,13 +238,13 @@ kubelet_pod_start_sli_duration_seconds_count 1
if !ok {
t.Errorf("expected to track pod: %s, but pod not found", podInitializing.UID)
}
-if !podState.firstStartedPulling.Equal(frozenTime.Add(time.Second * 10)) { // second and third image start pulling should not affect pod firstStartedPulling
-t.Errorf("expected pod firstStartedPulling: %s but got firstStartedPulling: %s",
-podState.firstStartedPulling.Add(time.Second*10), podState.firstStartedPulling)
+if len(podState.imagePullSessions) != 3 {
+t.Errorf("expected 3 image pull sessions but got %d", len(podState.imagePullSessions))
}
-if !podState.lastFinishedPulling.Equal(frozenTime.Add(time.Second * 20)) { // should be updated when the pod's last image finished pulling
-t.Errorf("expected pod lastFinishedPulling: %s but got lastFinishedPulling: %s",
-podState.lastFinishedPulling.Add(time.Second*20), podState.lastFinishedPulling)
+totalTime := calculateImagePullingTime(podState.imagePullSessions)
+expectedTime := time.Second * 10
+if totalTime != expectedTime {
+t.Errorf("expected total pulling time: %v but got %v", expectedTime, totalTime)
}

// pod started
@@ -275,6 +275,324 @@ kubelet_pod_start_sli_duration_seconds_count 1
})
}

+func TestPodWithInitContainersAndMainContainers(t *testing.T) {
+
+t.Run("single pod with multiple init containers and main containers", func(t *testing.T) {
+
+wants := `
+# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch
+# TYPE kubelet_pod_start_sli_duration_seconds histogram
+kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0
+kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0
+kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0
+kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 0
+kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 0
+kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1
+kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1
+kubelet_pod_start_sli_duration_seconds_sum 4.2
+kubelet_pod_start_sli_duration_seconds_count 1
+`
+
+fakeClock := testingclock.NewFakeClock(frozenTime)
+
+metrics.Register()
+
+tracker := &basicPodStartupLatencyTracker{
+pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{},
+clock: fakeClock,
+}
+
+podInit := buildInitializingPod("init-1", "init-2")
+tracker.ObservedPodOnWatch(podInit, frozenTime)
+
+// Init container 1 image pull: 0.5s-1.5s
+fakeClock.SetTime(frozenTime.Add(time.Millisecond * 500))
+tracker.RecordImageStartedPulling(podInit.UID)
+fakeClock.SetTime(frozenTime.Add(time.Millisecond * 1500))
+tracker.RecordImageFinishedPulling(podInit.UID)
+// Init container 1 runtime: 2s-4s
+fakeClock.SetTime(frozenTime.Add(time.Second * 2))
+tracker.RecordInitContainerStarted(types.UID(uid), fakeClock.Now())
+fakeClock.SetTime(frozenTime.Add(time.Second * 4))
+tracker.RecordInitContainerFinished(types.UID(uid), fakeClock.Now())
+
+// Init container 2 image pull: 4.2s-5.2s
+fakeClock.SetTime(frozenTime.Add(time.Millisecond * 4200))
+tracker.RecordImageStartedPulling(podInit.UID)
+fakeClock.SetTime(frozenTime.Add(time.Millisecond * 5200))
+tracker.RecordImageFinishedPulling(podInit.UID)
+// Init container 2 runtime: 5.5s-7s
+fakeClock.SetTime(frozenTime.Add(time.Millisecond * 5500))
+tracker.RecordInitContainerStarted(types.UID(uid), fakeClock.Now())
+fakeClock.SetTime(frozenTime.Add(time.Second * 7))
+tracker.RecordInitContainerFinished(types.UID(uid), fakeClock.Now())
+
+// Main container 1 image pull: 7.2s-8.2s
+fakeClock.SetTime(frozenTime.Add(time.Millisecond * 7200))
+tracker.RecordImageStartedPulling(podInit.UID)
+fakeClock.SetTime(frozenTime.Add(time.Millisecond * 8200))
+tracker.RecordImageFinishedPulling(podInit.UID)
+// Main container 2 image pull: 7.3s-8.5s
+fakeClock.SetTime(frozenTime.Add(time.Millisecond * 7300))
+tracker.RecordImageStartedPulling(podInit.UID)
+fakeClock.SetTime(frozenTime.Add(time.Millisecond * 8500))
+tracker.RecordImageFinishedPulling(podInit.UID)
+
+// Pod becomes running at 11s
+podStarted := buildRunningPod()
+tracker.RecordStatusUpdated(podStarted)
+tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*11))
+
+if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
+t.Fatal(err)
+}
+
+state := tracker.pods[types.UID(uid)]
+assert.NotNil(t, state, "Pod state should exist")
+
+expectedInitRuntime := 2*time.Second + 1500*time.Millisecond
+assert.Equal(t, expectedInitRuntime, state.totalInitContainerRuntime,
+"Total init container runtime should be 3.5s (2s + 1.5s)")
+
+totalImageTime := calculateImagePullingTime(state.imagePullSessions)
+expectedImageTime := 3300 * time.Millisecond // 1s + 1s + 1.3s concurrent overlap
+assert.Equal(t, expectedImageTime, totalImageTime,
+"Image pulling time should be 3.3s: init-1(1s) + init-2(1s) + concurrent-main(1.3s)")
+
+assert.Len(t, state.imagePullSessions, 4, "Should have 4 image pull sessions")
+
+// cleanup
+tracker.DeletePodStartupState(podStarted.UID)
+
+assert.Empty(t, tracker.pods)
+metrics.PodStartSLIDuration.Reset()
+})
+}
+
+func TestImmediatelySchedulablePods(t *testing.T) {
+t.Run("pods not immediately schedulable should be excluded from tracking", func(t *testing.T) {
+wants := ""
+
+fakeClock := testingclock.NewFakeClock(frozenTime)
+metrics.Register()
+
+tracker := &basicPodStartupLatencyTracker{
+pods: map[types.UID]*perPodState{},
+excludedPods: map[types.UID]bool{},
+clock: fakeClock,
+}
+
+// pod that is not immediately schedulable (PodScheduled=False)
+podNotSchedulable := buildInitializingPod()
+podNotSchedulable.UID = "not-schedulable-pod"
+podNotSchedulable.Status.Conditions = []corev1.PodCondition{
+{
+Type: corev1.PodScheduled,
+Status: corev1.ConditionFalse,
+Reason: "Unschedulable",
+},
+}
+tracker.ObservedPodOnWatch(podNotSchedulable, frozenTime)
+assert.Empty(t, tracker.pods, "Pod with PodScheduled=False should not be tracked")
+
+// pod that is immediately schedulable (PodScheduled=True)
+podSchedulable := buildInitializingPod()
+podSchedulable.UID = "schedulable-pod"
+podSchedulable.Status.Conditions = []corev1.PodCondition{
+{
+Type: corev1.PodScheduled,
+Status: corev1.ConditionTrue,
+Reason: "Scheduled",
+},
+}
+tracker.ObservedPodOnWatch(podSchedulable, frozenTime)
+assert.Len(t, tracker.pods, 1, "Pod with PodScheduled=True should be tracked")
+
+// pod without PodScheduled condition
+podNoCondition := buildInitializingPod()
+podNoCondition.UID = "no-condition-pod"
+tracker.ObservedPodOnWatch(podNoCondition, frozenTime)
+assert.Len(t, tracker.pods, 2, "Pod without PodScheduled condition should be tracked by default")
+
+// Verify metrics are empty
+if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
+t.Fatal(err)
+}
+
+// cleanup
+tracker.DeletePodStartupState(podSchedulable.UID)
+tracker.DeletePodStartupState(podNoCondition.UID)
+assert.Empty(t, tracker.pods)
+metrics.PodStartSLIDuration.Reset()
+})
+
+t.Run("pod observed as schedulable first, then becomes unschedulable should not be tracked", func(t *testing.T) {
+wants := ""
+
+fakeClock := testingclock.NewFakeClock(frozenTime)
+metrics.Register()
+
+tracker := &basicPodStartupLatencyTracker{
+pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{},
+clock: fakeClock,
+}
+
+// First observe pod as schedulable
+podSchedulable := buildInitializingPod()
+podSchedulable.UID = "becomes-unschedulable"
+tracker.ObservedPodOnWatch(podSchedulable, frozenTime)
+assert.Len(t, tracker.pods, 1, "Pod should be tracked initially")
+
+// Later observe the same pod as unschedulable
+podUnschedulable := buildInitializingPod()
+podUnschedulable.UID = "becomes-unschedulable"
+podUnschedulable.Status.Conditions = []corev1.PodCondition{
+{
+Type: corev1.PodScheduled,
+Status: corev1.ConditionFalse,
+Reason: "Unschedulable",
+},
+}
+
+tracker.ObservedPodOnWatch(podUnschedulable, frozenTime.Add(time.Second))
+assert.Empty(t, tracker.pods, "Pod should be removed when it becomes unschedulable")
+
+// Verify no metrics
+if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
+t.Fatal(err)
+}
+
+metrics.PodStartSLIDuration.Reset()
+})
+
+t.Run("pod observed as unschedulable first, then becomes schedulable should remain not tracked", func(t *testing.T) {
+wants := ""
+
+fakeClock := testingclock.NewFakeClock(frozenTime)
+metrics.Register()
+
+tracker := &basicPodStartupLatencyTracker{
+pods: map[types.UID]*perPodState{},
+excludedPods: map[types.UID]bool{},
+clock: fakeClock,
+}
+
+// First observe pod as unschedulable
+podUnschedulable := buildInitializingPod()
+podUnschedulable.UID = "unschedulable-first"
+podUnschedulable.Status.Conditions = []corev1.PodCondition{
+{
+Type: corev1.PodScheduled,
+Status: corev1.ConditionFalse,
+Reason: "Unschedulable",
+},
+}
+
+tracker.ObservedPodOnWatch(podUnschedulable, frozenTime)
+assert.Empty(t, tracker.pods, "Pod should not be tracked when first observed as unschedulable")
+assert.True(t, tracker.excludedPods[podUnschedulable.UID], "Pod should be in excludedPods map")
+
+// Later observe the same pod as schedulable (e.g., after cluster autoscaling)
+podSchedulable := buildInitializingPod()
+podSchedulable.UID = "unschedulable-first"
+podSchedulable.Status.Conditions = []corev1.PodCondition{
+{
+Type: corev1.PodScheduled,
+Status: corev1.ConditionTrue,
+Reason: "Scheduled",
+},
+}
+
+tracker.ObservedPodOnWatch(podSchedulable, frozenTime.Add(time.Second*5))
+assert.Empty(t, tracker.pods, "Pod should remain excluded even after becoming schedulable")
+assert.True(t, tracker.excludedPods[podSchedulable.UID], "Pod should remain in excludedPods map")
+
+// Complete the startup process - should not record metrics
+podRunning := buildRunningPod()
+podRunning.UID = "unschedulable-first"
+tracker.RecordStatusUpdated(podRunning)
+tracker.ObservedPodOnWatch(podRunning, frozenTime.Add(time.Second*10))
+
+// Verify no SLI metrics recorded
+if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil {
+t.Fatal(err)
+}
+
+// cleanup
+tracker.DeletePodStartupState(podRunning.UID)
+assert.Empty(t, tracker.pods)
+assert.False(t, tracker.excludedPods[podRunning.UID], "Pod should be removed from excludedPods on cleanup")
+metrics.PodStartSLIDuration.Reset()
+})
+}
+
+func TestStatefulPodExclusion(t *testing.T) {
+t.Run("stateful pods should be excluded from SLI metrics", func(t *testing.T) {
+wantsSLI := ""
+
+fakeClock := testingclock.NewFakeClock(frozenTime)
+metrics.Register()
+
+tracker := &basicPodStartupLatencyTracker{
+pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{},
+clock: fakeClock,
+}
+
+// Create a stateful pod (with PVC volume)
+statefulPod := buildInitializingPod()
+statefulPod.UID = "stateful-pod"
+statefulPod.Spec.Volumes = []corev1.Volume{
+{
+Name: "persistent-storage",
+VolumeSource: corev1.VolumeSource{
+PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
+ClaimName: "test-pvc",
+},
+},
+},
+}
+
+tracker.ObservedPodOnWatch(statefulPod, frozenTime)
+
+statefulPodRunning := buildRunningPod()
+statefulPodRunning.UID = "stateful-pod"
+statefulPodRunning.Spec.Volumes = statefulPod.Spec.Volumes
+tracker.RecordStatusUpdated(statefulPodRunning)
+
+// Observe pod as running after 3 seconds
+tracker.ObservedPodOnWatch(statefulPodRunning, frozenTime.Add(time.Second*3))
+
+// Verify no SLI metrics for stateful pod (
+if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wantsSLI), metricsName); err != nil {
+t.Fatal(err)
+}
+
+// cleanup
+tracker.DeletePodStartupState(statefulPod.UID)
+assert.Empty(t, tracker.pods)
+metrics.PodStartSLIDuration.Reset()
+})
+}
+
func TestFirstNetworkPodMetrics(t *testing.T) {

t.Run("first network pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) {
@@ -290,7 +608,7 @@ kubelet_first_network_pod_start_sli_duration_seconds 30
metrics.Register()

tracker := &basicPodStartupLatencyTracker{
-pods: map[types.UID]*perPodState{},
+pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{},
clock: fakeClock,
}
@@ -338,10 +656,20 @@ kubelet_first_network_pod_start_sli_duration_seconds 30
})
}

-func buildInitializingPod() *corev1.Pod {
-return buildPodWithStatus([]corev1.ContainerStatus{
+func buildInitializingPod(initContainerNames ...string) *corev1.Pod {
+pod := buildPodWithStatus([]corev1.ContainerStatus{
{State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}},
})
+
+// Add init containers if specified
+if len(initContainerNames) > 0 {
+pod.Spec.InitContainers = make([]corev1.Container, len(initContainerNames))
+for i, name := range initContainerNames {
+pod.Spec.InitContainers[i] = corev1.Container{Name: name}
+}
+}
+
+return pod
}

func buildRunningPod() *corev1.Pod {
@@ -361,3 +689,78 @@ func buildPodWithStatus(cs []corev1.ContainerStatus) *corev1.Pod {
},
}
}
+
+func TestCalculateImagePullingTime(t *testing.T) {
+baseTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
+
+t.Run("empty sessions should return zero", func(t *testing.T) {
+sessions := []imagePullSession{}
+result := calculateImagePullingTime(sessions)
+assert.Equal(t, time.Duration(0), result)
+})
+
+t.Run("incomplete sessions should be ignored (for example: kubelet restart)", func(t *testing.T) {
+sessions := []imagePullSession{
+{start: baseTime, end: baseTime.Add(3 * time.Second)}, // valid: 3s
+{start: baseTime.Add(5 * time.Second), end: time.Time{}}, // incomplete, ignored
+{start: baseTime.Add(10 * time.Second), end: baseTime.Add(12 * time.Second)}, // valid: 2s
+}
+result := calculateImagePullingTime(sessions)
+assert.Equal(t, 5*time.Second, result) // 3s + 2s
+})
+
+t.Run("non-overlapping sessions", func(t *testing.T) {
+sessions := []imagePullSession{
+{start: baseTime, end: baseTime.Add(2 * time.Second)}, // 2s
+{start: baseTime.Add(5 * time.Second), end: baseTime.Add(8 * time.Second)}, // 3s
+{start: baseTime.Add(10 * time.Second), end: baseTime.Add(15 * time.Second)}, // 5s
+}
+result := calculateImagePullingTime(sessions)
+assert.Equal(t, 10*time.Second, result) // 2s + 3s + 5s
+})
+
+t.Run("partially overlapping sessions", func(t *testing.T) {
+sessions := []imagePullSession{
+{start: baseTime, end: baseTime.Add(5 * time.Second)}, // 0-5s
+{start: baseTime.Add(3 * time.Second), end: baseTime.Add(8 * time.Second)}, // 3-8s (overlap 3-5s)
+{start: baseTime.Add(7 * time.Second), end: baseTime.Add(12 * time.Second)}, // 7-12s (overlap 7-8s)
+}
+result := calculateImagePullingTime(sessions)
+assert.Equal(t, 12*time.Second, result) // 12s
+})
+
+t.Run("completely overlapped sessions", func(t *testing.T) {
+sessions := []imagePullSession{
+{start: baseTime, end: baseTime.Add(10 * time.Second)}, // 0-10s
+{start: baseTime.Add(2 * time.Second), end: baseTime.Add(8 * time.Second)}, // 2-8s (completely inside)
+{start: baseTime.Add(3 * time.Second), end: baseTime.Add(5 * time.Second)}, // 3-5s (completely inside)
+}
+result := calculateImagePullingTime(sessions)
+assert.Equal(t, 10*time.Second, result) // Only outer session: 10s
+})
+
+t.Run("partially and completely overlapping sessions", func(t *testing.T) {
+sessions := []imagePullSession{
+{start: baseTime, end: baseTime.Add(10 * time.Second)}, // 0-10s
+{start: baseTime.Add(2 * time.Second), end: baseTime.Add(5 * time.Second)}, // 2-5s (completely inside first)
+{start: baseTime.Add(3 * time.Second), end: baseTime.Add(7 * time.Second)}, // 3-7s (completely inside first and partially overlapping second)
+}
+result := calculateImagePullingTime(sessions)
+assert.Equal(t, 10*time.Second, result) // 10s
+})
+
+t.Run("completely overlapped and partially overlapped sessions with gap", func(t *testing.T) {
+// Test mixing completely overlapped sessions with separate sessions
+sessions := []imagePullSession{
+{start: baseTime, end: baseTime.Add(8 * time.Second)}, // 0-8s
+{start: baseTime.Add(2 * time.Second), end: baseTime.Add(6 * time.Second)}, // 2-6s (completely inside first)
+{start: baseTime.Add(3 * time.Second), end: baseTime.Add(5 * time.Second)}, // 3-5s (completely inside first)
+{start: baseTime.Add(15 * time.Second), end: baseTime.Add(20 * time.Second)}, // 15-20s (separate)
+{start: baseTime.Add(16 * time.Second), end: baseTime.Add(18 * time.Second)}, // 16-18s (completely inside fourth)
+{start: baseTime.Add(17 * time.Second), end: baseTime.Add(21 * time.Second)}, // 17-21s (partially overlapping fourth and fifth)
+}
+result := calculateImagePullingTime(sessions)
+// 8s (first group) + 6s (second group) = 14s
+assert.Equal(t, 14*time.Second, result)
+})
+}