mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-02-03 20:40:26 -05:00
fix: exclude init container + image pulling time and check pod is stateless
This commit is contained in:
parent
81c14b7dd5
commit
871f6e1ebe
1 changed files with 89 additions and 11 deletions
|
|
@ -106,14 +106,67 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim
|
|||
ctx := context.TODO()
|
||||
logger := klog.FromContext(ctx)
|
||||
podStartingDuration := when.Sub(pod.CreationTimestamp.Time)
|
||||
imagePullingDuration := state.lastFinishedPulling.Sub(state.firstStartedPulling)
|
||||
initContainerDuration := state.lastInitContainerFinish.Sub(state.firstInitContainerStart)
|
||||
podStartSLOduration := (podStartingDuration - imagePullingDuration - initContainerDuration).Seconds()
|
||||
var podStartSLOduration time.Duration
|
||||
var excludedTimeStart time.Time
|
||||
var excludedTimeEnd time.Time
|
||||
|
||||
// Add time from pod creation to first excluded activity starts (either image pulling or init containers starting)
|
||||
if !state.firstStartedPulling.IsZero() && !state.firstInitContainerStart.IsZero() {
|
||||
if state.firstStartedPulling.Before(state.firstInitContainerStart) {
|
||||
excludedTimeStart = state.firstStartedPulling
|
||||
} else {
|
||||
excludedTimeStart = state.firstInitContainerStart
|
||||
}
|
||||
} else if !state.firstStartedPulling.IsZero() {
|
||||
excludedTimeStart = state.firstStartedPulling
|
||||
} else if !state.firstInitContainerStart.IsZero() {
|
||||
excludedTimeStart = state.firstInitContainerStart
|
||||
}
|
||||
|
||||
if !excludedTimeStart.IsZero() {
|
||||
preExcludedDuration := excludedTimeStart.Sub(pod.CreationTimestamp.Time)
|
||||
if preExcludedDuration > 0 {
|
||||
podStartSLOduration += preExcludedDuration
|
||||
}
|
||||
}
|
||||
|
||||
// Add gap between image pulling end and init container start if there is any
|
||||
if !state.lastFinishedPulling.IsZero() && !state.firstInitContainerStart.IsZero() {
|
||||
// Only count gap if init container starts after image pulling ends (no overlap)
|
||||
if state.firstInitContainerStart.After(state.lastFinishedPulling) {
|
||||
gapDuration := state.firstInitContainerStart.Sub(state.lastFinishedPulling)
|
||||
if gapDuration > 0 {
|
||||
podStartSLOduration += gapDuration
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add time from last dependency completion to containers running
|
||||
if state.lastFinishedPulling.After(state.lastInitContainerFinish) {
|
||||
excludedTimeEnd = state.lastFinishedPulling
|
||||
} else if !state.lastInitContainerFinish.IsZero() {
|
||||
excludedTimeEnd = state.lastInitContainerFinish
|
||||
} else if !state.lastFinishedPulling.IsZero() {
|
||||
excludedTimeEnd = state.lastFinishedPulling
|
||||
}
|
||||
|
||||
if !excludedTimeEnd.IsZero() {
|
||||
postExcludedDuration := when.Sub(excludedTimeEnd)
|
||||
if postExcludedDuration > 0 {
|
||||
podStartSLOduration += postExcludedDuration
|
||||
}
|
||||
} else if excludedTimeStart.IsZero() {
|
||||
// No dependencies at all, count entire duration
|
||||
podStartSLOduration = podStartingDuration
|
||||
}
|
||||
|
||||
isStatefulPod := isStatefulPod(pod)
|
||||
|
||||
logger.Info("Observed pod startup duration",
|
||||
"pod", klog.KObj(pod),
|
||||
"podStartSLOduration", podStartSLOduration,
|
||||
"podStartSLOduration", podStartSLOduration.Seconds(),
|
||||
"podStartE2EDuration", podStartingDuration,
|
||||
"isStatefulPod", isStatefulPod,
|
||||
"podCreationTimestamp", pod.CreationTimestamp.Time,
|
||||
"firstStartedPulling", state.firstStartedPulling,
|
||||
"lastFinishedPulling", state.lastFinishedPulling,
|
||||
|
|
@ -122,15 +175,17 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim
|
|||
"observedRunningTime", state.observedRunningTime,
|
||||
"watchObservedRunningTime", when)
|
||||
|
||||
metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration)
|
||||
metrics.PodStartTotalDuration.WithLabelValues().Observe(podStartingDuration.Seconds())
|
||||
state.metricRecorded = true
|
||||
// if is the first Pod with network track the start values
|
||||
// these metrics will help to identify problems with the CNI plugin
|
||||
if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen {
|
||||
metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration)
|
||||
p.firstNetworkPodSeen = true
|
||||
if !isStatefulPod {
|
||||
metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration.Seconds())
|
||||
// if is the first Pod with network track the start values
|
||||
// these metrics will help to identify problems with the CNI plugin
|
||||
if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen {
|
||||
metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration.Seconds())
|
||||
p.firstNetworkPodSeen = true
|
||||
}
|
||||
}
|
||||
state.metricRecorded = true
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -232,6 +287,29 @@ func hasPodStartedSLO(pod *v1.Pod) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// isStatefulPod determines if a pod is stateful according to the SLI documentation:
|
||||
// "A stateful pod is defined as a pod that mounts at least one volume with sources
|
||||
// other than secrets, config maps, downward API and empty dir."
|
||||
// This should reflect the "stateful pod" definition
|
||||
// ref: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md
|
||||
func isStatefulPod(pod *v1.Pod) bool {
|
||||
for _, volume := range pod.Spec.Volumes {
|
||||
// Check if this volume is NOT a stateless/ephemeral type
|
||||
if volume.Secret == nil &&
|
||||
volume.ConfigMap == nil &&
|
||||
volume.DownwardAPI == nil &&
|
||||
volume.EmptyDir == nil &&
|
||||
volume.Projected == nil &&
|
||||
volume.GitRepo == nil &&
|
||||
volume.Image == nil &&
|
||||
volume.Ephemeral == nil &&
|
||||
(volume.CSI == nil || volume.CSI.VolumeAttributes["csi.storage.k8s.io/ephemeral"] != "true") {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (p *basicPodStartupLatencyTracker) DeletePodStartupState(podUID types.UID) {
|
||||
p.lock.Lock()
|
||||
defer p.lock.Unlock()
|
||||
|
|
|
|||
Loading…
Reference in a new issue