Merge pull request #129240 from KevinTMtz/evict-terminated-pods-on-disk-pressure
E2e test for cleaning of terminated containers
commit e9eef19379
1 changed file with 141 additions and 63 deletions
@@ -73,19 +73,19 @@ const (
var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
f := framework.NewDefaultFramework("inode-eviction-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
expectedNodeCondition := v1.NodeDiskPressure
expectedStarvedResource := resourceInodes
pressureTimeout := 15 * time.Minute
inodesConsumed := uint64(200000)
const expectedNodeCondition = v1.NodeDiskPressure
const expectedStarvedResource = resourceInodes
const pressureTimeout = 15 * time.Minute
const inodesToThreshold = uint64(100000)
ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
// Set the eviction threshold to inodesFree - inodesToThreshold, so that using inodesToThreshold causes an eviction.
summary := eventuallyGetSummary(ctx)
inodesFree := *summary.Node.Fs.InodesFree
if inodesFree <= inodesConsumed {
if inodesFree <= inodesToThreshold {
e2eskipper.Skipf("Too few inodes free on the host for the InodeEviction test to run")
}
initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesToThreshold)}
initialConfig.EvictionMinimumReclaim = map[string]string{}
})
runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
@@ -93,7 +93,7 @@ var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(
evictionPriority: 1,
// TODO(#127864): Container runtime may not immediate free up the resources after the pod eviction,
// causing the test to fail. We provision an emptyDir volume to avoid relying on the runtime behavior.
pod: inodeConsumingPod("volume-inode-hog", lotsOfFiles, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}),
pod: inodeConsumingPod("volume-inode-hog", lotsOfFiles, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}, true),
},
{
evictionPriority: 0,
@@ -107,14 +107,14 @@ var _ = SIGDescribe("InodeEviction", framework.WithSlow(), framework.WithSerial(
// by reclaiming resources(inodes) through image garbage collection.
// Disk pressure is induced by consuming a lot of inodes on the node.
// Images are pre-pulled before running the test workload to ensure
// that the image garbage collerctor can remove them to avoid eviction.
// that the image garbage collector can remove them to avoid eviction.
var _ = SIGDescribe("ImageGCNoEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
f := framework.NewDefaultFramework("image-gc-eviction-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
pressureTimeout := 10 * time.Minute
expectedNodeCondition := v1.NodeDiskPressure
expectedStarvedResource := resourceInodes
inodesConsumed := uint64(100000)
const pressureTimeout = 10 * time.Minute
const expectedNodeCondition = v1.NodeDiskPressure
const expectedStarvedResource = resourceInodes
const inodesToThreshold = uint64(100000)
ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
prepull := func(ctx context.Context) {
// Prepull images for image garbage collector to remove them
@@ -128,22 +128,74 @@ var _ = SIGDescribe("ImageGCNoEviction", framework.WithSlow(), framework.WithSer
}

tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
// Set the eviction threshold to inodesFree - inodesConsumed, so that using inodesConsumed causes an eviction.
// Set the eviction threshold to inodesFree - inodesToThreshold, so that using inodesToThreshold causes an eviction.
summary := eventuallyGetSummary(ctx)
inodesFree := *summary.Node.Fs.InodesFree
if inodesFree <= inodesConsumed {
if inodesFree <= inodesToThreshold {
e2eskipper.Skipf("Too few inodes free on the host for the InodeEviction test to run")
}
framework.Logf("Setting eviction threshold to %d inodes", inodesFree-inodesConsumed)
initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesConsumed)}
framework.Logf("Setting eviction threshold to %d inodes", inodesFree-inodesToThreshold)
initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesToThreshold)}
initialConfig.EvictionMinimumReclaim = map[string]string{}
})
// Consume enough inodes to induce disk pressure,
// but expect that image garbage collection can reduce it enough to avoid an eviction
runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logDiskMetrics, []podEvictSpec{
runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
{
evictionPriority: 0,
pod: inodeConsumingPod("container-inode", 110000, nil),
pod: inodeConsumingPod("container-inode", 110000, nil, true),
},
})
})
})

// ImageGCTerminatedPodsContainersCleanup tests that the node responds to disk
// pressure by cleaning up containers from terminated pods without evicting
// running pods, when reclaiming their resources freed enough space to eliminate
// the disk pressure
// Disk pressure is induced by pulling large images
var _ = SIGDescribe("ImageGCTerminatedPodsContainersCleanup", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
f := framework.NewDefaultFramework("image-gc-terminated-pods-containers-cleanup-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
const pressureTimeout = 10 * time.Minute
const expectedNodeCondition = v1.NodeDiskPressure
const expectedStarvedResource = resourceInodes
const inodesToThreshold = uint64(50000)

ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
prepull := func(ctx context.Context) {
// Prepull images for image garbage collector to remove them
// when reclaiming resources
err := PrePullAllImages(ctx)
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
}
ginkgo.BeforeEach(prepull)
if framework.TestContext.PrepullImages {
ginkgo.AfterEach(prepull)
}

tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
// Set the eviction threshold to inodesFree - inodesToThreshold, so that using inodesToThreshold causes an eviction.
summary := eventuallyGetSummary(ctx)
inodesFree := *summary.Node.Fs.InodesFree
if inodesFree <= inodesToThreshold {
e2eskipper.Skipf("Too few inodes free on the host for the InodeEviction test to run")
}
framework.Logf("Setting eviction threshold to %d inodes", inodesFree-inodesToThreshold)
initialConfig.EvictionHard = map[string]string{string(evictionapi.SignalNodeFsInodesFree): fmt.Sprintf("%d", inodesFree-inodesToThreshold)}
initialConfig.EvictionMinimumReclaim = map[string]string{}
})
runEvictionTest(f, pressureTimeout, expectedNodeCondition, expectedStarvedResource, logInodeMetrics, []podEvictSpec{
// Setting the volume as nil, will make the file to be written to the
// container writable layer, which defaults to using the node's root fs.
{
evictionPriority: 0,
pod: inodeConsumingPod("disk-hog", 30000, nil, false),
expectToSucceed: true,
},
{
evictionPriority: 0,
pod: inodeConsumingPod("container-inode", 30000, &v1.VolumeSource{EmptyDir: &v1.EmptyDirVolumeSource{}}, true),
},
})
})
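The eviction-threshold setup repeated in the hunks above is plain arithmetic: the kubelet's hard-eviction signal for free inodes is set to the node's current free-inode count minus the amount the test intends to consume, so creating roughly that many files drives the node into DiskPressure. Below is a minimal standalone sketch of that calculation, not part of the change itself; the inodesFree figure is invented, only the 50000 constant comes from the new test, and "nodefs.inodesFree" is assumed to be the string behind evictionapi.SignalNodeFsInodesFree.

package main

import "fmt"

func main() {
	// Hypothetical value; the tests read it from summary.Node.Fs.InodesFree at runtime.
	inodesFree := uint64(3_000_000)
	// Matches the inodesToThreshold constant in ImageGCTerminatedPodsContainersCleanup.
	inodesToThreshold := uint64(50_000)

	// Hard-eviction threshold: current free inodes minus what the workload will consume.
	// Once the pods create about inodesToThreshold files, free inodes fall below this
	// value and the kubelet reports the DiskPressure node condition.
	threshold := inodesFree - inodesToThreshold
	evictionHard := map[string]string{"nodefs.inodesFree": fmt.Sprintf("%d", threshold)}
	fmt.Println(evictionHard) // map[nodefs.inodesFree:2950000]
}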
@@ -154,9 +206,9 @@ var _ = SIGDescribe("ImageGCNoEviction", framework.WithSlow(), framework.WithSer
var _ = SIGDescribe("MemoryAllocatableEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
f := framework.NewDefaultFramework("memory-allocatable-eviction-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
expectedNodeCondition := v1.NodeMemoryPressure
expectedStarvedResource := v1.ResourceMemory
pressureTimeout := 10 * time.Minute
const expectedNodeCondition = v1.NodeMemoryPressure
const expectedStarvedResource = v1.ResourceMemory
const pressureTimeout = 10 * time.Minute
ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
@@ -188,9 +240,9 @@ var _ = SIGDescribe("MemoryAllocatableEviction", framework.WithSlow(), framework
var _ = SIGDescribe("MemoryAllocatableEvictionPodLevelResources", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.PodLevelResources, func() {
f := framework.NewDefaultFramework("memory-allocatable-eviction-test-pod-level-resources")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
expectedNodeCondition := v1.NodeMemoryPressure
expectedStarvedResource := v1.ResourceMemory
pressureTimeout := 10 * time.Minute
const expectedNodeCondition = v1.NodeMemoryPressure
const expectedStarvedResource = v1.ResourceMemory
const pressureTimeout = 10 * time.Minute
ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
@@ -222,9 +274,9 @@ var _ = SIGDescribe("MemoryAllocatableEvictionPodLevelResources", framework.With
var _ = SIGDescribe("LocalStorageEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
f := framework.NewDefaultFramework("localstorage-eviction-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
pressureTimeout := 15 * time.Minute
expectedNodeCondition := v1.NodeDiskPressure
expectedStarvedResource := v1.ResourceEphemeralStorage
const pressureTimeout = 15 * time.Minute
const expectedNodeCondition = v1.NodeDiskPressure
const expectedStarvedResource = v1.ResourceEphemeralStorage
ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {

tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -263,9 +315,9 @@ var _ = SIGDescribe("LocalStorageEviction", framework.WithSlow(), framework.With
var _ = SIGDescribe("LocalStorageSoftEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
f := framework.NewDefaultFramework("localstorage-eviction-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
pressureTimeout := 10 * time.Minute
expectedNodeCondition := v1.NodeDiskPressure
expectedStarvedResource := v1.ResourceEphemeralStorage
const pressureTimeout = 10 * time.Minute
const expectedNodeCondition = v1.NodeDiskPressure
const expectedStarvedResource = v1.ResourceEphemeralStorage
ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
diskConsumed := resource.MustParse("4Gi")
@@ -301,12 +353,12 @@ var _ = SIGDescribe("LocalStorageSoftEviction", framework.WithSlow(), framework.
var _ = SIGDescribe("LocalStorageSoftEvictionNotOverwriteTerminationGracePeriodSeconds", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
f := framework.NewDefaultFramework("localstorage-eviction-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
pressureTimeout := 10 * time.Minute
expectedNodeCondition := v1.NodeDiskPressure
expectedStarvedResource := v1.ResourceEphemeralStorage
const pressureTimeout = 10 * time.Minute
const expectedNodeCondition = v1.NodeDiskPressure
const expectedStarvedResource = v1.ResourceEphemeralStorage

evictionMaxPodGracePeriod := 30
evictionSoftGracePeriod := 30
const evictionMaxPodGracePeriod = 30
const evictionSoftGracePeriod = 30
ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
diskConsumed := resource.MustParse("4Gi")
@@ -340,7 +392,7 @@ var _ = SIGDescribe("LocalStorageSoftEvictionNotOverwriteTerminationGracePeriodS
var _ = SIGDescribe("LocalStorageCapacityIsolationEviction", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.LocalStorageCapacityIsolationQuota, feature.Eviction, func() {
f := framework.NewDefaultFramework("localstorage-eviction-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
evictionTestTimeout := 10 * time.Minute
const evictionTestTimeout = 10 * time.Minute
ginkgo.Context(fmt.Sprintf(testContextFmt, "evictions due to pod local storage violations"), func() {
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
// setting a threshold to 0% disables; non-empty map overrides default value (necessary due to omitempty)
@@ -393,12 +445,12 @@ var _ = SIGDescribe("LocalStorageCapacityIsolationEviction", framework.WithSlow(
var _ = SIGDescribe("PriorityMemoryEvictionOrdering", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
f := framework.NewDefaultFramework("priority-memory-eviction-ordering-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
expectedNodeCondition := v1.NodeMemoryPressure
expectedStarvedResource := v1.ResourceMemory
pressureTimeout := 10 * time.Minute
const expectedNodeCondition = v1.NodeMemoryPressure
const expectedStarvedResource = v1.ResourceMemory
const pressureTimeout = 10 * time.Minute

highPriorityClassName := f.BaseName + "-high-priority"
highPriority := int32(999999999)
const highPriority = int32(999999999)

ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -471,12 +523,12 @@ var _ = SIGDescribe("PriorityMemoryEvictionOrdering", framework.WithSlow(), fram
var _ = SIGDescribe("PriorityMemoryEvictionOrderingPodLevelResources", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.PodLevelResources, func() {
f := framework.NewDefaultFramework("priority-memory-eviction-ordering-test-pod-level-resources")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
expectedNodeCondition := v1.NodeMemoryPressure
expectedStarvedResource := v1.ResourceMemory
pressureTimeout := 10 * time.Minute
const expectedNodeCondition = v1.NodeMemoryPressure
const expectedStarvedResource = v1.ResourceMemory
const pressureTimeout = 10 * time.Minute

highPriorityClassName := f.BaseName + "-high-priority"
highPriority := int32(999999999)
const highPriority = int32(999999999)

ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -538,12 +590,12 @@ var _ = SIGDescribe("PriorityMemoryEvictionOrderingPodLevelResources", framework
var _ = SIGDescribe("PriorityLocalStorageEvictionOrdering", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
f := framework.NewDefaultFramework("priority-disk-eviction-ordering-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
expectedNodeCondition := v1.NodeDiskPressure
expectedStarvedResource := v1.ResourceEphemeralStorage
pressureTimeout := 15 * time.Minute
const expectedNodeCondition = v1.NodeDiskPressure
const expectedStarvedResource = v1.ResourceEphemeralStorage
const pressureTimeout = 15 * time.Minute

highPriorityClassName := f.BaseName + "-high-priority"
highPriority := int32(999999999)
const highPriority = int32(999999999)

ginkgo.Context(fmt.Sprintf(testContextFmt, expectedNodeCondition), func() {
tempSetCurrentKubeletConfig(f, func(ctx context.Context, initialConfig *kubeletconfig.KubeletConfiguration) {
@@ -601,16 +653,16 @@ var _ = SIGDescribe("PriorityLocalStorageEvictionOrdering", framework.WithSlow()
var _ = SIGDescribe("PriorityPidEvictionOrdering", framework.WithSlow(), framework.WithSerial(), framework.WithDisruptive(), feature.Eviction, func() {
f := framework.NewDefaultFramework("pidpressure-eviction-test")
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
pressureTimeout := 10 * time.Minute
expectedNodeCondition := v1.NodePIDPressure
expectedStarvedResource := noStarvedResource
const pressureTimeout = 10 * time.Minute
const expectedNodeCondition = v1.NodePIDPressure
const expectedStarvedResource = noStarvedResource

highPriorityClassName := f.BaseName + "-high-priority"
highPriority := int32(999999999)
const highPriority = int32(999999999)
// Apparently there is a threshold at around 10,000+. If it's over the threshold,
// the processes are likely to be oom-killed instead of evicted.
// One test can have at most two pidConsumingPods at a time not to cause oom-kill.
processes := 5000
const processes = 5000

// if criStats is true, PodAndContainerStatsFromCRI will use data from cri instead of cadvisor for kubelet to get pid count of pods
for _, criStats := range []bool{true, false} {
@@ -686,6 +738,7 @@ type podEvictSpec struct {
evictionPriority int
pod *v1.Pod
wantPodDisruptionCondition *v1.PodConditionType
expectToSucceed bool

evictionMaxPodGracePeriod int
evictionSoftGracePeriod int
@@ -713,13 +766,35 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
time.Sleep(30 * time.Second)
ginkgo.By("setting up pods to be used by tests")
pods := []*v1.Pod{}
podsToSucceed := []*v1.Pod{}
for _, spec := range testSpecs {
if spec.prePodCreationModificationFunc != nil {
spec.prePodCreationModificationFunc(ctx, spec.pod)
}
if spec.expectToSucceed {
podsToSucceed = append(podsToSucceed, spec.pod)
continue
}
pods = append(pods, spec.pod)
}
e2epod.NewPodClient(f).CreateBatch(ctx, pods)

e2ePodClient := e2epod.NewPodClient(f)

// Create all expected pods and wait for them to succeed.
// CreateBatch was not used to prevent a race condition because of
// underlying call to CreateSync, which may fail if the wait function
// misses to see the pod readiness before it terminates.
for _, pod := range podsToSucceed {
*pod = *e2ePodClient.Create(ctx, pod)
}

ginkgo.By(fmt.Sprintf("Waiting for the expected pods to complete: %v", podsToSucceed))
for _, pod := range podsToSucceed {
framework.ExpectNoError(e2epod.WaitForPodSuccessInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace))
}

// Create all other pods after expected to succeed pods terminated
e2ePodClient.CreateBatch(ctx, pods)
})

ginkgo.It("should eventually evict all of the correct pods", func(ctx context.Context) {
@@ -855,8 +930,10 @@ func verifyEvictionOrdering(ctx context.Context, f *framework.Framework, testSpe
}
}
gomega.Expect(priorityPod).NotTo(gomega.BeNil())
gomega.Expect(priorityPod.Status.Phase).ToNot(gomega.Equal(v1.PodSucceeded),
fmt.Sprintf("pod: %s succeeded unexpectedly", priorityPod.Name))
if !priorityPodSpec.expectToSucceed {
gomega.Expect(priorityPod.Status.Phase).ToNot(gomega.Equal(v1.PodSucceeded),
fmt.Sprintf("pod: %s succeeded unexpectedly", priorityPod.Name))
}

// Check eviction ordering.
// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round,
@@ -1185,13 +1262,13 @@ const (
volumeName = "test-volume"
)

func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource) *v1.Pod {
func inodeConsumingPod(name string, numFiles int, volumeSource *v1.VolumeSource, sleepAfterExecuting bool) *v1.Pod {
path := ""
if volumeSource != nil {
path = volumeMountPath
}
// Each iteration creates an empty file
return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, fmt.Sprintf("touch %s${i}.txt; sleep 0.001;", filepath.Join(path, "file")))
return podWithCommand(volumeSource, v1.ResourceRequirements{}, numFiles, name, fmt.Sprintf("touch %s${i}.txt; sleep 0.001;", filepath.Join(path, "file")), sleepAfterExecuting)
}

func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSource, resources v1.ResourceRequirements) *v1.Pod {
@@ -1200,17 +1277,17 @@ func diskConsumingPod(name string, diskConsumedMB int, volumeSource *v1.VolumeSo
path = volumeMountPath
}
// Each iteration writes 1 Mb, so do diskConsumedMB iterations.
return podWithCommand(volumeSource, resources, diskConsumedMB, name, fmt.Sprintf("dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null; sleep .1;", filepath.Join(path, "file")))
return podWithCommand(volumeSource, resources, diskConsumedMB, name, fmt.Sprintf("dd if=/dev/urandom of=%s${i} bs=1048576 count=1 2>/dev/null; sleep .1;", filepath.Join(path, "file")), true)
}

func pidConsumingPod(name string, numProcesses int) *v1.Pod {
// Slowing down the iteration speed to prevent a race condition where eviction may occur
// before the correct number of processes is captured in the stats during a sudden surge in processes.
return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses, name, "/bin/sleep 0.01; (/bin/sleep 3600)&")
return podWithCommand(nil, v1.ResourceRequirements{}, numProcesses, name, "/bin/sleep 0.01; (/bin/sleep 3600)&", true)
}

// podWithCommand returns a pod with the provided volumeSource and resourceRequirements.
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string) *v1.Pod {
func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirements, iterations int, name, command string, sleepAfterExecuting bool) *v1.Pod {
// Due to https://github.com/kubernetes/kubernetes/issues/115819,
// When evictionHard to used, we were setting grace period to 0 which meant the default setting (30 seconds)
// This could help with flakiness as we should send sigterm right away.
@@ -1221,6 +1298,7 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirem
volumeMounts = []v1.VolumeMount{{MountPath: volumeMountPath, Name: volumeName}}
volumes = []v1.Volume{{Name: volumeName, VolumeSource: *volumeSource}}
}

return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: fmt.Sprintf("%s-pod", name)},
Spec: v1.PodSpec{
@@ -1233,7 +1311,7 @@ func podWithCommand(volumeSource *v1.VolumeSource, resources v1.ResourceRequirem
Command: []string{
"sh",
"-c",
fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s i=$(($i+1)); done; while true; do sleep 5; done", iterations, command),
fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s i=$(($i+1)); done; while %v; do sleep 5; done", iterations, command, sleepAfterExecuting),
},
Resources: resources,
VolumeMounts: volumeMounts,
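The only change to the container command itself is its tail: podWithCommand now interpolates the new sleepAfterExecuting flag with %v, so the generated shell script ends in either "while true; do sleep 5; done" (the container keeps running and the pod stays a candidate for eviction) or "while false; do sleep 5; done" (the loop never executes, the container exits 0, and the pod reaches Succeeded, which is what the expectToSucceed specs rely on). Below is a standalone sketch of the two resulting strings, not part of the change; the sample workload string is only illustrative.

package main

import "fmt"

// buildCommand mirrors the format string podWithCommand hands to the container shell.
func buildCommand(iterations int, command string, sleepAfterExecuting bool) string {
	return fmt.Sprintf("i=0; while [ $i -lt %d ]; do %s i=$(($i+1)); done; while %v; do sleep 5; done",
		iterations, command, sleepAfterExecuting)
}

func main() {
	work := "touch file${i}.txt; sleep 0.001;" // same shape as the inodeConsumingPod workload

	// true: the trailing loop runs forever, so the pod keeps running until it is evicted.
	fmt.Println(buildCommand(3, work, true))

	// false: "while false" exits immediately after the work loop, so the container
	// terminates successfully and the pod becomes a terminated pod whose containers
	// the kubelet's image/container garbage collection can clean up under pressure.
	fmt.Println(buildCommand(3, work, false))
}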