mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-07-04 07:26:06 -04:00
Extend the Windows CPU manager node e2e suite with coverage that was present on Linux but missing on Windows:
- strict-cpu-reservation: with the policy option enabled the reserved CPU is excluded from the burstable shared pool; without it (default) the reserved CPU stays in the shared pool.
- dynamic shared-pool resizing: a running burstable container's affinity shrinks and grows via the CPU manager reconcile loop as a guaranteed pod takes and releases an exclusive CPU.

Toggling strict-cpu-reservation invalidates the persisted CPU manager checkpoint (the reserved CPU moves in/out of the default pool), so the kubelet refuses to start until the state file is removed. Implement the previously no-op deleteStateFile on Windows and clear the CPU/memory manager state files when toggling the option, mirroring the Linux updateKubeletConfig(..., deleteStateFiles=true) behavior.
887 lines
40 KiB
Go
887 lines
40 KiB
Go
//go:build windows
|
|
|
|
/*
|
|
Copyright The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package e2enodewindows
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"reflect"
|
|
"time"
|
|
|
|
"github.com/onsi/ginkgo/v2"
|
|
"github.com/onsi/gomega"
|
|
|
|
v1 "k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/api/resource"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
internalapi "k8s.io/cri-api/pkg/apis"
|
|
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
|
"k8s.io/kubernetes/pkg/features"
|
|
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
|
|
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
|
"k8s.io/kubernetes/test/e2e/feature"
|
|
"k8s.io/kubernetes/test/e2e/framework"
|
|
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
|
|
e2enodekubelet "k8s.io/kubernetes/test/e2e_node_windows/kubeletconfig"
|
|
admissionapi "k8s.io/pod-security-admission/api"
|
|
)
|
|
|
|
/*
|
|
* Windows CPU Affinity Node E2E Tests
|
|
*
|
|
* These tests verify that the CPU manager correctly sets Windows CPU group
|
|
* affinity on containers via the CRI, as implemented in:
|
|
* - pkg/kubelet/cm/cpumanager/cpu_manager_windows.go
|
|
* - pkg/kubelet/cm/internal_container_lifecycle_windows.go
|
|
*
|
|
* Prerequisites:
|
|
* - Windows node with kubelet running as a Windows service named "kubelet"
|
|
* - At least 2 allocatable integer CPUs on the node
|
|
* - containerd as the container runtime
|
|
*
|
|
* Linux-only features intentionally NOT covered here:
|
|
* - SMT/HT alignment (FullPCPUsOnlyOption) — no equivalent in Windows CRI
|
|
* - Uncore cache alignment (PreferAlignByUnCoreCacheOption) — no L3 topology via Windows CRI
|
|
* - CFS quota management — cgroup concept, not applicable on Windows
|
|
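*
* Note: the strict-cpu-reservation policy option is NOT part of the list above.
* It is exercised on Windows by the "running with the strict-cpu-reservation
* policy option" group in this file.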
|
|
*/
|
|
|
|
var _ = SIGWindowsDescribe(feature.CPUManager, feature.Windows, ginkgo.Ordered, ginkgo.ContinueOnFailure, framework.WithSerial(), func() {
|
|
f := framework.NewDefaultFramework("cpu-manager-windows")
|
|
f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
|
|
|
|
var (
|
|
oldCfg *kubeletconfig.KubeletConfiguration
|
|
criClient internalapi.RuntimeService
|
|
podMap map[string]*v1.Pod
|
|
)
|
|
|
|
// createPodSync creates a pod, waits for it to be running, registers it
|
|
// in podMap for cleanup, and returns the updated pod object.
|
|
var createPodSync func(ctx context.Context, pod *v1.Pod) *v1.Pod
|
|
|
|
ginkgo.BeforeAll(func(ctx context.Context) {
|
|
var err error
|
|
oldCfg, err = getCurrentKubeletConfig(ctx)
|
|
framework.ExpectNoError(err, "failed to get current kubelet config")
|
|
|
|
criClient, _, err = getCRIClient(ctx)
|
|
framework.ExpectNoError(err, "failed to get CRI client")
|
|
})
|
|
|
|
ginkgo.AfterAll(func(ctx context.Context) {
	// Hand the configuration captured in BeforeAll back to the kubelet once
	// every spec in this container has run.
	originalCfg := oldCfg
	updateWindowsKubeletConfig(ctx, f, originalCfg)
})
|
|
|
|
ginkgo.BeforeEach(func(ctx context.Context) {
	// Every spec starts with an empty pod registry.
	podMap = map[string]*v1.Pod{}

	// createPodSync creates the pod synchronously and records the returned
	// object under its UID so AfterEach can delete it.
	createPodSync = func(ctx context.Context, pod *v1.Pod) *v1.Pod {
		created := e2epod.NewPodClient(f).CreateSync(ctx, pod)
		uid := string(created.UID)
		podMap[uid] = created
		return created
	}
})
|
|
|
|
ginkgo.AfterEach(func(ctx context.Context) {
	// Synchronously delete every pod that is still registered in podMap.
	for uid := range podMap {
		tracked := podMap[uid]
		e2epod.NewPodClient(f).DeleteSync(ctx, tracked.Name, metav1.DeleteOptions{}, f.Timeouts.PodDelete)
	}
})
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Non-guaranteed (burstable / best-effort) pod tests
|
|
// These mirror the Linux "running non-guaranteed pods tests" group.
|
|
// -------------------------------------------------------------------------
|
|
ginkgo.When("running non-guaranteed pods", ginkgo.Label("non-guaranteed"), func() {
|
|
ginkgo.BeforeEach(func(ctx context.Context) {
	// Build the CPU manager kubelet config from the saved one and apply it.
	// NOTE(review): the `true` argument presumably turns the Windows affinity
	// feature gate on — confirm against buildWindowsCPUManagerKubeletConfig.
	wantCfg := buildWindowsCPUManagerKubeletConfig(oldCfg, true)
	updateWindowsKubeletConfigIfNeeded(ctx, f, wantCfg)
})
|
|
|
|
ginkgo.It("should not set CPU affinity on a burstable container", func(ctx context.Context) {
	pod := makeWindowsCPUManagerPod("burstable-pod", []windowsCtnAttribute{
		{name: "burstable-ctr", cpuRequest: "100m", cpuLimit: "500m"},
	})
	ginkgo.By("creating the burstable pod")
	pod = createPodSync(ctx, pod)

	hostCPUs := int(getLocalNode(ctx, f).Status.Capacity.Cpu().Value())

	// Poll rather than read the affinity once: the other shared-pool specs in
	// this file wait for the same condition because the reconcile loop applies
	// the shared pool shortly after the container starts, so a single read
	// right after CreateSync can race with it.
	ginkgo.By("verifying no exclusive CPU affinity is set")
	gomega.Eventually(ctx, func(ctx context.Context) (int, error) {
		affinities, err := getWindowsContainerCPUAffinity(ctx, criClient, pod, "burstable-ctr")
		if err != nil {
			return 0, err
		}
		return countCPUsInAffinities(affinities), nil
	}, 60*time.Second, 2*time.Second).Should(gomega.Equal(hostCPUs),
		"burstable container must not receive exclusive CPU affinity (host has %d CPUs)", hostCPUs)
})
|
|
|
|
// Mirrors: "should let the container access all the online non-exclusively-allocated
|
|
// CPUs when using a reserved CPUs set" (Linux).
|
|
// On Windows we verify via CRI: the guaranteed container has affinity set and the
|
|
// burstable container has none, meaning the burstable runs on the shared pool.
|
|
ginkgo.It("should set affinity only on the guaranteed container when coexisting with a burstable pod", func(ctx context.Context) {
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 2) // 1 for guaranteed + 1 reserved

	guPod := makeWindowsCPUManagerPod("gu-pod", []windowsCtnAttribute{
		{name: "gu-ctr", cpuRequest: "1000m", cpuLimit: "1000m"},
	})
	ginkgo.By("creating the guaranteed pod")
	guPod = createPodSync(ctx, guPod)

	buPod := makeWindowsCPUManagerPod("bu-pod", []windowsCtnAttribute{
		{name: "bu-ctr", cpuRequest: "100m", cpuLimit: "300m"},
	})
	ginkgo.By("creating the burstable pod")
	buPod = createPodSync(ctx, buPod)

	ginkgo.By("verifying guaranteed container has CPU affinity set")
	gomega.Eventually(ctx, func(ctx context.Context) (int, error) {
		aff, err := getWindowsContainerCPUAffinity(ctx, criClient, guPod, "gu-ctr")
		if err != nil {
			return 0, err
		}
		return countCPUsInAffinities(aff), nil
	}, 30*time.Second, 2*time.Second).Should(gomega.Equal(1),
		"guaranteed container should have exactly 1 CPU affinity")

	ginkgo.By("verifying burstable container runs on the shared pool, not exclusively pinned")
	hostCPUs := int(getLocalNode(ctx, f).Status.Capacity.Cpu().Value())
	// The shared pool used by burstable containers shrinks as guaranteed pods
	// take exclusive CPUs, so the expected size is hostCPUs - guaranteedExclusive.
	// Poll for it instead of reading once: the burstable container's mask is
	// applied by the reconcile loop shortly after the container starts, so a
	// single read can observe a not-yet-updated mask.
	gomega.Eventually(ctx, func(ctx context.Context) error {
		buAff, err := getWindowsContainerCPUAffinity(ctx, criClient, buPod, "bu-ctr")
		if err != nil {
			return err
		}
		guAff, err := getWindowsContainerCPUAffinity(ctx, criClient, guPod, "gu-ctr")
		if err != nil {
			return err
		}
		buCPUs, guCPUs := countCPUsInAffinities(buAff), countCPUsInAffinities(guAff)
		if sharedPool := hostCPUs - guCPUs; buCPUs != sharedPool {
			return fmt.Errorf("burstable container must not receive exclusive CPU affinity: got %d CPUs (host=%d, guaranteed=%d, shared pool=%d)",
				buCPUs, hostCPUs, guCPUs, sharedPool)
		}
		return nil
	}, 60*time.Second, 2*time.Second).Should(gomega.Succeed())
})
|
|
})
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Guaranteed pod tests (feature gate ON)
|
|
// These mirror the Linux "running guaranteed pod tests" group.
|
|
// -------------------------------------------------------------------------
|
|
ginkgo.When("running guaranteed pods with exclusive CPU allocation", ginkgo.Label("guaranteed", "exclusive-cpus"), func() {
|
|
ginkgo.BeforeEach(func(ctx context.Context) {
	// Build the CPU manager kubelet config from the saved one and apply it.
	// NOTE(review): the `true` argument presumably turns the Windows affinity
	// feature gate on — confirm against buildWindowsCPUManagerKubeletConfig.
	wantCfg := buildWindowsCPUManagerKubeletConfig(oldCfg, true)
	updateWindowsKubeletConfigIfNeeded(ctx, f, wantCfg)
})
|
|
|
|
// Mirrors: "should allocate exclusively a CPU to a 1-container pod".
|
|
ginkgo.It("should set CPU affinity with exactly 1 CPU for a single-container pod", func(ctx context.Context) {
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 1)

	// One container whose CPU request equals its limit (1 full CPU).
	podSpec := makeWindowsCPUManagerPod("gu-pod-1cpu", []windowsCtnAttribute{
		{name: "gu-ctr", cpuRequest: "1000m", cpuLimit: "1000m"},
	})
	ginkgo.By("creating the guaranteed pod")
	runningPod := createPodSync(ctx, podSpec)

	// pinnedCPUs reports how many CPUs the CRI lists in the container's affinity.
	pinnedCPUs := func(ctx context.Context) (int, error) {
		masks, err := getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "gu-ctr")
		if err != nil {
			return 0, err
		}
		return countCPUsInAffinities(masks), nil
	}

	ginkgo.By("verifying CPU affinity is set to exactly 1 CPU")
	gomega.Eventually(ctx, pinnedCPUs, 30*time.Second, 2*time.Second).
		Should(gomega.Equal(1), "expected exactly 1 CPU in affinity mask")

	ginkgo.By("verifying the host job-object affinity agrees with the CRI report")
	verifyHostMatchesCRI(ctx, criClient, runningPod, "gu-ctr")
})
|
|
|
|
// Mirrors: "should allocate exclusively a even number of CPUs to a 1-container pod".
|
|
ginkgo.It("should set CPU affinity with exactly 2 CPUs for a single-container pod", func(ctx context.Context) {
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 2)

	// One container whose CPU request equals its limit (2 full CPUs).
	podSpec := makeWindowsCPUManagerPod("gu-pod-2cpu", []windowsCtnAttribute{
		{name: "gu-ctr", cpuRequest: "2000m", cpuLimit: "2000m"},
	})
	ginkgo.By("creating the guaranteed pod requesting 2 CPUs")
	runningPod := createPodSync(ctx, podSpec)

	// pinnedCPUs reports how many CPUs the CRI lists in the container's affinity.
	pinnedCPUs := func(ctx context.Context) (int, error) {
		masks, err := getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "gu-ctr")
		if err != nil {
			return 0, err
		}
		return countCPUsInAffinities(masks), nil
	}

	ginkgo.By("verifying CPU affinity is set to exactly 2 CPUs")
	gomega.Eventually(ctx, pinnedCPUs, 30*time.Second, 2*time.Second).
		Should(gomega.Equal(2), "expected exactly 2 CPUs in affinity mask")

	ginkgo.By("verifying the host job-object affinity agrees with the CRI report")
	verifyHostMatchesCRI(ctx, criClient, runningPod, "gu-ctr")
})
|
|
|
|
// Mirrors: "should allocate exclusively a odd number of CPUs to a 1-container pod".
|
|
ginkgo.It("should set CPU affinity with exactly 3 CPUs for a single-container pod", func(ctx context.Context) {
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 3)

	// One container whose CPU request equals its limit (3 full CPUs).
	podSpec := makeWindowsCPUManagerPod("gu-pod-3cpu", []windowsCtnAttribute{
		{name: "gu-ctr", cpuRequest: "3000m", cpuLimit: "3000m"},
	})
	ginkgo.By("creating the guaranteed pod requesting 3 CPUs")
	runningPod := createPodSync(ctx, podSpec)

	// pinnedCPUs reports how many CPUs the CRI lists in the container's affinity.
	pinnedCPUs := func(ctx context.Context) (int, error) {
		masks, err := getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "gu-ctr")
		if err != nil {
			return 0, err
		}
		return countCPUsInAffinities(masks), nil
	}

	ginkgo.By("verifying CPU affinity is set to exactly 3 CPUs")
	gomega.Eventually(ctx, pinnedCPUs, 30*time.Second, 2*time.Second).
		Should(gomega.Equal(3), "expected exactly 3 CPUs in affinity mask")

	ginkgo.By("verifying the host job-object affinity agrees with the CRI report")
	verifyHostMatchesCRI(ctx, criClient, runningPod, "gu-ctr")
})
|
|
|
|
// Mirrors: "should allocate exclusively CPUs to a multi-container pod (1+2)".
|
|
ginkgo.It("should assign non-overlapping CPU affinity to each container in a multi-container pod (1+2)", func(ctx context.Context) {
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 3)

	// Two containers, each with CPU request equal to limit: 1 CPU and 2 CPUs.
	podSpec := makeWindowsCPUManagerPod("gu-pod-1plus2", []windowsCtnAttribute{
		{name: "gu-ctr-1", cpuRequest: "1000m", cpuLimit: "1000m"},
		{name: "gu-ctr-2", cpuRequest: "2000m", cpuLimit: "2000m"},
	})
	ginkgo.By("creating the guaranteed pod with containers requesting 1 and 2 CPUs")
	runningPod := createPodSync(ctx, podSpec)

	// The masks from the last poll are kept so the overlap check below can use them.
	var firstAff, secondAff []*runtimeapi.WindowsCpuGroupAffinity
	countsMatch := func(ctx context.Context) error {
		var err error
		if firstAff, err = getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "gu-ctr-1"); err != nil {
			return err
		}
		if secondAff, err = getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "gu-ctr-2"); err != nil {
			return err
		}
		got1, got2 := countCPUsInAffinities(firstAff), countCPUsInAffinities(secondAff)
		if got1 == 1 && got2 == 2 {
			return nil
		}
		return fmt.Errorf("want cpu counts (1,2), got (%d,%d)", got1, got2)
	}

	ginkgo.By("verifying each container gets the correct CPU count")
	gomega.Eventually(ctx, countsMatch, 30*time.Second, 2*time.Second).Should(gomega.Succeed())

	ginkgo.By("verifying the CPU affinity masks do not overlap")
	overlapping := windowsAffinitiesOverlap(firstAff, secondAff)
	gomega.Expect(overlapping).To(gomega.BeFalse(),
		"containers in the same pod must receive non-overlapping CPU affinity masks")

	ginkgo.By("verifying the host job-object affinity agrees with the CRI report and masks are disjoint")
	verifyHostMatchesCRI(ctx, criClient, runningPod, "gu-ctr-1")
	verifyHostMatchesCRI(ctx, criClient, runningPod, "gu-ctr-2")
	verifyHostMasksDisjoint(runningPod, "gu-ctr-1", runningPod, "gu-ctr-2")
})
|
|
|
|
// Mirrors: "should allocate exclusively CPUs to a multi-container pod (3+2)".
|
|
ginkgo.It("should assign non-overlapping CPU affinity to each container in a multi-container pod (3+2)", func(ctx context.Context) {
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 5)

	// Two containers, each with CPU request equal to limit: 3 CPUs and 2 CPUs.
	pod := makeWindowsCPUManagerPod("gu-pod-3plus2", []windowsCtnAttribute{
		{name: "gu-ctr-1", cpuRequest: "3000m", cpuLimit: "3000m"},
		{name: "gu-ctr-2", cpuRequest: "2000m", cpuLimit: "2000m"},
	})
	ginkgo.By("creating the guaranteed pod with containers requesting 3 and 2 CPUs")
	pod = createPodSync(ctx, pod)

	// The masks from the last poll are kept so the overlap check below can use them.
	var aff1, aff2 []*runtimeapi.WindowsCpuGroupAffinity
	// Step annotation added so this spec reports the same steps as the
	// sibling (1+2) and (2+2) specs.
	ginkgo.By("verifying each container gets the correct CPU count")
	gomega.Eventually(ctx, func(ctx context.Context) error {
		var err error
		aff1, err = getWindowsContainerCPUAffinity(ctx, criClient, pod, "gu-ctr-1")
		if err != nil {
			return err
		}
		aff2, err = getWindowsContainerCPUAffinity(ctx, criClient, pod, "gu-ctr-2")
		if err != nil {
			return err
		}
		c1, c2 := countCPUsInAffinities(aff1), countCPUsInAffinities(aff2)
		if c1 != 3 || c2 != 2 {
			return fmt.Errorf("want cpu counts (3,2), got (%d,%d)", c1, c2)
		}
		return nil
	}, 30*time.Second, 2*time.Second).Should(gomega.Succeed())

	ginkgo.By("verifying the CPU affinity masks do not overlap")
	gomega.Expect(windowsAffinitiesOverlap(aff1, aff2)).To(gomega.BeFalse(),
		"containers in the same pod must receive non-overlapping CPU affinity masks")

	ginkgo.By("verifying the host job-object affinity agrees with the CRI report and masks are disjoint")
	verifyHostMatchesCRI(ctx, criClient, pod, "gu-ctr-1")
	verifyHostMatchesCRI(ctx, criClient, pod, "gu-ctr-2")
	verifyHostMasksDisjoint(pod, "gu-ctr-1", pod, "gu-ctr-2")
})
|
|
|
|
// Mirrors: "should allocate exclusively a CPU to multiple 1-container pods".
|
|
ginkgo.It("should assign non-overlapping CPU affinity across separate guaranteed pods (2+2)", func(ctx context.Context) {
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 4)

	// First pod: one container with CPU request equal to limit (2 CPUs).
	firstPod := makeWindowsCPUManagerPod("gu-pod-a", []windowsCtnAttribute{
		{name: "gu-ctr-a", cpuRequest: "2000m", cpuLimit: "2000m"},
	})
	ginkgo.By("creating guaranteed pod 1")
	firstPod = createPodSync(ctx, firstPod)

	// Second pod: same shape, created after the first one is running.
	secondPod := makeWindowsCPUManagerPod("gu-pod-b", []windowsCtnAttribute{
		{name: "gu-ctr-b", cpuRequest: "2000m", cpuLimit: "2000m"},
	})
	ginkgo.By("creating guaranteed pod 2")
	secondPod = createPodSync(ctx, secondPod)

	// The masks from the last poll are kept so the overlap check below can use them.
	var maskA, maskB []*runtimeapi.WindowsCpuGroupAffinity
	bothHaveTwo := func(ctx context.Context) error {
		var err error
		if maskA, err = getWindowsContainerCPUAffinity(ctx, criClient, firstPod, "gu-ctr-a"); err != nil {
			return err
		}
		if maskB, err = getWindowsContainerCPUAffinity(ctx, criClient, secondPod, "gu-ctr-b"); err != nil {
			return err
		}
		gotA, gotB := countCPUsInAffinities(maskA), countCPUsInAffinities(maskB)
		if gotA == 2 && gotB == 2 {
			return nil
		}
		return fmt.Errorf("want cpu counts (2,2), got (%d,%d)", gotA, gotB)
	}

	ginkgo.By("verifying both containers each get exactly 2 CPUs")
	gomega.Eventually(ctx, bothHaveTwo, 30*time.Second, 2*time.Second).Should(gomega.Succeed())

	ginkgo.By("verifying the two pods' CPU affinity masks do not overlap")
	overlapping := windowsAffinitiesOverlap(maskA, maskB)
	gomega.Expect(overlapping).To(gomega.BeFalse(),
		"guaranteed pods must not share exclusively-allocated CPUs")

	ginkgo.By("verifying the host job-object affinity agrees with the CRI report and masks are disjoint")
	verifyHostMatchesCRI(ctx, criClient, firstPod, "gu-ctr-a")
	verifyHostMatchesCRI(ctx, criClient, secondPod, "gu-ctr-b")
	verifyHostMasksDisjoint(firstPod, "gu-ctr-a", secondPod, "gu-ctr-b")
})
|
|
})
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Feature gate disabled tests
|
|
// Mirrors the Linux "running guaranteed pod tests with feature gates disabled"
|
|
// group, adapted for the Windows-specific WindowsCPUAndMemoryAffinity gate.
|
|
// When the feature gate is off, cpu_manager_windows.go's updateContainerCPUSet
|
|
// returns immediately without calling UpdateContainerResources, so the CRI
|
|
// should report no AffinityCpus even for guaranteed containers.
|
|
// -------------------------------------------------------------------------
|
|
ginkgo.When("running guaranteed pods with WindowsCPUAndMemoryAffinity feature gate disabled", ginkgo.Label("guaranteed", "feature-gate-disabled"), func() {
|
|
ginkgo.BeforeEach(func(ctx context.Context) {
	// Build the CPU manager kubelet config from the saved one and apply it.
	// NOTE(review): the `false` argument presumably turns the
	// WindowsCPUAndMemoryAffinity feature gate off, as this group's name
	// suggests — confirm against buildWindowsCPUManagerKubeletConfig.
	wantCfg := buildWindowsCPUManagerKubeletConfig(oldCfg, false)
	updateWindowsKubeletConfigIfNeeded(ctx, f, wantCfg)
})
|
|
|
|
ginkgo.It("should NOT set CPU affinity on a 1-CPU guaranteed container when feature gate is off", func(ctx context.Context) {
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 1)

	podSpec := makeWindowsCPUManagerPod("gu-pod-no-affinity", []windowsCtnAttribute{
		{name: "gu-ctr", cpuRequest: "1000m", cpuLimit: "1000m"},
	})
	ginkgo.By("creating the guaranteed pod")
	runningPod := createPodSync(ctx, podSpec)

	// Let the CPU manager reconcile loop (1 s period) run for a few cycles and
	// confirm throughout that no exclusive affinity shows up. On Windows
	// "no affinity assigned" is reported either as an empty list or as a full
	// mask covering every host CPU; a count strictly between those two is a
	// strict subset (i.e. exclusive pinning), which the feature gate must prevent.
	ginkgo.By("verifying no exclusive CPU affinity is set even for a guaranteed container")
	hostCPUs := int(getLocalNode(ctx, f).Status.Capacity.Cpu().Value())
	noExclusivePinning := func(ctx context.Context) error {
		masks, err := getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "gu-ctr")
		if err != nil {
			return err
		}
		switch pinned := countCPUsInAffinities(masks); {
		case pinned == 0, pinned >= hostCPUs:
			// Empty list or full host mask: nothing was pinned.
			return nil
		default:
			return fmt.Errorf("feature gate off: guaranteed container received exclusive affinity: got %d CPUs (host has %d)", pinned, hostCPUs)
		}
	}
	gomega.Consistently(ctx, noExclusivePinning, 10*time.Second, 2*time.Second).Should(gomega.Succeed())
})
|
|
|
|
ginkgo.It("should NOT set CPU affinity on a multi-container guaranteed pod when feature gate is off", func(ctx context.Context) {
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 3)

	podSpec := makeWindowsCPUManagerPod("gu-pod-mc-no-affinity", []windowsCtnAttribute{
		{name: "gu-ctr-1", cpuRequest: "1000m", cpuLimit: "1000m"},
		{name: "gu-ctr-2", cpuRequest: "2000m", cpuLimit: "2000m"},
	})
	ginkgo.By("creating the guaranteed multi-container pod")
	runningPod := createPodSync(ctx, podSpec)

	ginkgo.By("verifying no exclusive CPU affinity is set on either container")
	hostCPUs := int(getLocalNode(ctx, f).Status.Capacity.Cpu().Value())
	// A count of 0 or one that reaches the host CPU count is accepted; anything
	// strictly in between is treated as exclusive pinning.
	exclusivelyPinned := func(cpus int) bool {
		return cpus != 0 && cpus < hostCPUs
	}
	neitherPinned := func(ctx context.Context) error {
		firstAff, err := getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "gu-ctr-1")
		if err != nil {
			return err
		}
		secondAff, err := getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "gu-ctr-2")
		if err != nil {
			return err
		}
		got1 := countCPUsInAffinities(firstAff)
		got2 := countCPUsInAffinities(secondAff)
		switch {
		case exclusivelyPinned(got1):
			return fmt.Errorf("feature gate off: gu-ctr-1 received exclusive affinity: got %d CPUs (host has %d)", got1, hostCPUs)
		case exclusivelyPinned(got2):
			return fmt.Errorf("feature gate off: gu-ctr-2 received exclusive affinity: got %d CPUs (host has %d)", got2, hostCPUs)
		}
		return nil
	}
	gomega.Consistently(ctx, neitherPinned, 10*time.Second, 2*time.Second).Should(gomega.Succeed())
})
|
|
})
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Init / sidecar container tests
|
|
// Windows supports init containers and restartable init (sidecar) containers.
|
|
// These mirror the Linux "checking the sidecar containers" group.
|
|
//
|
|
// Verification approach differs from Linux: on Windows we cannot read the
|
|
// cpuset filesystem. Instead:
|
|
// - For terminated init containers we can only observe the outcome on the
|
|
// main container (CPUs released and re-used).
|
|
// - For running sidecar (restartable init) containers we query the CRI.
|
|
// -------------------------------------------------------------------------
|
|
ginkgo.When("running pods with init containers", ginkgo.Label("guaranteed", "init-containers"), func() {
|
|
ginkgo.BeforeEach(func(ctx context.Context) {
	// Build the CPU manager kubelet config from the saved one and apply it.
	// NOTE(review): the `true` argument presumably turns the Windows affinity
	// feature gate on — confirm against buildWindowsCPUManagerKubeletConfig.
	wantCfg := buildWindowsCPUManagerKubeletConfig(oldCfg, true)
	updateWindowsKubeletConfigIfNeeded(ctx, f, wantCfg)
})
|
|
|
|
// Mirrors: "should reuse init container exclusive CPUs, but not sidecar container
|
|
// exclusive CPUs".
|
|
// A terminated (non-restartable) init container releases its exclusive CPUs so
|
|
// that the regular app container can use them. A restartable sidecar init
|
|
// container holds its CPUs for its entire lifetime.
|
|
// We verify:
|
|
// 1. The sidecar container (restartable init) holds exactly 1 exclusive CPU.
|
|
// 2. The app container holds exactly 1 exclusive CPU.
|
|
// 3. The sidecar and app CPUs do not overlap (both are exclusive).
|
|
ginkgo.It("sidecar container should hold exclusive CPUs separately from the app container", func(ctx context.Context) {
	// The non-restartable init container's CPU is released when it terminates
	// and is reused, so the pod needs sidecar (1) + app (1) = 2 exclusive CPUs.
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 2)

	var restartAlways = v1.ContainerRestartPolicyAlways

	pod := makeWindowsCPUManagerInitPod("gu-sidecar-pod",
		[]windowsCtnAttribute{
			// non-restartable init: terminates quickly, CPUs are reused
			{name: "init-ctr", cpuRequest: "1000m", cpuLimit: "1000m"},
			// restartable sidecar: stays alive, holds exclusive CPUs
			{name: "sidecar-ctr", cpuRequest: "1000m", cpuLimit: "1000m", restartPolicy: &restartAlways},
		},
		// app container
		windowsCtnAttribute{name: "app-ctr", cpuRequest: "1000m", cpuLimit: "1000m"},
	)
	ginkgo.By("creating the pod with a non-restartable init container, a sidecar, and an app container")
	pod = createPodSync(ctx, pod)

	// The step text names both containers because the poll below checks the
	// sidecar and the app container together.
	ginkgo.By("verifying sidecar and app containers each hold exactly 1 CPU")
	// The masks from the last poll are kept so the overlap check below can use them.
	var sidecarAff, appAff []*runtimeapi.WindowsCpuGroupAffinity
	gomega.Eventually(ctx, func(ctx context.Context) error {
		var err error
		sidecarAff, err = getWindowsContainerCPUAffinity(ctx, criClient, pod, "sidecar-ctr")
		if err != nil {
			return err
		}
		appAff, err = getWindowsContainerCPUAffinity(ctx, criClient, pod, "app-ctr")
		if err != nil {
			return err
		}
		cs, ca := countCPUsInAffinities(sidecarAff), countCPUsInAffinities(appAff)
		if cs != 1 || ca != 1 {
			return fmt.Errorf("want sidecar=1 app=1, got sidecar=%d app=%d", cs, ca)
		}
		return nil
	}, 60*time.Second, 2*time.Second).Should(gomega.Succeed(),
		"both sidecar and app containers must each hold exactly 1 exclusive CPU")

	ginkgo.By("verifying sidecar and app container CPU affinity masks do not overlap")
	gomega.Expect(windowsAffinitiesOverlap(sidecarAff, appAff)).To(gomega.BeFalse(),
		"sidecar and app containers must not share exclusively-allocated CPUs")
})
|
|
|
|
// A pod whose only init container is non-restartable: after the init
|
|
// container completes, the app container gets exclusive CPUs (which may
|
|
// overlap with the init container's former CPUs — that is expected and
|
|
// correct; we only verify the app container count here).
|
|
ginkgo.It("app container should get exclusive CPUs after a non-restartable init container completes", func(ctx context.Context) {
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), 1)

	// init container: terminates after a brief sleep
	initCtrs := []windowsCtnAttribute{
		{name: "init-ctr", cpuRequest: "1000m", cpuLimit: "1000m"},
	}
	appCtr := windowsCtnAttribute{name: "app-ctr", cpuRequest: "1000m", cpuLimit: "1000m"}
	podSpec := makeWindowsCPUManagerInitPod("gu-init-pod", initCtrs, appCtr)

	ginkgo.By("creating the pod with a non-restartable init container")
	runningPod := createPodSync(ctx, podSpec)

	// appPinnedCPUs reports how many CPUs the CRI lists in the app container's affinity.
	appPinnedCPUs := func(ctx context.Context) (int, error) {
		masks, err := getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "app-ctr")
		if err != nil {
			return 0, err
		}
		return countCPUsInAffinities(masks), nil
	}

	ginkgo.By("verifying the app container holds exactly 1 exclusive CPU after init completes")
	gomega.Eventually(ctx, appPinnedCPUs, 60*time.Second, 2*time.Second).
		Should(gomega.Equal(1), "app container must hold exactly 1 exclusive CPU")
})
|
|
})
|
|
// -------------------------------------------------------------------------
|
|
// Strict vs non-strict CPU reservation
|
|
// With the strict-cpu-reservation policy option the reserved CPUs are removed
|
|
// from the shared pool, so shared-pool (burstable) containers are confined to
|
|
// (online - reserved). Without it (the default) the reserved CPUs remain part
|
|
// of the shared pool and stay usable by burstable containers.
|
|
// -------------------------------------------------------------------------
|
|
ginkgo.When("running with the strict-cpu-reservation policy option", ginkgo.Label("non-guaranteed", "strict-cpu-reservation"), func() {
|
|
// Toggling strict-cpu-reservation changes whether the reserved CPU belongs
|
|
// to the default pool, which invalidates the persisted CPU manager
|
|
// checkpoint. Reset to a clean non-strict state on the way out so the
|
|
// checkpoint left behind is compatible with the rest of the suite.
|
|
ginkgo.AfterEach(func(ctx context.Context) {
	// Leave the group with the non-strict CPU manager config in place, using
	// the helper variant that also clears persisted state.
	// NOTE(review): which state files are removed is defined by
	// updateWindowsKubeletConfigClearState — confirm there.
	resetCfg := buildWindowsCPUManagerKubeletConfig(oldCfg, true)
	updateWindowsKubeletConfigClearState(ctx, f, resetCfg)
})
|
|
|
|
ginkgo.It("should exclude the reserved CPU from the burstable shared pool when strict-cpu-reservation is enabled", func(ctx context.Context) {
	hostCPUs := int(getLocalNode(ctx, f).Status.Capacity.Cpu().Value())
	if hostCPUs <= 1 {
		ginkgo.Skip(fmt.Sprintf("strict-cpu-reservation test needs >= 2 CPUs (1 reserved + shared pool), node has %d", hostCPUs))
	}

	ginkgo.By("enabling the static CPU manager with strict-cpu-reservation and CPU 0 reserved")
	strictCfg := buildWindowsStrictCPUReservationConfig(oldCfg)
	updateWindowsKubeletConfigClearState(ctx, f, strictCfg)

	podSpec := makeWindowsCPUManagerPod("strict-burstable-pod", []windowsCtnAttribute{
		{name: "bu-ctr", cpuRequest: "100m", cpuLimit: "300m"},
	})
	runningPod := createPodSync(ctx, podSpec)

	// sharedPoolSize reports how many CPUs the CRI lists in the burstable container's affinity.
	sharedPoolSize := func(ctx context.Context) (int, error) {
		masks, err := getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "bu-ctr")
		if err != nil {
			return 0, err
		}
		return countCPUsInAffinities(masks), nil
	}

	// The shared pool is applied by the reconcile loop shortly after the
	// container starts; with strict reservation the reserved CPU 0 is left
	// out of it, hence the expected size of hostCPUs-1.
	ginkgo.By("verifying the burstable container is confined to (host - reserved) CPUs")
	gomega.Eventually(ctx, sharedPoolSize, 60*time.Second, 2*time.Second).Should(gomega.Equal(hostCPUs-1),
		"with strict-cpu-reservation the burstable shared pool must exclude the 1 reserved CPU (host=%d)", hostCPUs)
})
|
|
|
|
ginkgo.It("should keep the reserved CPU in the burstable shared pool when strict-cpu-reservation is disabled (default)", func(ctx context.Context) {
	hostCPUs := int(getLocalNode(ctx, f).Status.Capacity.Cpu().Value())
	if hostCPUs <= 1 {
		ginkgo.Skip(fmt.Sprintf("test needs >= 2 CPUs, node has %d", hostCPUs))
	}

	ginkgo.By("enabling the static CPU manager without strict-cpu-reservation (CPU 0 reserved)")
	nonStrictCfg := buildWindowsCPUManagerKubeletConfig(oldCfg, true)
	updateWindowsKubeletConfigClearState(ctx, f, nonStrictCfg)

	podSpec := makeWindowsCPUManagerPod("nonstrict-burstable-pod", []windowsCtnAttribute{
		{name: "bu-ctr", cpuRequest: "100m", cpuLimit: "300m"},
	})
	runningPod := createPodSync(ctx, podSpec)

	// sharedPoolSize reports how many CPUs the CRI lists in the burstable container's affinity.
	sharedPoolSize := func(ctx context.Context) (int, error) {
		masks, err := getWindowsContainerCPUAffinity(ctx, criClient, runningPod, "bu-ctr")
		if err != nil {
			return 0, err
		}
		return countCPUsInAffinities(masks), nil
	}

	// With strict reservation off and nothing allocated exclusively, the
	// shared pool is the whole machine, reserved CPU included.
	ginkgo.By("verifying the burstable container can use all host CPUs (reserved included)")
	gomega.Eventually(ctx, sharedPoolSize, 60*time.Second, 2*time.Second).Should(gomega.Equal(hostCPUs),
		"without strict-cpu-reservation the burstable shared pool must include the reserved CPU (host=%d)", hostCPUs)
})
|
|
})
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Dynamic shared-pool resizing (reconcile loop)
|
|
// The CPU manager reconcile loop updates a *running* shared-pool container's
|
|
// affinity as guaranteed pods take and release exclusive CPUs. This verifies
|
|
// the live transition, not just the steady state at pod start.
|
|
// -------------------------------------------------------------------------
|
|
ginkgo.When("dynamically resizing the shared pool", ginkgo.Label("guaranteed", "non-guaranteed", "shared-pool", "reconcile"), func() {
|
|
ginkgo.BeforeEach(func(ctx context.Context) {
	// Build the CPU manager kubelet config from the saved one and apply it.
	// NOTE(review): the `true` argument presumably turns the Windows affinity
	// feature gate on — confirm against buildWindowsCPUManagerKubeletConfig.
	wantCfg := buildWindowsCPUManagerKubeletConfig(oldCfg, true)
	updateWindowsKubeletConfigIfNeeded(ctx, f, wantCfg)
})
|
|
|
|
ginkgo.It("should shrink and grow a running burstable container's affinity as a guaranteed pod comes and goes", func(ctx context.Context) {
|
|
node := getLocalNode(ctx, f)
|
|
hostCPUs := int(node.Status.Capacity.Cpu().Value())
|
|
// Need >= 2 CPUs: CPU 0 reserved (which stays in the shared pool in
|
|
// non-strict mode) plus at least one non-reserved CPU for the guaranteed
|
|
// pod's exclusive allocation. The shared pool is then never empty, so the
|
|
// burstable mask shrinks from hostCPUs to hostCPUs-1 and back.
|
|
if hostCPUs < 2 {
|
|
ginkgo.Skip(fmt.Sprintf("dynamic shared-pool test needs >= 2 CPUs, node has %d", hostCPUs))
|
|
}
|
|
|
|
buPod := createPodSync(ctx, makeWindowsCPUManagerPod("dyn-bu-pod", []windowsCtnAttribute{
|
|
{name: "bu-ctr", cpuRequest: "100m", cpuLimit: "300m"},
|
|
}))
|
|
burstableCPUCount := func(ctx context.Context) (int, error) {
|
|
aff, err := getWindowsContainerCPUAffinity(ctx, criClient, buPod, "bu-ctr")
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return countCPUsInAffinities(aff), nil
|
|
}
|
|
|
|
ginkgo.By("waiting for the burstable container to occupy the full shared pool")
|
|
gomega.Eventually(ctx, burstableCPUCount, 60*time.Second, 2*time.Second).Should(gomega.Equal(hostCPUs),
|
|
"with no exclusive allocations the burstable shared pool should span all %d host CPUs", hostCPUs)
|
|
|
|
ginkgo.By("creating a guaranteed pod that takes 1 exclusive CPU")
|
|
guPod := createPodSync(ctx, makeWindowsCPUManagerPod("dyn-gu-pod", []windowsCtnAttribute{
|
|
{name: "gu-ctr", cpuRequest: "1000m", cpuLimit: "1000m"},
|
|
}))
|
|
|
|
ginkgo.By("verifying the running burstable container's affinity shrinks by the 1 exclusive CPU")
|
|
gomega.Eventually(ctx, burstableCPUCount, 60*time.Second, 2*time.Second).Should(gomega.Equal(hostCPUs-1),
|
|
"the reconcile loop should remove the guaranteed pod's exclusive CPU from the running burstable container's mask")
|
|
|
|
ginkgo.By("deleting the guaranteed pod to release its exclusive CPU")
|
|
e2epod.NewPodClient(f).DeleteSync(ctx, guPod.Name, metav1.DeleteOptions{}, f.Timeouts.PodDelete)
|
|
delete(podMap, string(guPod.UID))
|
|
|
|
ginkgo.By("verifying the running burstable container's affinity grows back to the full shared pool")
|
|
gomega.Eventually(ctx, burstableCPUCount, 60*time.Second, 2*time.Second).Should(gomega.Equal(hostCPUs),
|
|
"the reconcile loop should return the released CPU to the running burstable container's mask")
|
|
})
|
|
})
|
|
})
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Pod / container creation helpers
|
|
// -------------------------------------------------------------------------
|
|
|
|
// windowsCtnAttribute describes a single container's CPU resource requirements
|
|
// for use with makeWindowsCPUManagerPod / makeWindowsCPUManagerInitPod.
|
|
type windowsCtnAttribute struct {
|
|
name string
|
|
cpuRequest string
|
|
cpuLimit string
|
|
restartPolicy *v1.ContainerRestartPolicy
|
|
}
|
|
|
|
// makeWindowsCPUManagerPod builds a Pod spec for Windows CPU affinity tests.
|
|
// Containers run a PowerShell sleep so they stay alive without Linux-specific
|
|
// cgroup mounts or /proc filesystem access.
|
|
func makeWindowsCPUManagerPod(podName string, attrs []windowsCtnAttribute) *v1.Pod {
|
|
return &v1.Pod{
|
|
ObjectMeta: metav1.ObjectMeta{Name: podName},
|
|
Spec: v1.PodSpec{
|
|
RestartPolicy: v1.RestartPolicyNever,
|
|
Containers: buildWindowsContainers(attrs),
|
|
NodeSelector: map[string]string{"kubernetes.io/os": "windows"},
|
|
},
|
|
}
|
|
}
|
|
|
|
// verifyHostMatchesCRI asserts that the Windows kernel's job-object affinity
|
|
// for ctnName agrees bit-for-bit with the affinity CRI reports. This proves the
|
|
// runtime actually applied the mask, not just that containerd recorded it.
|
|
func verifyHostMatchesCRI(ctx context.Context, criClient internalapi.RuntimeService, pod *v1.Pod, ctnName string) {
|
|
ginkgo.GinkgoHelper()
|
|
criAff, err := getWindowsContainerCPUAffinity(ctx, criClient, pod, ctnName)
|
|
framework.ExpectNoError(err, "failed to fetch CRI affinity for host comparison of %q", ctnName)
|
|
framework.ExpectNoError(
|
|
validateHostJobAffinityProcessIsolated(pod, ctnName, criAff),
|
|
"host job-object affinity does not match CRI-reported affinity for %q", ctnName)
|
|
}
|
|
|
|
// verifyHostMasksDisjoint asserts that the kernel-applied affinity masks of two
|
|
// containers (in the same or different pods) share no CPU — the host-level
|
|
// counterpart to the CRI windowsAffinitiesOverlap check.
|
|
func verifyHostMasksDisjoint(pod1 *v1.Pod, ctn1 string, pod2 *v1.Pod, ctn2 string) {
|
|
ginkgo.GinkgoHelper()
|
|
h1, err := getHostJobAffinity(pod1, ctn1)
|
|
framework.ExpectNoError(err, "failed to read host job affinity for %q", ctn1)
|
|
h2, err := getHostJobAffinity(pod2, ctn2)
|
|
framework.ExpectNoError(err, "failed to read host job affinity for %q", ctn2)
|
|
gomega.Expect(hostJobAffinitiesOverlap(h1, h2)).To(gomega.BeFalse(),
|
|
"containers %q and %q must not share exclusively-allocated CPUs at the kernel level", ctn1, ctn2)
|
|
}
|
|
|
|
// makeWindowsCPUManagerInitPod builds a Pod spec that has init containers
|
|
// (possibly restartable / sidecar) followed by a regular app container.
|
|
func makeWindowsCPUManagerInitPod(podName string, initAttrs []windowsCtnAttribute, appAttr windowsCtnAttribute) *v1.Pod {
|
|
initContainers := buildWindowsContainers(initAttrs)
|
|
for i := range initContainers {
|
|
if initAttrs[i].restartPolicy != nil {
|
|
initContainers[i].RestartPolicy = initAttrs[i].restartPolicy
|
|
} else {
|
|
// Non-restartable init containers exit quickly.
|
|
initContainers[i].Command = []string{"powershell.exe", "-Command", "Start-Sleep -Seconds 2"}
|
|
}
|
|
}
|
|
return &v1.Pod{
|
|
ObjectMeta: metav1.ObjectMeta{Name: podName},
|
|
Spec: v1.PodSpec{
|
|
RestartPolicy: v1.RestartPolicyNever,
|
|
InitContainers: initContainers,
|
|
Containers: buildWindowsContainers([]windowsCtnAttribute{appAttr}),
|
|
NodeSelector: map[string]string{"kubernetes.io/os": "windows"},
|
|
},
|
|
}
|
|
}
|
|
|
|
// buildWindowsContainers converts a slice of windowsCtnAttribute into v1.Container
|
|
// objects suitable for Windows nodes (PowerShell command, no Linux volume mounts).
|
|
func buildWindowsContainers(attrs []windowsCtnAttribute) []v1.Container {
|
|
var containers []v1.Container
|
|
for _, attr := range attrs {
|
|
requests := v1.ResourceList{
|
|
v1.ResourceMemory: resource.MustParse("128Mi"),
|
|
}
|
|
if attr.cpuRequest != "" {
|
|
requests[v1.ResourceCPU] = resource.MustParse(attr.cpuRequest)
|
|
}
|
|
limits := v1.ResourceList{
|
|
v1.ResourceMemory: resource.MustParse("128Mi"),
|
|
}
|
|
if attr.cpuLimit != "" {
|
|
limits[v1.ResourceCPU] = resource.MustParse(attr.cpuLimit)
|
|
}
|
|
containers = append(containers, v1.Container{
|
|
Name: attr.name,
|
|
Image: busyboxImage,
|
|
Resources: v1.ResourceRequirements{
|
|
Requests: requests,
|
|
Limits: limits,
|
|
},
|
|
// Long-running sleep; powershell.exe is available in the Windows BusyBox image.
|
|
Command: []string{"powershell.exe", "-Command", "Start-Sleep -Seconds 86400"},
|
|
})
|
|
}
|
|
return containers
|
|
}
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Kubelet configuration helpers
|
|
// -------------------------------------------------------------------------
|
|
|
|
// buildWindowsCPUManagerKubeletConfig returns a KubeletConfiguration with the
|
|
// static CPU manager policy. When featureGateOn is true, the
|
|
// WindowsCPUAndMemoryAffinity feature gate is also enabled.
|
|
func buildWindowsCPUManagerKubeletConfig(oldCfg *kubeletconfig.KubeletConfiguration, featureGateOn bool) *kubeletconfig.KubeletConfiguration {
|
|
newCfg := oldCfg.DeepCopy()
|
|
if newCfg.FeatureGates == nil {
|
|
newCfg.FeatureGates = make(map[string]bool)
|
|
}
|
|
newCfg.FeatureGates[string(features.WindowsCPUAndMemoryAffinity)] = featureGateOn
|
|
newCfg.CPUManagerPolicy = string(cpumanager.PolicyStatic)
|
|
newCfg.CPUManagerReconcilePeriod = metav1.Duration{Duration: 1 * time.Second}
|
|
// Reserve CPU 0 so the remaining CPUs are available for guaranteed pods.
|
|
newCfg.ReservedSystemCPUs = "0"
|
|
return newCfg
|
|
}
|
|
|
|
// buildWindowsStrictCPUReservationConfig returns a static CPU manager config
|
|
// (feature gate on) with the strict-cpu-reservation policy option enabled, which
|
|
// removes the reserved CPUs from the shared pool. CPUManagerPolicyOptions is GA
|
|
// and locked on, so only the option map needs to be set.
|
|
func buildWindowsStrictCPUReservationConfig(oldCfg *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
|
|
newCfg := buildWindowsCPUManagerKubeletConfig(oldCfg, true)
|
|
newCfg.CPUManagerPolicyOptions = map[string]string{
|
|
cpumanager.StrictCPUReservationOption: "true",
|
|
}
|
|
return newCfg
|
|
}
|
|
|
|
// updateWindowsKubeletConfig stops the kubelet Windows service, writes the new
|
|
// configuration file, and restarts the service.
|
|
func updateWindowsKubeletConfig(ctx context.Context, f *framework.Framework, cfg *kubeletconfig.KubeletConfiguration) {
|
|
ginkgo.GinkgoHelper()
|
|
kubeletStart := mustStopKubelet(ctx, f)
|
|
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(cfg), "failed to write kubelet config file")
|
|
kubeletStart(ctx)
|
|
}
|
|
|
|
// updateWindowsKubeletConfigClearState is like updateWindowsKubeletConfig but
|
|
// also removes the CPU/memory manager state files while the kubelet is stopped.
|
|
// This is required when the new configuration invalidates the persisted
|
|
// checkpoint (e.g. toggling strict-cpu-reservation, which changes whether the
|
|
// reserved CPU belongs to the default pool) — otherwise the kubelet refuses to
|
|
// start with "invalid state, please drain node and remove policy state file".
|
|
// It mirrors the Linux updateKubeletConfig(..., deleteStateFiles=true).
|
|
func updateWindowsKubeletConfigClearState(ctx context.Context, f *framework.Framework, cfg *kubeletconfig.KubeletConfiguration) {
|
|
ginkgo.GinkgoHelper()
|
|
kubeletStart := mustStopKubelet(ctx, f)
|
|
deleteStateFile(cpuManagerStateFile)
|
|
deleteStateFile(memoryManagerStateFile)
|
|
framework.ExpectNoError(e2enodekubelet.WriteKubeletConfigFile(cfg), "failed to write kubelet config file")
|
|
kubeletStart(ctx)
|
|
}
|
|
|
|
// updateWindowsKubeletConfigIfNeeded calls updateWindowsKubeletConfig only when
|
|
// the desired configuration differs from what is currently running, avoiding
|
|
// unnecessary kubelet restarts between tests in the same When block.
|
|
func updateWindowsKubeletConfigIfNeeded(ctx context.Context, f *framework.Framework, desired *kubeletconfig.KubeletConfiguration) {
|
|
ginkgo.GinkgoHelper()
|
|
current, err := getCurrentKubeletConfig(ctx)
|
|
framework.ExpectNoError(err, "failed to get current kubelet config")
|
|
if equalKubeletConfiguration(current, desired) {
|
|
framework.Logf("kubelet configuration already matches desired state, skipping restart")
|
|
return
|
|
}
|
|
updateWindowsKubeletConfig(ctx, f, desired)
|
|
}
|
|
|
|
// equalKubeletConfiguration returns true when the two configurations are
|
|
// semantically equal (ignoring TypeMeta which is not meaningful for comparison).
|
|
func equalKubeletConfiguration(a, b *kubeletconfig.KubeletConfiguration) bool {
|
|
a = a.DeepCopy()
|
|
b = b.DeepCopy()
|
|
a.TypeMeta = metav1.TypeMeta{}
|
|
b.TypeMeta = metav1.TypeMeta{}
|
|
return reflect.DeepEqual(a, b)
|
|
}
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Skip / node helpers
|
|
// -------------------------------------------------------------------------
|
|
|
|
// skipIfAllocatableCPUsLessThan skips the current test when the node does not
|
|
// have enough allocatable integer CPUs to satisfy the test.
|
|
// One CPU (CPU 0) is always reserved by buildWindowsCPUManagerKubeletConfig,
|
|
// so the minimum allocatable count is (requested + 1).
|
|
func skipIfAllocatableCPUsLessThan(node *v1.Node, requested int) {
|
|
ginkgo.GinkgoHelper()
|
|
allocatable := node.Status.Allocatable[v1.ResourceCPU]
|
|
need := int64(requested + 1) // +1 for the reserved CPU
|
|
if allocatable.Value() < need {
|
|
ginkgo.Skip(fmt.Sprintf(
|
|
"skipping: node has %d allocatable CPUs but test needs %d (including 1 reserved)",
|
|
allocatable.Value(), need))
|
|
}
|
|
}
|