mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-02-03 20:40:26 -05:00
Add e2e case and UT case, four E2E test case mapping to
https://github.com/kubernetes/kubernetes/pull/131764#issuecomment-3431019564
This commit is contained in:
parent
2ed5499bf7
commit
a707afafce
6 changed files with 934 additions and 48 deletions
|
|
@ -30,6 +30,7 @@ import (
|
|||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
|
|
@ -38,7 +39,6 @@ import (
|
|||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/status"
|
||||
"k8s.io/utils/cpuset"
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
)
|
||||
|
||||
// ActivePodsFunc is a function that returns a list of pods to reconcile.
|
||||
|
|
@ -427,15 +427,18 @@ func (m *manager) isAllInitContainerTerminated(rootLogger logr.Logger, pod *v1.P
|
|||
return false
|
||||
}
|
||||
for _, container := range pod.Spec.InitContainers {
|
||||
logger := klog.LoggerWithValues(podLogger, "containerName", container.Name)
|
||||
cstatus, err := findContainerStatusByName(&pstatus, container.Name)
|
||||
if err != nil {
|
||||
logger.V(5).Info("skipping container; ID not found in pod status", "err", err)
|
||||
return false
|
||||
}
|
||||
if !podutil.IsRestartableInitContainer(&container) {
|
||||
logger := klog.LoggerWithValues(podLogger, "containerName", container.Name)
|
||||
|
||||
if cstatus.State.Terminated == nil {
|
||||
return false
|
||||
cstatus, err := findContainerStatusByName(&pstatus, container.Name)
|
||||
if err != nil {
|
||||
logger.V(5).Info("skipping container; ID not found in pod status", "err", err)
|
||||
return false
|
||||
}
|
||||
|
||||
if cstatus.State.Terminated == nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -131,6 +131,51 @@ func (p *mockPolicy) GetAllocatableCPUs(m state.State) cpuset.CPUSet {
|
|||
return cpuset.New()
|
||||
}
|
||||
|
||||
func (p *mockPolicy) ReleaseLeakedCPUs(logger logr.Logger, s state.State, pod *v1.Pod) cpuset.CPUSet {
|
||||
// If pod has no init containers, no leaked CPUs to release
|
||||
if len(pod.Spec.InitContainers) == 0 {
|
||||
return cpuset.New()
|
||||
}
|
||||
|
||||
// Simulate releasing leaked CPUs from init containers
|
||||
leakedCPUs := cpuset.New()
|
||||
if assignments, exists := s.GetCPUAssignments()[string(pod.UID)]; exists {
|
||||
for containerName, cset := range assignments {
|
||||
// Check if this is an init container
|
||||
for _, initC := range pod.Spec.InitContainers {
|
||||
if initC.Name == containerName {
|
||||
// If this is a sidecar container, keep all its CPUs
|
||||
if initC.RestartPolicy != nil && *initC.RestartPolicy == v1.ContainerRestartPolicyAlways {
|
||||
// Sidecar containers keep all their CPUs, no leakage
|
||||
continue
|
||||
}
|
||||
|
||||
// Simulate some leaked CPUs (remove some from the assignment)
|
||||
if cset.Size() > 2 {
|
||||
// Get the first 2 CPUs to keep, release the rest
|
||||
cpus := cset.List()
|
||||
if len(cpus) > 2 {
|
||||
keepCPUs := cpuset.New(cpus[0], cpus[1])
|
||||
leakedFromContainer := cset.Difference(keepCPUs)
|
||||
leakedCPUs = leakedCPUs.Union(leakedFromContainer)
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update default CPUSet to include leaked CPUs
|
||||
defaultSet := s.GetDefaultCPUSet()
|
||||
s.SetDefaultCPUSet(defaultSet.Union(leakedCPUs))
|
||||
|
||||
return leakedCPUs
|
||||
}
|
||||
|
||||
func (p *mockPolicy) UpdateCPUsForInitC(logger logr.Logger, s state.State, pod *v1.Pod, containerName string, leakedCPUs cpuset.CPUSet) {
|
||||
}
|
||||
|
||||
type mockRuntimeService struct {
|
||||
err error
|
||||
}
|
||||
|
|
@ -1476,6 +1521,213 @@ func TestCPUManagerHandlePolicyOptions(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestReleasePodUnallocatedCPUs(t *testing.T) {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
activePods []*v1.Pod
|
||||
podStatus v1.PodStatus
|
||||
podStatusFound bool
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
stDefaultCPUSet cpuset.CPUSet
|
||||
expAssignments state.ContainerCPUAssignments
|
||||
expDefaultSet cpuset.CPUSet
|
||||
}{
|
||||
{
|
||||
description: "Pod with terminated init containers - leaked CPUs released",
|
||||
activePods: []*v1.Pod{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "test-pod",
|
||||
UID: "test-pod-uid",
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
InitContainers: []v1.Container{{Name: "init-1"}},
|
||||
Containers: []v1.Container{{Name: "app-1"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
podStatus: v1.PodStatus{
|
||||
InitContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "init-1",
|
||||
State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{}},
|
||||
},
|
||||
},
|
||||
},
|
||||
podStatusFound: true,
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-uid": {
|
||||
"init-1": cpuset.New(0, 2, 4, 6), // 4 CPUs - has leaked CPUs
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(3, 7),
|
||||
expAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-uid": {
|
||||
"init-1": cpuset.New(0, 2, 4, 6), // should be updated after releasing leaked CPUs
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
expDefaultSet: cpuset.New(3, 4, 6, 7), // leaked CPUs returned to default
|
||||
},
|
||||
{
|
||||
description: "Pod with running init containers - no CPU release",
|
||||
activePods: []*v1.Pod{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "test-pod",
|
||||
UID: "test-pod-uid",
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
InitContainers: []v1.Container{{Name: "init-1"}},
|
||||
Containers: []v1.Container{{Name: "app-1"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
podStatus: v1.PodStatus{
|
||||
InitContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "init-1",
|
||||
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
|
||||
},
|
||||
},
|
||||
},
|
||||
podStatusFound: true,
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-uid": {
|
||||
"init-1": cpuset.New(0, 2, 4, 6),
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(3, 7),
|
||||
expAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-uid": {
|
||||
"init-1": cpuset.New(0, 2, 4, 6), // should remain unchanged
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
expDefaultSet: cpuset.New(3, 7), // should remain unchanged
|
||||
},
|
||||
{
|
||||
description: "Pod with restartable init containers - no CPU release",
|
||||
activePods: []*v1.Pod{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "test-pod",
|
||||
UID: "test-pod-uid",
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
InitContainers: []v1.Container{
|
||||
{
|
||||
Name: "init-1",
|
||||
RestartPolicy: &[]v1.ContainerRestartPolicy{v1.ContainerRestartPolicyAlways}[0],
|
||||
},
|
||||
},
|
||||
Containers: []v1.Container{{Name: "app-1"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
podStatus: v1.PodStatus{
|
||||
InitContainerStatuses: []v1.ContainerStatus{
|
||||
{
|
||||
Name: "init-1",
|
||||
State: v1.ContainerState{Terminated: &v1.ContainerStateTerminated{}},
|
||||
},
|
||||
},
|
||||
},
|
||||
podStatusFound: true,
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-uid": {
|
||||
"init-1": cpuset.New(0, 4, 2, 6),
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(3, 7),
|
||||
expAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-uid": {
|
||||
"init-1": cpuset.New(0, 2, 4, 6),
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
expDefaultSet: cpuset.New(3, 7), // leaked CPUs returned to default
|
||||
},
|
||||
{
|
||||
description: "Pod status not found - no CPU release",
|
||||
activePods: []*v1.Pod{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "test-pod",
|
||||
UID: "test-pod-uid",
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
InitContainers: []v1.Container{{Name: "init-1"}},
|
||||
Containers: []v1.Container{{Name: "app-1"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
podStatus: v1.PodStatus{},
|
||||
podStatusFound: false,
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-uid": {
|
||||
"init-1": cpuset.New(0, 4, 2, 6),
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(3, 7),
|
||||
expAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-uid": {
|
||||
"init-1": cpuset.New(0, 4, 2, 6), // should remain unchanged
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
expDefaultSet: cpuset.New(3, 7), // should remain unchanged
|
||||
},
|
||||
{
|
||||
description: "No active pods - no changes",
|
||||
activePods: []*v1.Pod{},
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.New(3, 7),
|
||||
expAssignments: state.ContainerCPUAssignments{},
|
||||
expDefaultSet: cpuset.New(3, 7),
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
mockState := &mockState{
|
||||
assignments: testCase.stAssignments.Clone(),
|
||||
defaultCPUSet: testCase.stDefaultCPUSet.Clone(),
|
||||
}
|
||||
|
||||
mgr := &manager{
|
||||
policy: &mockPolicy{},
|
||||
state: mockState,
|
||||
activePods: func() []*v1.Pod {
|
||||
return testCase.activePods
|
||||
},
|
||||
podStatusProvider: mockPodStatusProvider{
|
||||
podStatus: testCase.podStatus,
|
||||
found: testCase.podStatusFound,
|
||||
},
|
||||
}
|
||||
|
||||
// Call the function under test
|
||||
mgr.releasePodUnallocatedCPUs(logger)
|
||||
|
||||
// Verify the results
|
||||
if !reflect.DeepEqual(testCase.expAssignments, mgr.state.GetCPUAssignments()) {
|
||||
t.Errorf("Expected assignments %v, but got %v", testCase.expAssignments, mgr.state.GetCPUAssignments())
|
||||
}
|
||||
|
||||
if !testCase.expDefaultSet.Equals(mgr.state.GetDefaultCPUSet()) {
|
||||
t.Errorf("Expected default CPUSet %v, but got %v", testCase.expDefaultSet, mgr.state.GetDefaultCPUSet())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
if runtime.GOOS == "windows" {
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ type Policy interface {
|
|||
// GetAllocatableCPUs returns the total set of CPUs available for allocation.
|
||||
GetAllocatableCPUs(m state.State) cpuset.CPUSet
|
||||
// Release pod leaked CPUs
|
||||
ReleaseLeakedCPUs(logger logr.Logger, s state.State, pod *v1.Pod) cpuset.CPUSet
|
||||
ReleaseLeakedCPUs(logger logr.Logger, s state.State, pod *v1.Pod) cpuset.CPUSet
|
||||
// Reallocate the leaked CPUs for InitContainer
|
||||
UpdateCPUsForInitC(logger logr.Logger, s state.State, pod *v1.Pod, containerName string, leakedCPUs cpuset.CPUSet)
|
||||
UpdateCPUsForInitC(logger logr.Logger, s state.State, pod *v1.Pod, containerName string, leakedCPUs cpuset.CPUSet)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -462,7 +462,7 @@ func (p *staticPolicy) ReleaseLeakedCPUs(logger logr.Logger, s state.State, pod
|
|||
leakedCPUsNumber = unallocatedReusableCPUs.Size()
|
||||
}
|
||||
// Get the keeped CPUs in unallocatedReusableCPUs
|
||||
keepCPUs, err := p.takeByTopology(logger, unallocatedReusableCPUs, unallocatedReusableCPUs.Size() - leakedCPUsNumber)
|
||||
keepCPUs, err := p.takeByTopology(logger, unallocatedReusableCPUs, unallocatedReusableCPUs.Size()-leakedCPUsNumber)
|
||||
if err != nil {
|
||||
return leakedCPUs
|
||||
}
|
||||
|
|
@ -473,11 +473,11 @@ func (p *staticPolicy) ReleaseLeakedCPUs(logger logr.Logger, s state.State, pod
|
|||
p.deleteCPUsInCpusToReuse(pod, leakedCPUs)
|
||||
p.updateMetricsOnRelease(logger, s, leakedCPUs)
|
||||
logger.Info("Static policy: ReleaseLeakedCPUs, release leaked CPUs to DefaultCPUSet", "leakedCPUsNumber", leakedCPUsNumber, "leakedCPUs", leakedCPUs)
|
||||
}
|
||||
return leakedCPUs
|
||||
}
|
||||
return leakedCPUs
|
||||
}
|
||||
|
||||
// Check whether the assigned CPUs of the InitContainer are released into the default cpuset,
|
||||
// Check whether the assigned CPUs of the InitContainer are released into the default cpuset,
|
||||
// if yes, reallocate the CPUs (which assigned to the pod) to the InitContainer
|
||||
func (p *staticPolicy) UpdateCPUsForInitC(logger logr.Logger, s state.State, pod *v1.Pod, containerName string, leakedCPUs cpuset.CPUSet) {
|
||||
logger = klog.LoggerWithValues(logger, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", containerName)
|
||||
|
|
@ -490,7 +490,7 @@ func (p *staticPolicy) UpdateCPUsForInitC(logger logr.Logger, s state.State, pod
|
|||
sidecarCPUsBeforeInit := p.getSidecarCPUsBeforeInit(s, pod, containerName)
|
||||
allocatedCPUs, err := p.takeByTopology(logger, podCPUSet.Difference(leakedCPUs).Difference(sidecarCPUsBeforeInit), cset.Size())
|
||||
if err != nil {
|
||||
return
|
||||
return
|
||||
}
|
||||
s.SetCPUSet(string(pod.UID), containerName, allocatedCPUs)
|
||||
logger.Info("Static policy: ReplaceLeakedCPUs", "cset before update", cset, "cset after update", allocatedCPUs)
|
||||
|
|
@ -505,7 +505,7 @@ func (p *staticPolicy) getSidecarCPUsBeforeInit(s state.State, pod *v1.Pod, cont
|
|||
if containerName == initContainer.Name {
|
||||
break
|
||||
}
|
||||
if podutil.IsRestartableInitContainer(&initContainer){
|
||||
if podutil.IsRestartableInitContainer(&initContainer) {
|
||||
cset, _ := s.GetCPUSet(string(pod.UID), initContainer.Name)
|
||||
sidecarCPUs = sidecarCPUs.Union(cset)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2100,3 +2100,343 @@ func getPodUncoreCacheIDs(s state.Reader, topo *topology.CPUTopology, pod *v1.Po
|
|||
}
|
||||
return uncoreCacheIDs, nil
|
||||
}
|
||||
|
||||
func TestReleaseLeakedCPUs(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
pod *v1.Pod
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
stDefaultCPUSet cpuset.CPUSet
|
||||
expLeakedCPUs cpuset.CPUSet
|
||||
expDefaultSet cpuset.CPUSet
|
||||
}{
|
||||
{
|
||||
description: "No leaked CPUs",
|
||||
pod: WithPodUID(makeMultiContainerPod(
|
||||
[]struct{ request, limit string }{
|
||||
{"2000m", "2000m"}, // init container
|
||||
},
|
||||
[]struct{ request, limit string }{
|
||||
{"2000m", "2000m"}, // app container
|
||||
}), "test-pod-1"),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-1": map[string]cpuset.CPUSet{
|
||||
"initContainer-0": cpuset.New(0, 4),
|
||||
"appContainer-0": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(2, 3, 6, 7),
|
||||
expLeakedCPUs: cpuset.New(0, 4), // init container CPUs are released
|
||||
expDefaultSet: cpuset.New(0, 2, 3, 4, 6, 7), // leaked CPUs returned to default
|
||||
},
|
||||
{
|
||||
description: "Leaked CPUs from init container when init containers have more CPUs",
|
||||
pod: WithPodUID(makeMultiContainerPod(
|
||||
[]struct{ request, limit string }{
|
||||
{"4000m", "4000m"}, // init container - request 4 CPUs
|
||||
},
|
||||
[]struct{ request, limit string }{
|
||||
{"2000m", "2000m"}, // app container - request 2 CPUs
|
||||
}), "test-pod-2"),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-2": map[string]cpuset.CPUSet{
|
||||
"initContainer-0": cpuset.New(0, 4, 2, 6),
|
||||
"appContainer-0": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(3, 7),
|
||||
expLeakedCPUs: cpuset.New(2, 6), // 2 CPUs leaked from init container
|
||||
expDefaultSet: cpuset.New(2, 3, 6, 7), // leaked CPUs returned to default
|
||||
},
|
||||
{
|
||||
description: "No leaked CPUs when app containers have more CPUs",
|
||||
pod: WithPodUID(makeMultiContainerPod(
|
||||
[]struct{ request, limit string }{
|
||||
{"2000m", "2000m"}, // init container
|
||||
},
|
||||
[]struct{ request, limit string }{
|
||||
{"4000m", "4000m"}, // app container
|
||||
}), "test-pod-4"),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-4": map[string]cpuset.CPUSet{
|
||||
"initContainer-0": cpuset.New(0, 4),
|
||||
"appContainer-0": cpuset.New(1, 5, 2, 6),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(3, 7),
|
||||
expLeakedCPUs: cpuset.New(0, 4),
|
||||
expDefaultSet: cpuset.New(0, 3, 4, 7),
|
||||
},
|
||||
{
|
||||
description: "Multiple leaked CPUs from multiple init containers",
|
||||
pod: WithPodUID(makeMultiContainerPod(
|
||||
[]struct{ request, limit string }{
|
||||
{"4000m", "4000m"}, // init container 1 - request 4 CPUs
|
||||
{"2000m", "2000m"}, // init container 2 - request 2 CPUs
|
||||
},
|
||||
[]struct{ request, limit string }{
|
||||
{"2000m", "2000m"}, // app container - request 2 CPUs
|
||||
}), "test-pod-3"),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-3": map[string]cpuset.CPUSet{
|
||||
"initContainer-0": cpuset.New(0, 4, 2, 6),
|
||||
"initContainer-1": cpuset.New(3, 7),
|
||||
"appContainer-0": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(),
|
||||
expLeakedCPUs: cpuset.New(2, 3, 6, 7),
|
||||
expDefaultSet: cpuset.New(2, 3, 6, 7),
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
policy, err := NewStaticPolicy(logger, topoSingleSocketHT, 0, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewStaticPolicy() failed: %v", err)
|
||||
}
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
defaultCPUSet: testCase.stDefaultCPUSet,
|
||||
}
|
||||
|
||||
leakedCPUs := policy.ReleaseLeakedCPUs(logger, st, testCase.pod)
|
||||
|
||||
if !leakedCPUs.Equals(testCase.expLeakedCPUs) {
|
||||
t.Errorf("ReleaseLeakedCPUs() error (%v). expected leaked CPUs %v but got %v",
|
||||
testCase.description, testCase.expLeakedCPUs, leakedCPUs)
|
||||
}
|
||||
|
||||
if !st.GetDefaultCPUSet().Equals(testCase.expDefaultSet) {
|
||||
t.Errorf("ReleaseLeakedCPUs() error (%v). expected default CPUSet %v but got %v",
|
||||
testCase.description, testCase.expDefaultSet, st.GetDefaultCPUSet())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateCPUsForInitC(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
pod *v1.Pod
|
||||
containerName string
|
||||
leakedCPUs cpuset.CPUSet
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
stDefaultCPUSet cpuset.CPUSet
|
||||
expCPUSet cpuset.CPUSet
|
||||
shouldUpdate bool
|
||||
}{
|
||||
{
|
||||
description: "Update init container with leaked CPUs",
|
||||
pod: WithPodUID(makeMultiContainerPod(
|
||||
[]struct{ request, limit string }{
|
||||
{"4000m", "4000m"}, // request 4 CPUs to match allocation
|
||||
},
|
||||
[]struct{ request, limit string }{
|
||||
{"2000m", "2000m"},
|
||||
}), "test-pod-1"),
|
||||
containerName: "init-1",
|
||||
leakedCPUs: cpuset.New(2, 6),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-1": map[string]cpuset.CPUSet{
|
||||
"init-1": cpuset.New(0, 4, 2, 6),
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(3, 7),
|
||||
expCPUSet: cpuset.New(0, 1, 4, 5), // should replace leaked CPU with available one
|
||||
shouldUpdate: true,
|
||||
},
|
||||
{
|
||||
description: "No update when pod doesn't have leaked CPUs",
|
||||
pod: WithPodUID(makeMultiContainerPod(
|
||||
[]struct{ request, limit string }{
|
||||
{"2000m", "2000m"},
|
||||
},
|
||||
[]struct{ request, limit string }{
|
||||
{"2000m", "2000m"},
|
||||
}), "test-pod-2"),
|
||||
containerName: "init-1",
|
||||
leakedCPUs: cpuset.New(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-2": map[string]cpuset.CPUSet{
|
||||
"init-1": cpuset.New(1, 5), // doesn't contain leaked CPUs
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(2, 3, 6, 7),
|
||||
expCPUSet: cpuset.New(1, 5), // should remain unchanged
|
||||
shouldUpdate: false,
|
||||
},
|
||||
{
|
||||
description: "Update init container with partial leaked CPUs",
|
||||
pod: WithPodUID(makeMultiContainerPod(
|
||||
[]struct{ request, limit string }{
|
||||
{"3000m", "3000m"},
|
||||
},
|
||||
[]struct{ request, limit string }{
|
||||
{"2000m", "2000m"},
|
||||
}), "test-pod-3"),
|
||||
containerName: "init-1",
|
||||
leakedCPUs: cpuset.New(2, 4),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod-3": map[string]cpuset.CPUSet{
|
||||
"init-1": cpuset.New(0, 4, 2), // contains one leaked CPU
|
||||
"app-1": cpuset.New(1, 5),
|
||||
},
|
||||
},
|
||||
stDefaultCPUSet: cpuset.New(3, 6, 7),
|
||||
expCPUSet: cpuset.New(0, 1, 5), // should replace leaked CPU with available one
|
||||
shouldUpdate: true,
|
||||
},
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
policy, err := NewStaticPolicy(logger, topoSingleSocketHT, 0, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewStaticPolicy() failed: %v", err)
|
||||
}
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
defaultCPUSet: testCase.stDefaultCPUSet,
|
||||
}
|
||||
policy.UpdateCPUsForInitC(logger, st, testCase.pod, testCase.containerName, testCase.leakedCPUs)
|
||||
if testCase.shouldUpdate {
|
||||
cset, found := st.GetCPUSet(string(testCase.pod.UID), testCase.containerName)
|
||||
if !found {
|
||||
t.Errorf("UpdateCPUsForInitC() error (%v). expected container %v to be present in assignments",
|
||||
testCase.description, testCase.containerName)
|
||||
} else if !cset.Equals(testCase.expCPUSet) {
|
||||
t.Errorf("UpdateCPUsForInitC() error (%v). expected CPUSet %v but got %v",
|
||||
testCase.description, testCase.expCPUSet, cset)
|
||||
}
|
||||
} else {
|
||||
// If no update expected, verify the container either doesn't exist or has unchanged CPUSet
|
||||
if cset, found := st.GetCPUSet(string(testCase.pod.UID), testCase.containerName); found {
|
||||
if !testCase.expCPUSet.IsEmpty() && !cset.Equals(testCase.expCPUSet) {
|
||||
t.Errorf("UpdateCPUsForInitC() error (%v). expected CPUSet %v but got %v",
|
||||
testCase.description, testCase.expCPUSet, cset)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetSidecarCPUsBeforeInit(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
pod *v1.Pod
|
||||
containerName string
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
expSidecarCPUs cpuset.CPUSet
|
||||
}{
|
||||
{
|
||||
description: "No sidecar containers before init container",
|
||||
pod: WithPodUID(makeMultiContainerPodWithOptions(
|
||||
[]*containerOptions{
|
||||
{request: "2000m", limit: "2000m"}, // regular init container
|
||||
{request: "2000m", limit: "2000m", restartPolicy: v1.ContainerRestartPolicyAlways}, // sidecar init container
|
||||
},
|
||||
[]*containerOptions{
|
||||
{request: "2000m", limit: "2000m"}, // app container
|
||||
},
|
||||
), "test-pod"),
|
||||
containerName: "initContainer-0",
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod": map[string]cpuset.CPUSet{
|
||||
"initContainer-0": cpuset.New(1, 5), // current init container
|
||||
"initContainer-1": cpuset.New(0, 4), // sidecar container
|
||||
},
|
||||
},
|
||||
expSidecarCPUs: cpuset.New(), // should not get CPUs from sidecar initContainer-0
|
||||
},
|
||||
{
|
||||
description: "One sidecar container before init container",
|
||||
pod: WithPodUID(makeMultiContainerPodWithOptions(
|
||||
[]*containerOptions{
|
||||
{request: "1000m", limit: "1000m", restartPolicy: v1.ContainerRestartPolicyAlways}, // sidecar init container
|
||||
{request: "1000m", limit: "1000m", restartPolicy: v1.ContainerRestartPolicyOnFailure}, // regular init container
|
||||
},
|
||||
[]*containerOptions{
|
||||
{request: "1000m", limit: "1000m"}, // regular app container
|
||||
},
|
||||
), "test-pod"),
|
||||
containerName: "initContainer-1",
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod": map[string]cpuset.CPUSet{
|
||||
"initContainer-0": cpuset.New(0, 4), // sidecar container
|
||||
"initContainer-1": cpuset.New(1, 5), // current init container
|
||||
},
|
||||
},
|
||||
expSidecarCPUs: cpuset.New(0, 4), // should get CPUs from sidecar initContainer-0
|
||||
},
|
||||
{
|
||||
description: "Multiple sidecar containers before init container",
|
||||
pod: WithPodUID(makeMultiContainerPodWithOptions(
|
||||
[]*containerOptions{
|
||||
{request: "1000m", limit: "1000m", restartPolicy: v1.ContainerRestartPolicyAlways}, // sidecar init container 1
|
||||
{request: "1000m", limit: "1000m", restartPolicy: v1.ContainerRestartPolicyAlways}, // sidecar init container 2
|
||||
{request: "1000m", limit: "1000m", restartPolicy: v1.ContainerRestartPolicyOnFailure}, // regular init container
|
||||
},
|
||||
[]*containerOptions{
|
||||
{request: "1000m", limit: "1000m"}, // regular app container
|
||||
},
|
||||
), "test-pod"),
|
||||
containerName: "initContainer-2",
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod": map[string]cpuset.CPUSet{
|
||||
"initContainer-0": cpuset.New(0, 4), // sidecar init container 1
|
||||
"initContainer-1": cpuset.New(1, 5), // sidecar init container 2
|
||||
"initContainer-2": cpuset.New(2, 6), // current init container
|
||||
},
|
||||
},
|
||||
expSidecarCPUs: cpuset.New(0, 1, 4, 5), // should get CPUs from sidecar initContainer-0 and initContainer-1
|
||||
},
|
||||
{
|
||||
description: "Init container after some sidecars",
|
||||
pod: WithPodUID(makeMultiContainerPodWithOptions(
|
||||
[]*containerOptions{
|
||||
{request: "1000m", limit: "1000m", restartPolicy: v1.ContainerRestartPolicyAlways}, // sidecar init container 1
|
||||
{request: "1000m", limit: "1000m", restartPolicy: v1.ContainerRestartPolicyOnFailure}, // regular init container
|
||||
{request: "1000m", limit: "1000m", restartPolicy: v1.ContainerRestartPolicyAlways}, // sidecar init container 2
|
||||
},
|
||||
[]*containerOptions{
|
||||
{request: "1000m", limit: "1000m"}, // regular app container
|
||||
},
|
||||
), "test-pod"),
|
||||
containerName: "initContainer-1",
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"test-pod": map[string]cpuset.CPUSet{
|
||||
"initContainer-0": cpuset.New(2, 6), // sidecar init container 1
|
||||
"initContainer-1": cpuset.New(0, 4), // current init container
|
||||
"initContainer-2": cpuset.New(3, 5), // sidecar init container 2
|
||||
},
|
||||
},
|
||||
expSidecarCPUs: cpuset.New(2, 6), // should get CPUs from sidecar initContainer-0 only
|
||||
},
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
policy, err := NewStaticPolicy(logger, topoSingleSocketHT, 0, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("NewStaticPolicy() failed: %v", err)
|
||||
}
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
defaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
}
|
||||
staticPolicy := policy.(*staticPolicy)
|
||||
sidecarCPUs := staticPolicy.getSidecarCPUsBeforeInit(st, testCase.pod, testCase.containerName)
|
||||
if !sidecarCPUs.Equals(testCase.expSidecarCPUs) {
|
||||
t.Errorf("getSidecarCPUsBeforeInit() error (%v). expected %v but got %v",
|
||||
testCase.description, testCase.expSidecarCPUs, sidecarCPUs)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ package e2enode
|
|||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
|
@ -36,6 +37,7 @@ import (
|
|||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/features"
|
||||
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
|
||||
|
|
@ -1918,16 +1920,18 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, fra
|
|||
|
||||
ginkgo.When("When checking CPU leaks in Guaranteed Pods", func() {
|
||||
ginkgo.It("One init container(CPU number: 1) and one app container(CPU number: 2) for a pod, release leaked CPUs", func(ctx context.Context) {
|
||||
cpuCount := 2 // total
|
||||
reservedCPUs = cpuset.New(0)
|
||||
cpuCount := 2
|
||||
podCPUsNumber := 2
|
||||
|
||||
skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)
|
||||
|
||||
updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
|
||||
policyName: string(cpumanager.PolicyStatic),
|
||||
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
|
||||
policyName: string(cpumanager.PolicyStatic),
|
||||
reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
|
||||
}))
|
||||
|
||||
pod := makeCPUManagerMultiTypeContainersPod("gu-pod-multi-type-container", []ctnAttribute{
|
||||
pod := makeCPUManagerMultiTypeContainersPodWithRestartPolicy("gu-pod-multi-type-container", []ctnAttribute{
|
||||
{
|
||||
ctnName: "init-container-1",
|
||||
cpuRequest: "1000m",
|
||||
|
|
@ -1939,39 +1943,228 @@ var _ = SIGDescribe("CPU Manager", ginkgo.Ordered, ginkgo.ContinueOnFailure, fra
|
|||
cpuRequest: "2000m",
|
||||
cpuLimit: "2000m",
|
||||
},
|
||||
})
|
||||
}, v1.RestartPolicyAlways)
|
||||
ginkgo.By("running a Gu pod with a init container and a app container")
|
||||
pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
|
||||
podMap[string(pod.UID)] = pod
|
||||
// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/753-sidecar-containers#resources-calculation-for-scheduling-and-pod-admission
|
||||
// for the detail.
|
||||
|
||||
// waiting for reconcile loop.
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
UnionCPUNumber, err := getPodContainersCPUSetUnion(ctx, f, pod)
|
||||
framework.ExpectNoError(err, "cannot get union CPU count for pod %s/%s", pod.Namespace, pod.Name)
|
||||
ginkgo.By(fmt.Sprintf("UnionCPUNumber = %d", UnionCPUNumber))
|
||||
|
||||
// have leaked CPUs
|
||||
gomega.Expect(UnionCPUNumber).ToNot(gomega.Equal(podCPUsNumber))
|
||||
|
||||
// no leaked CPUs in checkpoint
|
||||
// initcontainer cpuset could be changed after terminating if there are leaked CPUs, this change only appied to checkpoint
|
||||
gomega.Expect(pod).To(HavePodCPUsCount(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.InitContainers[0]).Union(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.Containers[0])), podCPUsNumber))
|
||||
|
||||
ginkgo.By("checking if the no leaked cpuset was applied when pod restart")
|
||||
// make pod restart by removing pod sandbox
|
||||
restartPodByRemovingSandbox(ctx, f, pod)
|
||||
|
||||
// waiting for reconcile loop.
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
// no leaked CPUs when Pod setup
|
||||
UnionCPUNumber, err = getPodContainersCPUSetUnion(ctx, f, pod)
|
||||
framework.ExpectNoError(err, "cannot get union CPU count for pod %s/%s", pod.Namespace, pod.Name)
|
||||
ginkgo.By(fmt.Sprintf("UnionCPUNumber = %d", UnionCPUNumber))
|
||||
|
||||
// no leaked CPUs
|
||||
gomega.Expect(UnionCPUNumber).To(gomega.Equal(podCPUsNumber))
|
||||
gomega.Expect(pod).To(HavePodCPUsCount(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.InitContainers[0]).Union(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.Containers[0])), podCPUsNumber))
|
||||
})
|
||||
|
||||
// NOTE(review): the statement order in this spec looks inconsistent — `pod` is
// used (log retrieval, cpuset expectations) before it is declared via
// `pod := makeCPUManagerMultiTypeContainersPodWithRestartPolicy(...)`, and the
// asserted sizes (init container == 1, app container == 2) do not match the
// declared requests (init 2000m, app 1000m). Verify the intended flow against
// the sibling specs below before merging.
ginkgo.It("One init container(CPU number: 2) and one app container(CPU number: 1) for a pod, release part of unallocated reusable CPUs", func(ctx context.Context) {
	reservedCPUs = cpuset.New(0)
	cpuCount := 3
	podCPUsNumber := 2

	// when we get there the real initcontainer terminated, so we can only check its logs
	ginkgo.By("checking if the expected cpuset was assigned")
	logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, pod.Namespace, pod.Name, pod.Spec.InitContainers[0].Name)
	framework.ExpectNoError(err, "expected log not found in init container [%s] of pod [%s]", pod.Spec.InitContainers[0].Name, pod.Name)
	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)

	// Get reusable CPUs (the CPUs the terminated init container was allowed to run on).
	initContainerCPUs := getContainerAllowedCPUsFromLogs(pod.Name, pod.Spec.InitContainers[0].Name, logs)
	gomega.Expect(initContainerCPUs.Size()).To(gomega.Equal(1), "expected cpu set size == 1, got %q", initContainerCPUs.String())
	updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
		policyName:         string(cpumanager.PolicyStatic),
		reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
	}))

	// Get nonReusable CPUs (the exclusive CPUs held by the running app container).
	appContainerCPUs, err := getContainerAllowedCPUs(pod, pod.Spec.Containers[0].Name, false)
	framework.ExpectNoError(err, "cannot get exclusive CPUs for pod %s/%s", pod.Namespace, pod.Name)
	gomega.Expect(appContainerCPUs.Size()).To(gomega.Equal(2), "expected cpu set size == 2, got %q", appContainerCPUs.String())
	pod := makeCPUManagerMultiTypeContainersPodWithRestartPolicy("gu-pod-multi-type-container", []ctnAttribute{
		{
			ctnName:    "init-container-1",
			cpuRequest: "2000m",
			cpuLimit:   "2000m",
		},
	}, []ctnAttribute{
		{
			ctnName:    "gu-container-1",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}, v1.RestartPolicyAlways)
	ginkgo.By("running a Gu pod with a init container and a app container")
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
	podMap[string(pod.UID)] = pod

	appContainerName := pod.Spec.Containers[0].Name
	gomega.Expect(pod).To(HaveContainerCPUsCount(appContainerName, 2))
	gomega.Expect(pod).To(HaveContainerCPUsASubsetOf(appContainerName, onlineCPUs))
	gomega.Expect(pod).ToNot(HaveContainerCPUsOverlapWith(appContainerName, reservedCPUs))
	// Waiting for reconcile loop.
	// NOTE(review): fixed sleeps as synchronization are flaky in e2e; a
	// gomega.Eventually poll (as sketched in the commented block below) would
	// be more robust — confirm before relying on the timing.
	time.Sleep(10 * time.Second)
	gomega.Expect(pod).To(HavePodCPUsCount(initContainerCPUs.Union(appContainerCPUs), podCPUsNumber))
	/*Eventually(func() bool {
		return HavePodCPUsCount(initContainerCPUs.Union(appContainerCPUs), podCPUsNumber)
	}, 10*time.Second, 1*time.Second).Should(BeTrue())*/
	// waiting for reconcile loop.
	time.Sleep(1 * time.Second)

	UnionCPUNumber, err := getPodContainersCPUSetUnion(ctx, f, pod)
	framework.ExpectNoError(err, "cannot get union CPU count for pod %s/%s", pod.Namespace, pod.Name)
	ginkgo.By(fmt.Sprintf("UnionCPUNumber = %d", UnionCPUNumber))

	// The cgroup view still includes the CPUs leaked by the terminated init
	// container, so the union must NOT yet equal the pod's expected count.
	gomega.Expect(UnionCPUNumber).ToNot(gomega.Equal(podCPUsNumber))

	// No leaked CPUs in the checkpoint: the init container's cpuset can be
	// changed after termination when there are leaked CPUs, and that change is
	// applied only to the checkpoint file, not to the live cgroup.
	gomega.Expect(pod).To(HavePodCPUsCount(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.InitContainers[0]).Union(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.Containers[0])), podCPUsNumber))

	ginkgo.By("checking if the no leaked cpuset was applied when pod restart")
	// Make the pod restart by removing its sandbox; the kubelet then recreates
	// the containers with the checkpointed (leak-free) cpusets.
	restartPodByRemovingSandbox(ctx, f, pod)

	// waiting for reconcile loop.
	time.Sleep(1 * time.Second)

	// After the restart the live cgroup view must show no leaked CPUs.
	UnionCPUNumber, err = getPodContainersCPUSetUnion(ctx, f, pod)
	framework.ExpectNoError(err, "cannot get union CPU count for pod %s/%s", pod.Namespace, pod.Name)
	ginkgo.By(fmt.Sprintf("UnionCPUNumber = %d", UnionCPUNumber))

	// no leaked CPUs
	gomega.Expect(UnionCPUNumber).To(gomega.Equal(podCPUsNumber))
	gomega.Expect(pod).To(HavePodCPUsCount(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.InitContainers[0]).Union(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.Containers[0])), podCPUsNumber))
})
|
||||
|
||||
// Verifies CPU leak handling for a pod that mixes a sidecar (restartable init)
// container, a regular init container, and an app container under the static
// CPU manager policy: right after startup the live cgroups still show the CPUs
// leaked by the terminated init container, the checkpoint already accounts for
// them, and after a sandbox-triggered restart the live view is leak-free too.
ginkgo.It("One sidecar container(CPU number:1) One init container(CPU number: 2) and one app container(CPU number: 1) for a pod, release part of unallocated reusable CPUs", func(ctx context.Context) {
	reservedCPUs = cpuset.New(0, 2)
	cpuCount := 4
	// Expected steady-state pod footprint: sidecar (1) + app (1) + one CPU
	// reused from the terminated init container's exclusive pair.
	podCPUsNumber := 3

	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)

	updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
		policyName:         string(cpumanager.PolicyStatic),
		reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
	}))

	// The sidecar is modeled as an init container with restartPolicy Always.
	containerRestartPolicyAlways := v1.ContainerRestartPolicyAlways
	pod := makeCPUManagerMultiTypeContainersPodWithRestartPolicy("gu-pod-multi-type-container", []ctnAttribute{
		{
			ctnName:       "sidecar-container-1",
			cpuRequest:    "1000m",
			cpuLimit:      "1000m",
			restartPolicy: &containerRestartPolicyAlways,
		},
		{
			ctnName:    "init-container-1",
			cpuRequest: "2000m",
			cpuLimit:   "2000m",
		},
	}, []ctnAttribute{
		{
			ctnName:    "gu-container-1",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}, v1.RestartPolicyAlways)
	ginkgo.By("running a Gu pod with a init container and a app container")
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
	podMap[string(pod.UID)] = pod

	// Waiting for reconcile loop.
	// NOTE(review): a fixed sleep as synchronization is flaky; consider
	// polling with gomega.Eventually instead.
	time.Sleep(1 * time.Second)

	UnionCPUNumber, err := getPodContainersCPUSetUnion(ctx, f, pod)
	framework.ExpectNoError(err, "cannot get union CPU count for pod %s/%s", pod.Namespace, pod.Name)
	ginkgo.By(fmt.Sprintf("UnionCPUNumber = %d", UnionCPUNumber))

	// The cgroup view still includes the CPUs leaked by the terminated init
	// container, so the union must NOT yet equal the expected pod count.
	gomega.Expect(UnionCPUNumber).ToNot(gomega.Equal(podCPUsNumber))

	// No leaked CPUs in the checkpoint: the init container's cpuset can be
	// changed after termination when there are leaked CPUs, and that change is
	// applied only to the checkpoint file, not to the live cgroup.
	gomega.Expect(pod).To(HavePodCPUsCount(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.InitContainers[0]).Union(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.InitContainers[1]).Union(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.Containers[0]))), podCPUsNumber))

	ginkgo.By("checking if the no leaked cpuset was applied when pod restart")
	// Make the pod restart by removing its sandbox; the kubelet then recreates
	// the containers using the checkpointed (leak-free) cpusets.
	restartPodByRemovingSandbox(ctx, f, pod)

	// waiting for reconcile loop.
	time.Sleep(1 * time.Second)

	// After restart the live cgroup view must show no leaked CPUs either.
	UnionCPUNumber, err = getPodContainersCPUSetUnion(ctx, f, pod)
	framework.ExpectNoError(err, "cannot get union CPU count for pod %s/%s", pod.Namespace, pod.Name)
	ginkgo.By(fmt.Sprintf("UnionCPUNumber = %d", UnionCPUNumber))

	// no leaked CPUs
	gomega.Expect(UnionCPUNumber).To(gomega.Equal(podCPUsNumber))
	gomega.Expect(pod).To(HavePodCPUsCount(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.InitContainers[0]).Union(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.InitContainers[1]).Union(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.Containers[0]))), podCPUsNumber))

})
|
||||
|
||||
// Baseline case: the init container's CPUs are fully reused by the app
// container (2 exclusive CPUs, app needs 1 plus one reused), so the union of
// live cpusets must already equal the expected pod count immediately after
// startup, in the checkpoint, and again after a sandbox-triggered restart.
ginkgo.It("One init container(CPU number: 2) and one app container(CPU number: 1) for a pod, no leaked CPUs", func(ctx context.Context) {
	reservedCPUs = cpuset.New(0, 10)
	cpuCount := 2
	podCPUsNumber := 2

	skipIfAllocatableCPUsLessThan(getLocalNode(ctx, f), cpuCount)

	updateKubeletConfigIfNeeded(ctx, f, configureCPUManagerInKubelet(oldCfg, &cpuManagerKubeletArguments{
		policyName:         string(cpumanager.PolicyStatic),
		reservedSystemCPUs: reservedCPUs, // Not really needed for the tests but helps to make a more precise check
	}))

	pod := makeCPUManagerMultiTypeContainersPodWithRestartPolicy("gu-pod-multi-type-container", []ctnAttribute{
		{
			ctnName:    "init-container-1",
			cpuRequest: "2000m",
			cpuLimit:   "2000m",
		},
	}, []ctnAttribute{
		{
			ctnName:    "gu-container-1",
			cpuRequest: "1000m",
			cpuLimit:   "1000m",
		},
	}, v1.RestartPolicyAlways)
	ginkgo.By("running a Gu pod with a init container and a app container")
	pod = e2epod.NewPodClient(f).CreateSync(ctx, pod)
	podMap[string(pod.UID)] = pod

	// Waiting for reconcile loop.
	// NOTE(review): a fixed sleep as synchronization is flaky; consider
	// polling with gomega.Eventually instead.
	time.Sleep(1 * time.Second)

	UnionCPUNumber, err := getPodContainersCPUSetUnion(ctx, f, pod)
	framework.ExpectNoError(err, "cannot get union CPU count for pod %s/%s", pod.Namespace, pod.Name)
	ginkgo.By(fmt.Sprintf("UnionCPUNumber = %d", UnionCPUNumber))

	// No leaked CPUs in the live cgroup view.
	gomega.Expect(UnionCPUNumber).To(gomega.Equal(podCPUsNumber))

	// No leaked CPUs in the checkpoint either: the init container's cpuset can
	// be changed after termination when there are leaked CPUs, and that change
	// is applied only to the checkpoint file.
	gomega.Expect(pod).To(HavePodCPUsCount(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.InitContainers[0]).Union(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.Containers[0])), podCPUsNumber))

	ginkgo.By("checking if the no leaked cpuset was applied when pod restart")
	// Make the pod restart by removing its sandbox.
	restartPodByRemovingSandbox(ctx, f, pod)

	// waiting for reconcile loop.
	time.Sleep(1 * time.Second)

	// Still no leaked CPUs after the pod is set up again.
	UnionCPUNumber, err = getPodContainersCPUSetUnion(ctx, f, pod)
	framework.ExpectNoError(err, "cannot get union CPU count for pod %s/%s", pod.Namespace, pod.Name)
	ginkgo.By(fmt.Sprintf("UnionCPUNumber = %d", UnionCPUNumber))

	// no leaked CPUs
	gomega.Expect(UnionCPUNumber).To(gomega.Equal(podCPUsNumber))
	gomega.Expect(pod).To(HavePodCPUsCount(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.InitContainers[0]).Union(getContainerCPUSetFromCheckpoint(pod, &pod.Spec.Containers[0])), podCPUsNumber))
})
|
||||
})
|
||||
})
|
||||
|
|
@ -2143,7 +2336,7 @@ func HavePodCPUsCount(podCPUSet cpuset.CPUSet, val int) types.GomegaMatcher {
|
|||
if podCPUSet.Size() == val {
|
||||
return true, nil
|
||||
} else if podCPUSet.Size() > val {
|
||||
return false, fmt.Errorf("There are %d leaked CPUs", podCPUSet.Size() - val)
|
||||
return false, fmt.Errorf("There are %d leaked CPUs", podCPUSet.Size()-val)
|
||||
}
|
||||
return false, fmt.Errorf("CPU not enough")
|
||||
}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} has allowed CPUs <{{.Data.CurrentCPUs}}> not matching expected count <{{.Data.Count}}> for container {{.Data.Name}}", md)
|
||||
|
|
@ -2870,7 +3063,7 @@ func makeCPUManagerInitContainersPod(podName string, ctnAttributes []ctnAttribut
|
|||
}
|
||||
}
|
||||
|
||||
func makeCPUManagerMultiTypeContainersPod(podName string, initCtnAttributes []ctnAttribute, ctnAttributes []ctnAttribute) *v1.Pod {
|
||||
func makeCPUManagerMultiTypeContainersPodWithRestartPolicy(podName string, initCtnAttributes []ctnAttribute, ctnAttributes []ctnAttribute, restartPolicy v1.RestartPolicy) *v1.Pod {
|
||||
var containers []v1.Container
|
||||
var initContainers []v1.Container
|
||||
cpusetCmd := "grep Cpus_allowed_list /proc/self/status | cut -f2"
|
||||
|
|
@ -2923,7 +3116,7 @@ func makeCPUManagerMultiTypeContainersPod(podName string, initCtnAttributes []ct
|
|||
Name: podName,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
RestartPolicy: restartPolicy,
|
||||
InitContainers: initContainers,
|
||||
Containers: containers,
|
||||
},
|
||||
|
|
@ -2976,3 +3169,101 @@ func configureCPUManagerInKubelet(oldCfg *kubeletconfig.KubeletConfiguration, ku
|
|||
|
||||
return newCfg
|
||||
}
|
||||
|
||||
func getContainerCPUSetFromCheckpoint(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||
|
||||
// initialize empty cpuset as default return value
|
||||
containerCPUSet := cpuset.CPUSet{}
|
||||
|
||||
// get the actual cpuset value from checkpoint file
|
||||
checkpointPath := "/var/lib/kubelet/cpu_manager_state"
|
||||
checkpointData, err := os.ReadFile(checkpointPath)
|
||||
if err != nil {
|
||||
fmt.Printf("Error reading checkpoint file %s: %v\n", checkpointPath, err)
|
||||
} else {
|
||||
// Parse the checkpoint JSON to extract CPU assignments
|
||||
var checkpoint struct {
|
||||
PolicyName string `json:"policyName"`
|
||||
DefaultCPUSet string `json:"defaultCpuSet"`
|
||||
Entries map[string]map[string]string `json:"entries,omitempty"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(checkpointData, &checkpoint); err != nil {
|
||||
fmt.Printf("Error parsing checkpoint JSON: %v\n", err)
|
||||
} else {
|
||||
podUID := string(pod.UID)
|
||||
if podEntries, exists := checkpoint.Entries[podUID]; exists {
|
||||
if containerCPUs, exists := podEntries[container.Name]; exists {
|
||||
// Parse the cpuset string and return it
|
||||
if parsedCPUs, parseErr := cpuset.Parse(containerCPUs); parseErr == nil {
|
||||
containerCPUSet = parsedCPUs
|
||||
} else {
|
||||
fmt.Printf("Error parsing cpuset string '%s': %v\n", containerCPUs, parseErr)
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("Container %s not found in checkpoint for pod %s\n", container.Name, podUID)
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("Pod %s not found in checkpoint entries\n", podUID)
|
||||
}
|
||||
}
|
||||
}
|
||||
return containerCPUSet
|
||||
}
|
||||
|
||||
// restartPodByRemovingSandbox forces the given pod to be restarted in place by
// stopping its CRI pod sandbox, then waits until the kubelet has brought the
// pod back to Running. Any CRI or wait error fails the current spec.
//
// NOTE(review): despite the name and the "Removing" By-step, this calls
// StopPodSandbox, not RemovePodSandbox — confirm that stopping the sandbox is
// sufficient for the scenario, or rename for accuracy.
func restartPodByRemovingSandbox(ctx context.Context, f *framework.Framework, pod *v1.Pod) {
	ginkgo.By("Getting the current pod sandbox ID")
	rs, _, err := getCRIClient()
	framework.ExpectNoError(err)
	// Locate the sandbox via the standard CRI pod name/namespace labels.
	sandboxes, err := rs.ListPodSandbox(ctx, &runtimeapi.PodSandboxFilter{
		LabelSelector: map[string]string{
			"io.kubernetes.pod.name":      pod.Name,
			"io.kubernetes.pod.namespace": pod.Namespace,
		},
	})
	framework.ExpectNoError(err)
	// Exactly one sandbox is expected for a running pod.
	gomega.Expect(sandboxes).To(gomega.HaveLen(1))
	podSandboxID := sandboxes[0].Id

	ginkgo.By("Removing the pod sandbox")
	err = rs.StopPodSandbox(ctx, podSandboxID)
	framework.ExpectNoError(err)

	// The kubelet recreates the sandbox and containers; wait for Running.
	ginkgo.By("Waiting for the pod to be restarted after sandbox removal")
	err = e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
	framework.ExpectNoError(err)
}
|
||||
|
||||
func getPodContainersCPUSetUnion(ctx context.Context, f *framework.Framework, pod *v1.Pod) (int, error) {
|
||||
var allCPUSets []cpuset.CPUSet
|
||||
|
||||
for _, initContainer := range pod.Spec.InitContainers {
|
||||
logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, pod.Namespace, pod.Name, initContainer.Name)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to get logs for init container %s: %w", initContainer.Name, err)
|
||||
}
|
||||
|
||||
initContainerCPUs := getContainerAllowedCPUsFromLogs(pod.Name, initContainer.Name, logs)
|
||||
allCPUSets = append(allCPUSets, initContainerCPUs)
|
||||
}
|
||||
|
||||
for _, container := range pod.Spec.Containers {
|
||||
logs, err := e2epod.GetPodLogs(ctx, f.ClientSet, pod.Namespace, pod.Name, container.Name)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to get logs for container %s: %w", container.Name, err)
|
||||
}
|
||||
containerCPUs := getContainerAllowedCPUsFromLogs(pod.Name, container.Name, logs)
|
||||
allCPUSets = append(allCPUSets, containerCPUs)
|
||||
}
|
||||
|
||||
if len(allCPUSets) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
union := allCPUSets[0]
|
||||
for i := 1; i < len(allCPUSets); i++ {
|
||||
union = union.Union(allCPUSets[i])
|
||||
}
|
||||
|
||||
return union.Size(), nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue