diff --git a/test/e2e/dra/utils/builder.go b/test/e2e/dra/utils/builder.go
index 330c298786e..35b12abbbf9 100644
--- a/test/e2e/dra/utils/builder.go
+++ b/test/e2e/dra/utils/builder.go
@@ -62,7 +62,7 @@ func (b *Builder) ExtendedResourceName(i int) string {
 	case SingletonIndex:
 		return "example.com/resource"
 	default:
-		return b.driver.Name + "/resource" + fmt.Sprintf("-%d", i)
+		return b.Driver.Name + "/resource" + fmt.Sprintf("-%d", i)
 	}
 }
 
@@ -70,7 +70,7 @@ func (b *Builder) ExtendedResourceName(i int) string {
 // namespace.
 type Builder struct {
 	namespace string
-	driver *Driver
+	Driver *Driver
 	UseExtendedResourceName bool
 
 	podCounter int
@@ -81,7 +81,7 @@ type Builder struct {
 
 // ClassName returns the default device class name.
 func (b *Builder) ClassName() string {
-	return b.namespace + b.driver.NameSuffix + "-class"
+	return b.namespace + b.Driver.NameSuffix + "-class"
 }
 
 // SingletonIndex causes Builder.Class and ExtendedResourceName to create a
@@ -117,14 +117,14 @@ func (b *Builder) Class(i int) *resourceapi.DeviceClass {
 	}
 	class.Spec.Selectors = []resourceapi.DeviceSelector{{
 		CEL: &resourceapi.CELDeviceSelector{
-			Expression: fmt.Sprintf(`device.driver == "%s"`, b.driver.Name),
+			Expression: fmt.Sprintf(`device.driver == "%s"`, b.Driver.Name),
 		},
 	}}
 	if b.ClassParameters != "" {
 		class.Spec.Config = []resourceapi.DeviceClassConfiguration{{
 			DeviceConfiguration: resourceapi.DeviceConfiguration{
 				Opaque: &resourceapi.OpaqueDeviceConfiguration{
-					Driver: b.driver.Name,
+					Driver: b.Driver.Name,
 					Parameters: runtime.RawExtension{Raw: []byte(b.ClassParameters)},
 				},
 			},
@@ -137,7 +137,7 @@ func (b *Builder) Class(i int) *resourceapi.DeviceClass {
 // that test pods can reference
 func (b *Builder) ExternalClaim() *resourceapi.ResourceClaim {
 	b.claimCounter++
-	name := "external-claim" + b.driver.NameSuffix // This is what podExternal expects.
+	name := "external-claim" + b.Driver.NameSuffix // This is what podExternal expects.
 	if b.claimCounter > 1 {
 		name += fmt.Sprintf("-%d", b.claimCounter)
 	}
@@ -162,7 +162,7 @@ func (b *Builder) claimSpecWithV1beta1() resourcev1beta1.ResourceClaimSpec {
			Config: []resourcev1beta1.DeviceClaimConfiguration{{
				DeviceConfiguration: resourcev1beta1.DeviceConfiguration{
					Opaque: &resourcev1beta1.OpaqueDeviceConfiguration{
-						Driver: b.driver.Name,
+						Driver: b.Driver.Name,
						Parameters: runtime.RawExtension{
							Raw: []byte(parameters),
						},
@@ -190,7 +190,7 @@ func (b *Builder) claimSpecWithV1beta2() resourcev1beta2.ResourceClaimSpec {
			Config: []resourcev1beta2.DeviceClaimConfiguration{{
				DeviceConfiguration: resourcev1beta2.DeviceConfiguration{
					Opaque: &resourcev1beta2.OpaqueDeviceConfiguration{
-						Driver: b.driver.Name,
+						Driver: b.Driver.Name,
						Parameters: runtime.RawExtension{
							Raw: []byte(parameters),
						},
@@ -218,7 +218,7 @@ func (b *Builder) ClaimSpec() resourceapi.ResourceClaimSpec {
			Config: []resourceapi.DeviceClaimConfiguration{{
				DeviceConfiguration: resourceapi.DeviceConfiguration{
					Opaque: &resourceapi.OpaqueDeviceConfiguration{
-						Driver: b.driver.Name,
+						Driver: b.Driver.Name,
						Parameters: runtime.RawExtension{
							Raw: []byte(parameters),
						},
@@ -251,7 +251,7 @@ func (b *Builder) Pod() *v1.Pod {
 	pod.Spec.RestartPolicy = v1.RestartPolicyNever
 	pod.GenerateName = ""
 	b.podCounter++
-	pod.Name = fmt.Sprintf("tester%s-%d", b.driver.NameSuffix, b.podCounter)
+	pod.Name = fmt.Sprintf("tester%s-%d", b.Driver.NameSuffix, b.podCounter)
 	return pod
 }
 
@@ -320,7 +320,7 @@ func (b *Builder) PodExternal() *v1.Pod {
 	pod := b.Pod()
 	pod.Spec.Containers[0].Name = "with-resource"
 	podClaimName := "resource-claim"
-	externalClaimName := "external-claim" + b.driver.NameSuffix
+	externalClaimName := "external-claim" + b.Driver.NameSuffix
 	pod.Spec.ResourceClaims = []v1.PodResourceClaim{
 		{
 			Name: podClaimName,
@@ -419,7 +419,7 @@ func (b *Builder) DeletePodAndWaitForNotFound(tCtx ktesting.TContext, pod *v1.Po
 func (b *Builder) TestPod(tCtx ktesting.TContext, pod *v1.Pod, env ...string) {
 	tCtx.Helper()
 
-	if !b.driver.WithKubelet {
+	if !b.Driver.WithKubelet {
 		// Less testing when we cannot rely on the kubelet to actually run the pod.
 		err := e2epod.WaitForPodScheduled(tCtx, tCtx.Client(), pod.Namespace, pod.Name)
 		tCtx.ExpectNoError(err, "schedule pod")
@@ -476,7 +476,7 @@ func TestContainerEnv(tCtx ktesting.TContext, pod *v1.Pod, containerName string,
 }
 
 func NewBuilder(f *framework.Framework, driver *Driver) *Builder {
-	b := &Builder{driver: driver}
+	b := &Builder{Driver: driver}
 	ginkgo.BeforeEach(func() {
 		b.setUp(f.TContext(context.Background()))
 	})
@@ -484,7 +484,7 @@ func NewBuilder(f *framework.Framework, driver *Driver) *Builder {
 }
 
 func NewBuilderNow(tCtx ktesting.TContext, driver *Driver) *Builder {
-	b := &Builder{driver: driver}
+	b := &Builder{Driver: driver}
 	b.setUp(tCtx)
 	return b
 }
@@ -539,7 +539,7 @@ func (b *Builder) tearDown(tCtx ktesting.TContext) {
 		}
 	}
 
-	for host, plugin := range b.driver.Nodes {
+	for host, plugin := range b.Driver.Nodes {
 		tCtx.Logf("Waiting for resources on %s to be unprepared", host)
 		tCtx.Eventually(func(ktesting.TContext) []app.ClaimID { return plugin.GetPreparedResources() }).WithTimeout(time.Minute).Should(gomega.BeEmpty(), "prepared claims on host %s", host)
 	}
diff --git a/test/e2e_dra/partitionabledevices_test.go b/test/e2e_dra/partitionabledevices_test.go
new file mode 100644
index 00000000000..aadbee38b13
--- /dev/null
+++ b/test/e2e_dra/partitionabledevices_test.go
@@ -0,0 +1,283 @@
+/*
+Copyright The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2edra
+
+import (
+	"time"
+
+	"github.com/onsi/gomega"
+	v1 "k8s.io/api/core/v1"
+	resourceapi "k8s.io/api/resource/v1"
+	resourceapiv1beta2 "k8s.io/api/resource/v1beta2"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	drautils "k8s.io/kubernetes/test/e2e/dra/utils"
+	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
+	"k8s.io/kubernetes/test/utils/ktesting"
+	"k8s.io/utils/ptr"
+)
+
+// partitionableDevices tests the DRAPartitionableDevices feature across upgrade/downgrade.
+// It creates ResourceSlices with SharedCounters and devices that consume those counters,
+// then verifies that these API fields and the resulting allocations are preserved across
+// cluster version transitions.
+//
+// Unlike the resourceClaimDeviceStatus test, this also exercises scheduler behavior:
+// pods whose claims fit within the shared counter get scheduled, while a pod whose
+// claim would exceed the remaining counter capacity stays unschedulable.
+func partitionableDevices(tCtx ktesting.TContext, b *drautils.Builder) upgradedTestFunc {
+	namespace := tCtx.Namespace()
+	driverName := b.Driver.Name
+	sliceName := "partitionable-slice"
+
+	// Create a ResourceSlice with SharedCounters.
+	// This tests that the new API fields are accepted and preserved.
+	// A single counter set with one memory counter is enough to exercise
+	// the DRAPartitionableDevices feature for this upgrade/downgrade test.
+	sharedCounters := []resourceapiv1beta2.CounterSet{
+		{
+			Name: "memory-pool",
+			Counters: map[string]resourceapiv1beta2.Counter{
+				"memory": {
+					Value: resource.MustParse("8Gi"),
+				},
+			},
+		},
+	}
+
+	counterSlice := &resourceapiv1beta2.ResourceSlice{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: sliceName,
+		},
+		Spec: resourceapiv1beta2.ResourceSliceSpec{
+			Driver: driverName,
+			AllNodes: ptr.To(true),
+			SharedCounters: sharedCounters,
+			Pool: resourceapiv1beta2.ResourcePool{
+				Name: "partitionable-pool",
+				Generation: 1,
+				ResourceSliceCount: 2, // One for counters, one for devices.
+			},
+		},
+	}
+
+	_, err := tCtx.Client().ResourceV1beta2().ResourceSlices().Create(tCtx, counterSlice, metav1.CreateOptions{})
+	tCtx.ExpectNoError(err, "create ResourceSlice with SharedCounters")
+
+	// Create a second ResourceSlice with devices that consume counters.
+	// The slice is available on all nodes and every device draws from the
+	// shared "memory-pool" counter set defined above.
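+	// The six devices advertise 4x2Gi + 2x4Gi = 16Gi of memory-size capacity,
+	// while the shared "memory" counter only provides 8Gi, so only a subset of
+	// them can be allocated at the same time: after the 2Gi and 4Gi allocations
+	// below, 2Gi remains in the pool and a second 4Gi claim cannot fit.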
+	devicesSliceName := "partitionable-devices-slice"
+	devices := []resourceapiv1beta2.Device{
+		{
+			Name: "memory0-2g",
+			Capacity: map[resourceapiv1beta2.QualifiedName]resourceapiv1beta2.DeviceCapacity{
+				"memory-size": {Value: resource.MustParse("2Gi")},
+			},
+			ConsumesCounters: []resourceapiv1beta2.DeviceCounterConsumption{
+				{
+					CounterSet: "memory-pool",
+					Counters: map[string]resourceapiv1beta2.Counter{
+						"memory": {
+							Value: resource.MustParse("2Gi"),
+						},
+					},
+				},
+			},
+		},
+		{
+			Name: "memory1-2g",
+			Capacity: map[resourceapiv1beta2.QualifiedName]resourceapiv1beta2.DeviceCapacity{
+				"memory-size": {Value: resource.MustParse("2Gi")},
+			},
+			ConsumesCounters: []resourceapiv1beta2.DeviceCounterConsumption{
+				{
+					CounterSet: "memory-pool",
+					Counters: map[string]resourceapiv1beta2.Counter{
+						"memory": {
+							Value: resource.MustParse("2Gi"),
+						},
+					},
+				},
+			},
+		},
+		{
+			Name: "memory2-2g",
+			Capacity: map[resourceapiv1beta2.QualifiedName]resourceapiv1beta2.DeviceCapacity{
+				"memory-size": {Value: resource.MustParse("2Gi")},
+			},
+			ConsumesCounters: []resourceapiv1beta2.DeviceCounterConsumption{
+				{
+					CounterSet: "memory-pool",
+					Counters: map[string]resourceapiv1beta2.Counter{
+						"memory": {
+							Value: resource.MustParse("2Gi"),
+						},
+					},
+				},
+			},
+		},
+		{
+			Name: "memory3-2g",
+			Capacity: map[resourceapiv1beta2.QualifiedName]resourceapiv1beta2.DeviceCapacity{
+				"memory-size": {Value: resource.MustParse("2Gi")},
+			},
+			ConsumesCounters: []resourceapiv1beta2.DeviceCounterConsumption{
+				{
+					CounterSet: "memory-pool",
+					Counters: map[string]resourceapiv1beta2.Counter{
+						"memory": {
+							Value: resource.MustParse("2Gi"),
+						},
+					},
+				},
+			},
+		},
+		{
+			Name: "memory4-4g",
+			Capacity: map[resourceapiv1beta2.QualifiedName]resourceapiv1beta2.DeviceCapacity{
+				"memory-size": {Value: resource.MustParse("4Gi")},
+			},
+			ConsumesCounters: []resourceapiv1beta2.DeviceCounterConsumption{
+				{
+					CounterSet: "memory-pool",
+					Counters: map[string]resourceapiv1beta2.Counter{
+						"memory": {
+							Value: resource.MustParse("4Gi"),
+						},
+					},
+				},
+			},
+		},
+		{
+			Name: "memory5-4g",
+			Capacity: map[resourceapiv1beta2.QualifiedName]resourceapiv1beta2.DeviceCapacity{
+				"memory-size": {Value: resource.MustParse("4Gi")},
+			},
+			ConsumesCounters: []resourceapiv1beta2.DeviceCounterConsumption{
+				{
+					CounterSet: "memory-pool",
+					Counters: map[string]resourceapiv1beta2.Counter{
+						"memory": {
+							Value: resource.MustParse("4Gi"),
+						},
+					},
+				},
+			},
+		},
+	}
+
+	devicesSlice := &resourceapiv1beta2.ResourceSlice{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: devicesSliceName,
+		},
+		Spec: resourceapiv1beta2.ResourceSliceSpec{
+			Driver: driverName,
+			Devices: devices,
+			Pool: resourceapiv1beta2.ResourcePool{
+				Name: "partitionable-pool",
+				Generation: 1,
+				ResourceSliceCount: 2,
+			},
+			AllNodes: ptr.To(true),
+		},
+	}
+
+	_, err = tCtx.Client().ResourceV1beta2().ResourceSlices().Create(tCtx, devicesSlice, metav1.CreateOptions{})
+	tCtx.ExpectNoError(err, "create ResourceSlice with devices")
+
+	// Create a ResourceClaim with a device selector to get a 2Gi memory device.
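+	// b.ExternalClaim() returns a claim with a single device request; the CEL
+	// selectors added here narrow that request down to a device whose advertised
+	// memory-size capacity is exactly 2Gi (and, for the next claim, 4Gi).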
+	claim2g := b.ExternalClaim()
+	claim2g.Spec.Devices.Requests[0].Exactly.Selectors = []resourceapi.DeviceSelector{
+		{
+			CEL: &resourceapi.CELDeviceSelector{
+				Expression: `device.capacity["memory-size"].compareTo(quantity("2Gi")) == 0`,
+			},
+		},
+	}
+
+	pod2g := createPodWithClaim(tCtx, b, claim2g)
+
+	claim4g := b.ExternalClaim()
+	claim4g.Spec.Devices.Requests[0].Exactly.Selectors = []resourceapi.DeviceSelector{
+		{
+			CEL: &resourceapi.CELDeviceSelector{
+				Expression: `device.capacity["memory-size"].compareTo(quantity("4Gi")) == 0`,
+			},
+		},
+	}
+
+	pod4g := createPodWithClaim(tCtx, b, claim4g)
+
+	// Wait for all pods to be running.
+	for _, pod := range []*v1.Pod{pod2g, pod4g} {
+		podName := pod.Name
+		tCtx.Eventually(func(tCtx ktesting.TContext) (*v1.Pod, error) {
+			return tCtx.Client().CoreV1().Pods(namespace).Get(tCtx, podName, metav1.GetOptions{})
+		}).WithTimeout(2*time.Minute).Should(gomega.HaveField("Status.Phase", v1.PodRunning), "wait for Pod %s to be running", podName)
+	}
+
+	claim4g2 := b.ExternalClaim()
+	claim4g2.Spec.Devices.Requests[0].Exactly.Selectors = []resourceapi.DeviceSelector{
+		{
+			CEL: &resourceapi.CELDeviceSelector{
+				Expression: `device.capacity["memory-size"].compareTo(quantity("4Gi")) == 0`,
+			},
+		},
+	}
+
+	pod4g2 := createPodWithClaim(tCtx, b, claim4g2)
+	tCtx.ExpectNoError(e2epod.WaitForPodNameUnschedulableInNamespace(tCtx, tCtx.Client(), pod4g2.Name, namespace), "Pod with a claim exceeding the remaining counter capacity should be unschedulable")
+
+	return func(tCtx ktesting.TContext) downgradedTestFunc {
+		// After upgrade: the scheduled pods are still running, and the unschedulable pod remains unschedulable.
+		verifyPodsState(tCtx, namespace, []*v1.Pod{pod2g, pod4g}, pod4g2)
+
+		return func(tCtx ktesting.TContext) {
+			// After downgrade: the scheduled pods are still running, and the unschedulable pod remains unschedulable.
+			verifyPodsState(tCtx, namespace, []*v1.Pod{pod2g, pod4g}, pod4g2)
+		}
+	}
+}
+
+func verifyPodsState(tCtx ktesting.TContext, namespace string, runningPods []*v1.Pod, unschedulablePod *v1.Pod) {
+	// Verify running pods are still running with no restarts.
+	for _, pod := range runningPods {
+		p, err := tCtx.Client().CoreV1().Pods(namespace).Get(tCtx, pod.Name, metav1.GetOptions{})
+		tCtx.ExpectNoError(err, "get Pod %s", pod.Name)
+		tCtx.Expect(p.Status.Phase).To(gomega.Equal(v1.PodRunning), "Pod %s should still be running", pod.Name)
+		for _, cs := range p.Status.ContainerStatuses {
+			tCtx.Expect(cs.RestartCount).To(gomega.Equal(int32(0)), "Container %s in Pod %s should have no restarts", cs.Name, pod.Name)
+		}
+	}
+
+	// Verify unschedulable pod is still unschedulable.
+	tCtx.ExpectNoError(e2epod.WaitForPodNameUnschedulableInNamespace(tCtx, tCtx.Client(), unschedulablePod.Name, namespace), "Pod %s should still be unschedulable", unschedulablePod.Name)
+}
+
+func createPodWithClaim(tCtx ktesting.TContext, b *drautils.Builder, claim *resourceapi.ResourceClaim) *v1.Pod {
+	pod := b.PodExternal()
+	pod.Spec.Containers[0].Name = "with-resource"
+	podClaimName := "resource-claim"
+	pod.Spec.ResourceClaims = []v1.PodResourceClaim{
+		{
+			Name: podClaimName,
+			ResourceClaimName: &claim.Name,
+		},
+	}
+	pod.Spec.Containers[0].Resources.Claims = []v1.ResourceClaim{{Name: podClaimName}}
+	b.Create(tCtx, claim, pod)
+	return pod
+}
diff --git a/test/e2e_dra/upgradedowngrade_test.go b/test/e2e_dra/upgradedowngrade_test.go
index b53c06090f5..96e54260de7 100644
--- a/test/e2e_dra/upgradedowngrade_test.go
+++ b/test/e2e_dra/upgradedowngrade_test.go
@@ -74,6 +74,7 @@ func init() {
 var subTests = map[string]initialTestFunc{
 	"core DRA": coreDRA,
 	"ResourceClaim device status": resourceClaimDeviceStatus,
+	"partitionable devices": partitionableDevices,
 }
 
 type initialTestFunc func(tCtx ktesting.TContext, builder *drautils.Builder) upgradedTestFunc