kubernetes/pkg/scheduler/framework/plugins/dynamicresources/dynamicresources_test.go
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dynamicresources
import (
"context"
"errors"
"fmt"
"math"
"slices"
"sort"
"strings"
"sync"
"testing"
"time"
goruntime "runtime"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
v1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
apiruntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
cgotesting "k8s.io/client-go/testing"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/events"
featuregatetesting "k8s.io/component-base/featuregate/testing"
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/testutil"
"k8s.io/dynamic-resource-allocation/deviceclass/extendedresourcecache"
resourceslicetracker "k8s.io/dynamic-resource-allocation/resourceslice/tracker"
"k8s.io/dynamic-resource-allocation/structured"
kubeschedulerconfigv1 "k8s.io/kube-scheduler/config/v1"
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
configv1 "k8s.io/kubernetes/pkg/scheduler/apis/config/v1"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
"k8s.io/kubernetes/pkg/scheduler/framework/runtime"
"k8s.io/kubernetes/pkg/scheduler/metrics"
st "k8s.io/kubernetes/pkg/scheduler/testing"
"k8s.io/kubernetes/pkg/scheduler/util/assumecache"
"k8s.io/kubernetes/test/utils/ktesting"
"k8s.io/utils/ptr"
)
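// Register the scheduler metrics once so that the metric checks in the
// test cases below can gather them.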
func init() {
metrics.InitMetrics()
}
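// Shared fixtures for the test cases below: pods, nodes, resource slices,
// device classes, resource claims, and allocation results in various states.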
var (
podKind = v1.SchemeGroupVersion.WithKind("Pod")
nodeName = "worker"
node2Name = "worker-2"
node3Name = "worker-3"
driver = "some-driver"
driver2 = "some-driver-2"
podName = "my-pod"
podUID = "1234"
resourceName = "my-resource"
resourceName2 = resourceName + "-2"
claimName = podName + "-" + resourceName
claimName2 = podName + "-" + resourceName2
className = "my-resource-class"
namespace = "default"
attrName = resourceapi.QualifiedName("healthy") // device attribute only available on non-default node
extendedResourceName = "example.com/gpu"
extendedResourceName2 = "example.com/gpu2"
implicitExtendedResourceName = "deviceclass.resource.kubernetes.io/my-resource-class"
deviceClass = &resourceapi.DeviceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className,
},
}
deviceClassWithExtendResourceName = &resourceapi.DeviceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className,
},
Spec: resourceapi.DeviceClassSpec{
ExtendedResourceName: &extendedResourceName,
},
}
deviceClassWithExtendResourceName2 = &resourceapi.DeviceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className + "2",
},
Spec: resourceapi.DeviceClassSpec{
ExtendedResourceName: &extendedResourceName2,
},
}
podWithClaimName = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimName: &claimName}).
Obj()
podWithClaimTemplate = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimTemplateName: &claimName}).
Obj()
podWithClaimTemplateInStatus = func() *v1.Pod {
pod := podWithClaimTemplate.DeepCopy()
pod.Status.ResourceClaimStatuses = []v1.PodResourceClaimStatus{
{
Name: pod.Spec.ResourceClaims[0].Name,
ResourceClaimName: &claimName,
},
}
return pod
}()
podWithTwoClaimTemplates = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimTemplateName: &claimName}).
PodResourceClaims(v1.PodResourceClaim{Name: resourceName2, ResourceClaimTemplateName: &claimName}).
Obj()
podWithTwoClaimNames = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimName: &claimName}).
PodResourceClaims(v1.PodResourceClaim{Name: resourceName2, ResourceClaimName: &claimName2}).
Obj()
podWithExtendedResourceName = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Req(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName): "1",
}).
Obj()
podWithExtendedResourceName2 = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Req(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName): "1",
v1.ResourceName(extendedResourceName2): "1",
}).
Obj()
podWithImplicitExtendedResourceName = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Req(map[v1.ResourceName]string{
v1.ResourceName(implicitExtendedResourceName): "1",
v1.ResourceName(extendedResourceName): "2",
}).
Obj()
podWithImplicitExtendedResourceNameTwoContainers = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Req(map[v1.ResourceName]string{
v1.ResourceName(implicitExtendedResourceName): "1",
}).
Req(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName): "2",
}).
Obj()
// Node with "instance-1" device and no device attributes.
workerNode = &st.MakeNode().Name(nodeName).Label("kubernetes.io/hostname", nodeName).Node
workerNodeSlice = st.MakeResourceSlice(nodeName, driver).Device("instance-1").Obj()
largeWorkerNodeSlice = st.MakeResourceSlice(nodeName, driver).Device("instance-1").Device("instance-2").Device("instance-3").Device("instance-4").Obj()
// Node with same device, but now with a "healthy" boolean attribute.
workerNode2 = &st.MakeNode().Name(node2Name).Label("kubernetes.io/hostname", node2Name).Node
workerNode2Slice = st.MakeResourceSlice(node2Name, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(true)}}).Obj()
// Yet another node, same as the second one.
workerNode3 = &st.MakeNode().Name(node3Name).Label("kubernetes.io/hostname", node3Name).Node
workerNode3Slice = st.MakeResourceSlice(node3Name, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(true)}}).Obj()
workerNodeWithExtendedResource = &st.MakeNode().Name(nodeName).Label("kubernetes.io/hostname", nodeName).Capacity(map[v1.ResourceName]string{v1.ResourceName(extendedResourceName): "1"}).Node
workerNodeWithExtendedResourceZeroAllocatable = &st.MakeNode().Name(nodeName).Label("kubernetes.io/hostname", nodeName).Capacity(map[v1.ResourceName]string{v1.ResourceName(extendedResourceName): "0"}).Node
brokenSelector = resourceapi.DeviceSelector{
CEL: &resourceapi.CELDeviceSelector{
// Not set for workerNode.
Expression: fmt.Sprintf(`device.attributes["%s"].%s`, driver, attrName),
},
}
claim = st.MakeResourceClaim().
Name(claimName).
Namespace(namespace).
Request(className).
Obj()
largeClaim = st.MakeResourceClaim().
Name(claimName).
Namespace(namespace).
Request(className).
Request(className).
Request(className).
Request(className).
Request(className).
Obj()
claim2 = st.MakeResourceClaim().
Name(claimName2).
Namespace(namespace).
Request(className).
Obj()
claimWithPrioritzedList = st.MakeResourceClaim().
Name(claimName).
Namespace(namespace).
RequestWithPrioritizedList(
st.SubRequest("subreq-1", className, 1),
).
Obj()
claimWithPrioritizedListAndSelector = st.MakeResourceClaim().
Name(claimName).
Namespace(namespace).
RequestWithPrioritizedList(
st.SubRequestWithSelector("subreq-1", className, fmt.Sprintf(`device.attributes["%s"].%s`, driver, attrName)),
st.SubRequest("subreq-2", className, 1),
).
Obj()
claimWithMultiplePrioritizedListRequests = st.MakeResourceClaim().
Name(claimName).
Namespace(namespace).
RequestWithPrioritizedList(
st.SubRequest("subreq-1", className, 2),
st.SubRequest("subreq-2", className, 1),
).
RequestWithPrioritizedList(
st.SubRequest("subreq-1", className, 2),
st.SubRequest("subreq-2", className, 1),
).Obj()
claim2WithPrioritizedListAndMultipleSubrequests = st.MakeResourceClaim().
Name(claimName2).
Namespace(namespace).
RequestWithPrioritizedList(
st.SubRequest("subreq-1", className, 4),
st.SubRequest("subreq-2", className, 3),
st.SubRequest("subreq-3", className, 2),
st.SubRequest("subreq-4", className, 1),
).Obj()
pendingClaim = st.FromResourceClaim(claim).
OwnerReference(podName, podUID, podKind).
Obj()
pendingClaim2 = st.FromResourceClaim(claim2).
OwnerReference(podName, podUID, podKind).
Obj()
pendingClaimWithPrioritizedList = st.FromResourceClaim(claimWithPrioritzedList).
OwnerReference(podName, podUID, podKind).
Obj()
pendingClaimWithPrioritizedListAndSelector = st.FromResourceClaim(claimWithPrioritizedListAndSelector).
OwnerReference(podName, podUID, podKind).
Obj()
pendingClaim2WithPrioritizedListAndMultipleSubrequests = st.FromResourceClaim(claim2WithPrioritizedListAndMultipleSubrequests).
OwnerReference(podName, podUID, podKind).
Obj()
pendingClaimWithMultiplePrioritizedListRequests = st.FromResourceClaim(claimWithMultiplePrioritizedListRequests).
OwnerReference(podName, podUID, podKind).
Obj()
allocationResult = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "req-1",
}},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
allocationResult2 = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver2,
Pool: nodeName,
Device: "instance-2",
Request: "req-2",
}},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
extendedResourceAllocationResult = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "container-0-request-0",
}},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
extendedResourceAllocationResult2 = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "container-0-request-1",
}},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
implicitExtendedResourceAllocationResult = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "container-0-request-0",
},
{
Driver: driver,
Pool: nodeName,
Device: "instance-2",
Request: "container-0-request-1",
},
{
Driver: driver,
Pool: nodeName,
Device: "instance-3",
Request: "container-0-request-1",
},
},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
implicitExtendedResourceAllocationResultTwoContainers = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "container-0-request-0",
},
{
Driver: driver,
Pool: nodeName,
Device: "instance-2",
Request: "container-1-request-0",
},
{
Driver: driver,
Pool: nodeName,
Device: "instance-3",
Request: "container-1-request-0",
},
},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
extendedResourceAllocationResultNode2 = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "container-0-request-0",
}},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{node2Name}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
allocationResultWithPrioritizedList = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "req-1/subreq-1",
}},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
allocationResultWithPrioritizedListAndSelector = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "req-1/subreq-1",
}},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
allocationResultWithPrioritizedListAndMultipleSubrequests = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "req-1/subreq-2",
},
{
Driver: driver,
Pool: nodeName,
Device: "instance-2",
Request: "req-1/subreq-2",
},
{
Driver: driver,
Pool: nodeName,
Device: "instance-3",
Request: "req-1/subreq-2",
},
},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
allocationResultWithMultiplePrioritizedListRequests = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "req-1/subreq-1",
},
{
Driver: driver,
Pool: nodeName,
Device: "instance-2",
Request: "req-1/subreq-1",
},
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "req-2/subreq-1",
},
{
Driver: driver,
Pool: nodeName,
Device: "instance-2",
Request: "req-2/subreq-1",
},
},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
inUseClaim = st.FromResourceClaim(pendingClaim).
Allocation(allocationResult).
ReservedForPod(podName, types.UID(podUID)).
Obj()
inUseClaimWithPrioritizedList = st.FromResourceClaim(pendingClaimWithPrioritizedList).
Allocation(allocationResultWithPrioritizedList).
ReservedForPod(podName, types.UID(podUID)).
Obj()
inUseClaimWithPrioritizedListAndSelector = st.FromResourceClaim(pendingClaimWithPrioritizedListAndSelector).
Allocation(allocationResultWithPrioritizedListAndSelector).
ReservedForPod(podName, types.UID(podUID)).
Obj()
inUseClaim2WithPrioritizedListAndMultipleSubrequests = st.FromResourceClaim(pendingClaim2WithPrioritizedListAndMultipleSubrequests).
Allocation(allocationResultWithPrioritizedListAndMultipleSubrequests).
ReservedForPod(podName, types.UID(podUID)).
Obj()
inUseClaimWithMultiplePrioritizedListRequests = st.FromResourceClaim(pendingClaimWithMultiplePrioritizedListRequests).
Allocation(allocationResultWithMultiplePrioritizedListRequests).
ReservedForPod(podName, types.UID(podUID)).
Obj()
allocatedClaim = st.FromResourceClaim(pendingClaim).
Allocation(allocationResult).
Obj()
allocatedClaim2 = st.FromResourceClaim(pendingClaim2).
Allocation(allocationResult2).
Obj()
allocatedClaimWithPrioritizedList = st.FromResourceClaim(pendingClaimWithPrioritizedList).
Allocation(allocationResultWithPrioritizedList).
Obj()
allocatedClaimWithPrioritizedListAndSelector = st.FromResourceClaim(pendingClaimWithPrioritizedListAndSelector).
Allocation(allocationResultWithPrioritizedListAndSelector).
Obj()
allocatedClaim2WithPrioritizedListAndMultipleSubrequests = st.FromResourceClaim(pendingClaim2WithPrioritizedListAndMultipleSubrequests).
Allocation(allocationResultWithPrioritizedListAndMultipleSubrequests).
Obj()
allocatedClaimWithMultiplePrioritizedListRequests = st.FromResourceClaim(pendingClaimWithMultiplePrioritizedListRequests).
Allocation(allocationResultWithMultiplePrioritizedListRequests).
Obj()
allocatedClaimWithWrongTopology = st.FromResourceClaim(allocatedClaim).
Allocation(&resourceapi.AllocationResult{NodeSelector: st.MakeNodeSelector().In("no-such-label", []string{"no-such-value"}, st.NodeSelectorTypeMatchExpressions).Obj()}).
Obj()
allocatedClaimWithGoodTopology = st.FromResourceClaim(allocatedClaim).
Allocation(&resourceapi.AllocationResult{NodeSelector: st.MakeNodeSelector().In("kubernetes.io/hostname", []string{nodeName}, st.NodeSelectorTypeMatchExpressions).Obj()}).
Obj()
otherClaim = st.MakeResourceClaim().
Name("not-my-claim").
Namespace(namespace).
Request(className).
Obj()
otherAllocatedClaim = st.FromResourceClaim(otherClaim).
Allocation(allocationResult).
Obj()
extendedResourceClaim = st.MakeResourceClaim().
Name("my-pod-extended-resources-0").
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-0", className).
Allocation(extendedResourceAllocationResult).
Obj()
extendedResourceClaim2 = st.MakeResourceClaim().
Name("my-pod-extended-resources-0").
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-1", className+"2").
Allocation(extendedResourceAllocationResult2).
Obj()
extendedResourceClaimNoName = st.MakeResourceClaim().
Name(specialClaimInMemName).
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-0", className).
Allocation(extendedResourceAllocationResult).
Obj()
extendedResourceClaimNoName2 = st.MakeResourceClaim().
Name(specialClaimInMemName).
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-1", className+"2").
Allocation(extendedResourceAllocationResult2).
Obj()
implicitExtendedResourceClaim = st.MakeResourceClaim().
Name("my-pod-extended-resources-0").
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-0", className).
RequestWithNameCount("container-0-request-1", className, 2).
Allocation(implicitExtendedResourceAllocationResult).
Obj()
implicitExtendedResourceClaimNoName = st.MakeResourceClaim().
Name(specialClaimInMemName).
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-0", className).
RequestWithNameCount("container-0-request-1", className, 2).
Allocation(implicitExtendedResourceAllocationResult).
Obj()
implicitExtendedResourceClaimTwoContainers = st.MakeResourceClaim().
Name("my-pod-extended-resources-0").
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-0", className).
RequestWithNameCount("container-1-request-0", className, 2).
Allocation(implicitExtendedResourceAllocationResultTwoContainers).
Obj()
implicitExtendedResourceClaimNoNameTwoContainers = st.MakeResourceClaim().
Name(specialClaimInMemName).
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-0", className).
RequestWithNameCount("container-1-request-0", className, 2).
Allocation(implicitExtendedResourceAllocationResultTwoContainers).
Obj()
extendedResourceClaimNode2 = st.MakeResourceClaim().
Name("my-pod-extended-resources-0").
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-0", className).
Allocation(extendedResourceAllocationResultNode2).
Obj()
deviceTaint = resourceapi.DeviceTaint{
Key: "taint-key",
Value: "taint-value",
Effect: resourceapi.DeviceTaintEffectNoSchedule,
}
// for DRA Device Binding Conditions
bindingConditions = []string{"condition"}
bindingFailureConditions = []string{"failed"}
fabricSlice = func() *resourceapi.ResourceSlice {
res := st.MakeResourceSlice(nodeName, driver).Device("instance-1").Obj()
res.Spec.Devices[0].BindsToNode = ptr.To(true)
res.Spec.Devices[0].BindingConditions = bindingConditions
res.Spec.Devices[0].BindingFailureConditions = bindingFailureConditions
res.Spec.NodeSelector = st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
return res
}()
fabricSlice2 = func() *resourceapi.ResourceSlice {
res := st.MakeResourceSlice(nodeName, driver2).Device("instance-2").Obj()
res.Spec.Devices[0].BindsToNode = ptr.To(true)
res.Spec.Devices[0].BindingConditions = bindingConditions
res.Spec.Devices[0].BindingFailureConditions = bindingFailureConditions
res.Spec.NodeSelector = st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
return res
}()
allocationResultWithBindingConditions = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "req-1",
BindingConditions: bindingConditions,
BindingFailureConditions: bindingFailureConditions,
}},
},
NodeSelector: st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj(),
}
allocationResultWithBindingConditions2 = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver2,
Pool: nodeName,
Device: "instance-2",
Request: "req-2",
BindingConditions: bindingConditions,
BindingFailureConditions: bindingFailureConditions,
}},
},
NodeSelector: st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj(),
}
boundClaim = st.FromResourceClaim(allocatedClaim).
Allocation(allocationResultWithBindingConditions).
AllocatedDeviceStatuses([]resourceapi.AllocatedDeviceStatus{
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Conditions: []metav1.Condition{
{Type: "condition", Status: metav1.ConditionTrue},
{Type: "failed", Status: metav1.ConditionFalse},
},
},
}).
Obj()
boundClaim2 = st.FromResourceClaim(allocatedClaim2).
Allocation(allocationResultWithBindingConditions2).
AllocatedDeviceStatuses([]resourceapi.AllocatedDeviceStatus{
{
Driver: driver2,
Pool: nodeName,
Device: "instance-2",
Conditions: []metav1.Condition{
{Type: "condition", Status: metav1.ConditionTrue},
{Type: "failed", Status: metav1.ConditionFalse},
},
},
}).
Obj()
failedBindingClaim = st.FromResourceClaim(allocatedClaim).
Allocation(allocationResultWithBindingConditions).
AllocatedDeviceStatuses([]resourceapi.AllocatedDeviceStatus{
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Conditions: []metav1.Condition{
{Type: "condition", Status: metav1.ConditionFalse},
{Type: "failed", Status: metav1.ConditionTrue},
},
},
}).
Obj()
failedBindingClaim2 = st.FromResourceClaim(allocatedClaim2).
Allocation(allocationResultWithBindingConditions2).
AllocatedDeviceStatuses([]resourceapi.AllocatedDeviceStatus{
{
Driver: driver2,
Pool: nodeName,
Device: "instance-2",
Conditions: []metav1.Condition{
{Type: "condition", Status: metav1.ConditionFalse},
{Type: "failed", Status: metav1.ConditionTrue},
},
},
}).
Obj()
)
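// taintDevices returns a deep copy of the slice with deviceTaint
// appended to the taints of every device.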
func taintDevices(slice *resourceapi.ResourceSlice) *resourceapi.ResourceSlice {
slice = slice.DeepCopy()
for i := range slice.Spec.Devices {
slice.Spec.Devices[i].Taints = append(slice.Spec.Devices[i].Taints, deviceTaint)
}
return slice
}
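// reserve returns a copy of the claim with the given pod added to
// its reservedFor list.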
func reserve(claim *resourceapi.ResourceClaim, pod *v1.Pod) *resourceapi.ResourceClaim {
return st.FromResourceClaim(claim).
ReservedForPod(pod.Name, types.UID(pod.UID)).
Obj()
}
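// adminAccess returns a copy of the claim with admin access requested
// for every device request and, if the claim is allocated, recorded in
// every allocation result.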
func adminAccess(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
claim = claim.DeepCopy()
for i := range claim.Spec.Devices.Requests {
claim.Spec.Devices.Requests[i].Exactly.AdminAccess = ptr.To(true)
}
if claim.Status.Allocation != nil {
for i := range claim.Status.Allocation.Devices.Results {
claim.Status.Allocation.Devices.Results[i].AdminAccess = ptr.To(true)
}
}
return claim
}
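// breakCELInClaim returns a copy of the claim with every request selector
// replaced by brokenSelector (adding it if there were none), so that CEL
// evaluation fails at runtime on devices without the attribute.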
func breakCELInClaim(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
claim = claim.DeepCopy()
for i := range claim.Spec.Devices.Requests {
for e := range claim.Spec.Devices.Requests[i].Exactly.Selectors {
claim.Spec.Devices.Requests[i].Exactly.Selectors[e] = brokenSelector
}
if len(claim.Spec.Devices.Requests[i].Exactly.Selectors) == 0 {
claim.Spec.Devices.Requests[i].Exactly.Selectors = []resourceapi.DeviceSelector{brokenSelector}
}
}
return claim
}
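// breakCELInClass returns a copy of the class with every selector
// replaced by brokenSelector (adding it if there were none).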
func breakCELInClass(class *resourceapi.DeviceClass) *resourceapi.DeviceClass {
class = class.DeepCopy()
for i := range class.Spec.Selectors {
class.Spec.Selectors[i] = brokenSelector
}
if len(class.Spec.Selectors) == 0 {
class.Spec.Selectors = []resourceapi.DeviceSelector{brokenSelector}
}
return class
}
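// updateDeviceClassName returns a copy of the claim with the device class
// name of every request set to deviceClassName, covering both exact
// requests and prioritized-list subrequests.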
func updateDeviceClassName(claim *resourceapi.ResourceClaim, deviceClassName string) *resourceapi.ResourceClaim {
claim = claim.DeepCopy()
for i := range claim.Spec.Devices.Requests {
// If the firstAvailable list is empty, we update the device class name
// on the base request.
if len(claim.Spec.Devices.Requests[i].FirstAvailable) == 0 {
claim.Spec.Devices.Requests[i].Exactly.DeviceClassName = deviceClassName
} else {
// If subrequests are specified, update the device class name on
// all of them.
for j := range claim.Spec.Devices.Requests[i].FirstAvailable {
claim.Spec.Devices.Requests[i].FirstAvailable[j].DeviceClassName = deviceClassName
}
}
}
return claim
}
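// getDefaultDynamicResourcesArgs applies the v1 defaults for
// DynamicResourcesArgs and converts them to the internal config type.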
func getDefaultDynamicResourcesArgs() *config.DynamicResourcesArgs {
v1dra := &kubeschedulerconfigv1.DynamicResourcesArgs{}
configv1.SetDefaults_DynamicResourcesArgs(v1dra)
dra := &config.DynamicResourcesArgs{}
_ = configv1.Convert_v1_DynamicResourcesArgs_To_config_DynamicResourcesArgs(v1dra, dra, nil)
return dra
}
// result defines the expected outcome of some operation. It covers
// the operation's status and the state of the world (= objects).
type result struct {
status *fwk.Status
// changes contains a mapping of name to an update function for
// the corresponding object. These functions apply exactly the expected
// changes to a copy of the object as it existed before the operation.
changes change
// added contains objects created by the operation.
added []metav1.Object
// removed contains objects deleted by the operation.
removed []metav1.Object
// assumedClaim is the one claim which is expected to be assumed,
// nil if none.
assumedClaim *resourceapi.ResourceClaim
// inFlightClaims is a list of claims which are expected to be tracked as
// in flight, nil if none.
inFlightClaims []metav1.Object
}
// change contains functions for modifying objects of a certain type. These
// functions get called for all objects of that type. If a function needs to
// make changes only to a particular instance, it must check the name.
type change struct {
claim func(*resourceapi.ResourceClaim) *resourceapi.ResourceClaim
}
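// perNodeResult maps a node name to the result expected for that node.
// Missing entries default to the empty result.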
type perNodeResult map[string]result
func (p perNodeResult) forNode(nodeName string) result {
if p == nil {
return result{}
}
return p[nodeName]
}
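// perNodeScoreResult maps a node name to the score expected for that node.
// Missing entries default to zero.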
type perNodeScoreResult map[string]int64
func (p perNodeScoreResult) forNode(nodeName string) int64 {
if p == nil {
return 0
}
return p[nodeName]
}
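// want describes the expected outcome of each extension point invoked
// during a test case, from PreEnqueue through PostBind and PostFilter.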
type want struct {
preenqueue result
preFilterResult *fwk.PreFilterResult
prefilter result
filter perNodeResult
prescore result
scoreResult perNodeScoreResult
score perNodeResult
normalizeScoreResult fwk.NodeScoreList
normalizeScore result
reserve result
unreserve result
preBindPreFlightStatus *fwk.Status
prebind result
postbind result
postFilterResult *fwk.PostFilterResult
postfilter result
// unreserveAfterBindFailure, if set, triggers a call to Unreserve
// after PreBind, as if the actual Bind had failed.
unreserveAfterBindFailure *result
// unreserveBeforePreBind, if set, triggers a call to Unreserve
// before PreBind, as if some other PreBind plugin had failed.
unreserveBeforePreBind *result
}
// prepare contains changes for objects in the API server.
// Those changes are applied before running the steps. This can
// be used to simulate concurrent changes by some other entity,
// such as a resource driver.
type prepare struct {
filter change
prescore change
reserve change
unreserve change
prebind change
postbind change
postfilter change
}
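// testPluginCase describes a single scenario: the objects which exist
// before scheduling, the feature gates to enable, optional API reactors,
// and the expected outcome of each extension point.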
type testPluginCase struct {
// patchTestCase gets called right before the test case is tested.
// It can be used to update timestamps in test cases
// that are sensitive to the current time.
patchTestCase func(tc *testPluginCase)
args *config.DynamicResourcesArgs
nodes []*v1.Node // default if unset is workerNode
pod *v1.Pod
claims []*resourceapi.ResourceClaim
classes []*resourceapi.DeviceClass
// objs get stored directly in the fake client, without passing
// through reactors, in contrast to the types above.
objs []apiruntime.Object
prepare prepare
want want
// Invoke Filter with a canceled context.
cancelFilter bool
// enableDRAAdminAccess is set to true if the DRAAdminAccess feature gate is enabled.
enableDRAAdminAccess bool
// enableDRADeviceBindingConditions is set to true if the DRADeviceBindingConditions feature gate is enabled.
enableDRADeviceBindingConditions bool
// enableDRAResourceClaimDeviceStatus is set to true if the DRAResourceClaimDeviceStatus feature gate is enabled.
enableDRAResourceClaimDeviceStatus bool
// Feature gates. The polarity is chosen so that false matches the
// common case, which therefore doesn't need to be set explicitly.
disableDRA bool
enableDRAExtendedResource bool
enableDRAPrioritizedList bool
enableDRADeviceTaints bool
disableDRASchedulerFilterTimeout bool
skipOnWindows string
failPatch bool
reactors []cgotesting.Reactor
metrics func(ktesting.TContext, compbasemetrics.Gatherer)
}
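// TestPlugin runs testPlugin with a ktesting context derived from t.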
func TestPlugin(t *testing.T) {
testPlugin(ktesting.Init(t))
}
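// testPlugin contains the table of test cases which exercise the
// dynamicresources plugin across the scheduler framework extension points.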
func testPlugin(tCtx ktesting.TContext) {
testcases := map[string]testPluginCase{
"empty": {
pod: st.MakePod().Name("foo").Namespace("default").Obj(),
want: want{
prefilter: result{
status: fwk.NewStatus(fwk.Skip),
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable),
},
preBindPreFlightStatus: fwk.NewStatus(fwk.Skip),
},
},
"empty-with-extended-resources-enabled": {
enableDRAExtendedResource: true,
pod: st.MakePod().Name("foo").Namespace("default").Obj(),
want: want{
prefilter: result{
status: fwk.NewStatus(fwk.Skip),
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable),
},
preBindPreFlightStatus: fwk.NewStatus(fwk.Skip),
},
},
"claim-reference": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{allocatedClaim, otherClaim},
want: want{
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Status.ReservedFor = inUseClaim.Status.ReservedFor
}
return claim
},
},
},
},
},
"claim-template": {
pod: podWithClaimTemplateInStatus,
claims: []*resourceapi.ResourceClaim{allocatedClaim, otherClaim},
want: want{
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimTemplateInStatus),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Status.ReservedFor = inUseClaim.Status.ReservedFor
}
return claim
},
},
},
},
},
"missing-claim": {
pod: podWithClaimTemplate, // status not set
claims: []*resourceapi.ResourceClaim{allocatedClaim, otherClaim},
want: want{
preenqueue: result{
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `pod "default/my-pod": ResourceClaim not created yet`),
},
},
},
"deleted-claim": {
pod: podWithClaimTemplateInStatus,
claims: func() []*resourceapi.ResourceClaim {
claim := allocatedClaim.DeepCopy()
claim.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return []*resourceapi.ResourceClaim{claim}
}(),
want: want{
preenqueue: result{
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim "my-pod-my-resource" is being deleted`),
},
},
},
"wrong-claim": {
pod: podWithClaimTemplateInStatus,
claims: func() []*resourceapi.ResourceClaim {
claim := allocatedClaim.DeepCopy()
claim.OwnerReferences[0].UID += "123"
return []*resourceapi.ResourceClaim{claim}
}(),
want: want{
preenqueue: result{
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `ResourceClaim default/my-pod-my-resource was not created for pod default/my-pod (pod is not owner)`),
},
},
},
"no-resources": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
},
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
},
},
},
"with-resources": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaim},
},
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = allocatedClaim.Finalizers
claim.Status = inUseClaim.Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
},
},
},
"with-resources-has-finalizer": {
// As before, but the finalizer is already set. Could happen if
// the scheduler got interrupted.
pod: podWithClaimName,
claims: func() []*resourceapi.ResourceClaim {
claim := pendingClaim
claim.Finalizers = allocatedClaim.Finalizers
return []*resourceapi.ResourceClaim{claim}
}(),
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaim},
},
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Status = inUseClaim.Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
},
},
},
"with-resources-finalizer-gets-removed": {
// As before, but the finalizer is already set. Then it gets
// removed before the scheduler reaches PreBind.
pod: podWithClaimName,
claims: func() []*resourceapi.ResourceClaim {
claim := pendingClaim
claim.Finalizers = allocatedClaim.Finalizers
return []*resourceapi.ResourceClaim{claim}
}(),
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
prepare: prepare{
prebind: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
claim.Finalizers = nil
return claim
},
},
},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaim},
},
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = allocatedClaim.Finalizers
claim.Status = inUseClaim.Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
},
},
},
"with-resources-finalizer-gets-added": {
// No finalizer initially, then it gets added before
// the scheduler reaches PreBind. Shouldn't happen?
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
prepare: prepare{
prebind: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
claim.Finalizers = allocatedClaim.Finalizers
return claim
},
},
},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaim},
},
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Status = inUseClaim.Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
},
},
},
"skip-bind": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaim},
},
unreserveBeforePreBind: &result{},
},
},
"exhausted-resources": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim, otherAllocatedClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
},
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
},
},
},
// The two test cases for device tainting only need to cover
// whether the feature gate is passed through to the allocator
// correctly. The actual logic around device taints and allocation
// is in the allocator.
"tainted-device-disabled": {
enableDRADeviceTaints: false,
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{taintDevices(workerNodeSlice)},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaim},
},
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = allocatedClaim.Finalizers
claim.Status = inUseClaim.Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
},
},
},
"tainted-device-enabled": {
enableDRADeviceTaints: true,
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{taintDevices(workerNodeSlice)},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
},
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
},
},
},
"request-admin-access-with-DRAAdminAccess-featuregate": {
// When the DRAAdminAccess feature gate is enabled
// and the pending claim asks for admin access,
// allocation succeeds despite resources being exhausted.
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{adminAccess(pendingClaim), otherAllocatedClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{adminAccess(allocatedClaim)},
},
prebind: result{
assumedClaim: reserve(adminAccess(allocatedClaim), podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = allocatedClaim.Finalizers
claim.Status = adminAccess(inUseClaim).Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(adminAccess(allocatedClaim), podWithClaimName),
},
},
enableDRAAdminAccess: true,
},
"request-admin-access-without-DRAAdminAccess-featuregate": {
// When the DRAAdminAccess feature gate is disabled,
// even though the pending claim requests admin access,
// the scheduler returns an unschedulable status.
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{adminAccess(pendingClaim), otherAllocatedClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `claim default/my-pod-my-resource, request req-1: admin access is requested, but the feature is disabled`),
},
},
},
enableDRAAdminAccess: false,
},
"structured-ignore-allocated-admin-access": {
// The allocated claim uses admin access, so a second claim may use
// the same device.
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim, adminAccess(otherAllocatedClaim)},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaim},
},
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = allocatedClaim.Finalizers
claim.Status = inUseClaim.Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
},
},
},
"claim-parameters-CEL-runtime-error": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{breakCELInClaim(pendingClaim)},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.AsStatus(errors.New(`claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: ` + string(attrName))),
},
},
},
},
"class-parameters-CEL-runtime-error": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.DeviceClass{breakCELInClass(deviceClass)},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.AsStatus(errors.New(`class my-resource-class: selector #0: CEL runtime error: no such key: ` + string(attrName))),
},
},
},
},
// When pod scheduling encounters CEL runtime errors for some nodes, but not all,
// it should still not schedule the pod because there is something wrong with it.
// Scheduling it would make it harder to detect that there is a problem.
//
// This matches the "keeps pod pending because of CEL runtime errors" E2E test.
"CEL-runtime-error-for-one-of-two-nodes": {
nodes: []*v1.Node{workerNode, workerNode2},
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{breakCELInClaim(pendingClaim)},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice, workerNode2Slice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.AsStatus(errors.New(`claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: ` + string(attrName))),
},
},
},
},
// When two nodes are found, PreScore gets called.
"CEL-runtime-error-for-one-of-three-nodes": {
nodes: []*v1.Node{workerNode, workerNode2, workerNode3},
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{breakCELInClaim(pendingClaim)},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice, workerNode2Slice, workerNode3Slice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: `+string(attrName)),
},
},
prescore: result{
// This is the error found during Filter.
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `filter node worker: claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: healthy`),
},
},
},
"missing-class": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
want: want{
prefilter: result{
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, fmt.Sprintf("request req-1: device class %s does not exist", className)),
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `no new claims to deallocate`),
},
},
},
"wrong-topology": {
// PostFilter tries to make the pod schedulable by
// deallocating the claim.
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{allocatedClaimWithWrongTopology},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim not available on the node`),
},
},
postfilter: result{
// Claims get deallocated immediately.
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(in).
Allocation(nil).
Obj()
},
},
status: fwk.NewStatus(fwk.Unschedulable, `deallocation of ResourceClaim completed`),
},
},
},
"good-topology": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{allocatedClaimWithGoodTopology},
want: want{
prebind: result{
assumedClaim: reserve(allocatedClaimWithGoodTopology, podWithClaimName),
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(in).
ReservedFor(resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
Obj()
},
},
},
},
},
"bind-failure": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{allocatedClaimWithGoodTopology},
want: want{
prebind: result{
assumedClaim: reserve(allocatedClaimWithGoodTopology, podWithClaimName),
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(in).
ReservedFor(resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
Obj()
},
},
},
unreserveAfterBindFailure: &result{
assumedClaim: reserve(allocatedClaimWithGoodTopology, podWithClaimName),
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
out := in.DeepCopy()
out.Status.ReservedFor = []resourceapi.ResourceClaimConsumerReference{}
return out
},
},
},
},
},
"reserved-okay": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{inUseClaim},
},
"DRA-disabled": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{inUseClaim},
want: want{
prefilter: result{
status: fwk.NewStatus(fwk.Skip),
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `plugin disabled`),
},
preBindPreFlightStatus: fwk.NewStatus(fwk.Skip),
},
disableDRA: true,
},
"claim-with-request-with-unknown-device-class": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{updateDeviceClassName(claim, "does-not-exist")},
want: want{
prefilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `request req-1: device class does-not-exist does not exist`),
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `no new claims to deallocate`),
},
},
},
"claim-with-prioritized-list-feature-disabled": {
enableDRAPrioritizedList: false,
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{claimWithPrioritzedList},
classes: []*resourceapi.DeviceClass{deviceClass},
want: want{
prefilter: result{
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `claim default/my-pod-my-resource, request req-1: has subrequests, but the DRAPrioritizedList feature is disabled`),
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `no new claims to deallocate`),
},
},
},
"claim-with-prioritized-list-unknown-device-class": {
enableDRAPrioritizedList: true,
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{updateDeviceClassName(claimWithPrioritzedList, "does-not-exist")},
want: want{
prefilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `request req-1/subreq-1: device class does-not-exist does not exist`),
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `no new claims to deallocate`),
},
},
},
"claim-with-prioritized-list": {
enableDRAPrioritizedList: true,
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaimWithPrioritizedList},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaimWithPrioritizedList},
},
prebind: result{
assumedClaim: reserve(allocatedClaimWithPrioritizedList, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = allocatedClaimWithPrioritizedList.Finalizers
claim.Status = inUseClaimWithPrioritizedList.Status
}
return claim
},
},
},
},
},
"extended-resource-name-with-node-resource": {
enableDRAExtendedResource: true,
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
nodes: []*v1.Node{workerNodeWithExtendedResource},
pod: podWithExtendedResourceName,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
want: want{},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.ErrorContains(tCtx, err, "not found")
},
},
"extended-resource-one-device-plugin-one-dra": {
enableDRAExtendedResource: true,
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
nodes: []*v1.Node{workerNodeWithExtendedResource},
pod: podWithExtendedResourceName2,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName, deviceClassWithExtendResourceName2},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName2},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{extendedResourceClaimNoName2},
},
prebind: result{
assumedClaim: reserve(extendedResourceClaim2, podWithExtendedResourceName2),
added: []metav1.Object{reserve(extendedResourceClaim2, podWithExtendedResourceName2)},
},
postbind: result{
assumedClaim: reserve(extendedResourceClaim2, podWithExtendedResourceName2),
},
},
},
"extended-resource-name-with-zero-allocatable": {
enableDRAExtendedResource: true,
nodes: []*v1.Node{workerNodeWithExtendedResourceZeroAllocatable},
pod: podWithExtendedResourceName,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
},
prebind: result{
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
added: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
},
postbind: result{
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
},
},
},
"non-DRA-extended-resource-name-with-zero-allocatable": {
enableDRAExtendedResource: true,
nodes: []*v1.Node{workerNodeWithExtendedResourceZeroAllocatable},
pod: podWithExtendedResourceName,
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
want: want{
prefilter: result{
status: fwk.NewStatus(fwk.Skip),
},
preBindPreFlightStatus: fwk.NewStatus(fwk.Skip),
},
},
"extended-resource-name-no-resource": {
enableDRAExtendedResource: true,
pod: podWithExtendedResourceName,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
},
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.ErrorContains(tCtx, err, "not found")
},
},
"extended-resource-name-with-resources": {
enableDRAExtendedResource: true,
pod: podWithExtendedResourceName,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
},
prebind: result{
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
added: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
},
postbind: result{
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
},
"implicit-extended-resource-name-with-resources": {
enableDRAExtendedResource: true,
pod: podWithImplicitExtendedResourceName,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
objs: []apiruntime.Object{largeWorkerNodeSlice, podWithImplicitExtendedResourceName},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{implicitExtendedResourceClaimNoName},
},
prebind: result{
assumedClaim: reserve(implicitExtendedResourceClaim, podWithImplicitExtendedResourceName),
added: []metav1.Object{reserve(implicitExtendedResourceClaim, podWithImplicitExtendedResourceName)},
},
postbind: result{
assumedClaim: reserve(implicitExtendedResourceClaim, podWithImplicitExtendedResourceName),
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
},
"implicit-extended-resource-name-two-containers-with-resources": {
enableDRAExtendedResource: true,
pod: podWithImplicitExtendedResourceNameTwoContainers,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
objs: []apiruntime.Object{largeWorkerNodeSlice, podWithImplicitExtendedResourceNameTwoContainers},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{implicitExtendedResourceClaimNoNameTwoContainers},
},
prebind: result{
assumedClaim: reserve(implicitExtendedResourceClaimTwoContainers, podWithImplicitExtendedResourceNameTwoContainers),
added: []metav1.Object{reserve(implicitExtendedResourceClaimTwoContainers, podWithImplicitExtendedResourceNameTwoContainers)},
},
postbind: result{
assumedClaim: reserve(implicitExtendedResourceClaimTwoContainers, podWithImplicitExtendedResourceNameTwoContainers),
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
},
"extended-resource-name-with-resources-fail-patch": {
enableDRAExtendedResource: true,
failPatch: true,
pod: podWithExtendedResourceName,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
},
prebind: result{
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
added: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
status: fwk.NewStatus(fwk.Unschedulable, `patch error`),
},
postbind: result{
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
},
"extended-resource-name-with-resources-has-claim": {
enableDRAExtendedResource: true,
pod: podWithExtendedResourceName,
claims: []*resourceapi.ResourceClaim{extendedResourceClaim},
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot schedule extended resource claim`),
},
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `deletion of ResourceClaim completed`),
removed: []metav1.Object{extendedResourceClaim},
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.ErrorContains(tCtx, err, "not found")
},
},
"extended-resource-name-with-resources-delete-claim": {
enableDRAExtendedResource: true,
pod: podWithExtendedResourceName,
claims: []*resourceapi.ResourceClaim{extendedResourceClaimNode2},
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot schedule extended resource claim`),
},
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `deletion of ResourceClaim completed`),
removed: []metav1.Object{extendedResourceClaimNode2},
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.ErrorContains(tCtx, err, "not found")
},
},
"extended-resource-name-bind-failure": {
enableDRAExtendedResource: true,
pod: podWithExtendedResourceName,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
},
prebind: result{
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
added: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
},
unreserveAfterBindFailure: &result{
removed: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
},
"extended-resource-name-skip-bind": {
enableDRAExtendedResource: true,
pod: podWithExtendedResourceName,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
},
unreserveBeforePreBind: &result{},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["success"]))
},
},
"extended-resource-name-claim-creation-failure": {
enableDRAExtendedResource: true,
pod: podWithExtendedResourceName,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
},
prebind: result{
status: fwk.NewStatus(fwk.Unschedulable, `claim creation errors`),
},
unreserveAfterBindFailure: &result{
removed: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
},
},
reactors: []cgotesting.Reactor{
&cgotesting.SimpleReactor{
Verb: "create",
Resource: "resourceclaims",
Reaction: func(action cgotesting.Action) (handled bool, ret apiruntime.Object, err error) {
return true, nil, apierrors.NewBadRequest("claim creation errors")
},
},
},
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
require.NoError(tCtx, err)
require.Equal(tCtx, 1, int(metric["failure"]))
},
},
"canceled": {
cancelFilter: true,
args: &config.DynamicResourcesArgs{
FilterTimeout: &metav1.Duration{Duration: time.Nanosecond},
},
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{largeClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{largeWorkerNodeSlice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `asked by caller to stop allocating devices: test canceling Filter`),
},
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
},
},
},
"timeout": {
args: &config.DynamicResourcesArgs{
FilterTimeout: &metav1.Duration{Duration: time.Nanosecond},
},
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{largeClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{largeWorkerNodeSlice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `timed out trying to allocate devices`),
},
},
postfilter: result{
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
},
},
// Skip this test case on Windows: a 1ns timeout is not guaranteed to
// expire immediately there due to the coarser timer granularity,
// typically in the range of 0.5 to 15.6 ms.
skipOnWindows: "coarse timer granularity",
},
"timeout_disabled": {
// This variant uses the normal test objects to avoid excessive runtime.
// It could theoretically pass even if the 1 ns limit were enforced
// when it shouldn't be (which would be a false positive),
// but that's unlikely.
disableDRASchedulerFilterTimeout: true,
args: &config.DynamicResourcesArgs{},
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaim},
},
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = allocatedClaim.Finalizers
claim.Status = inUseClaim.Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
},
},
},
"timeout_zero": {
args: &config.DynamicResourcesArgs{
FilterTimeout: &metav1.Duration{Duration: 0},
},
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
classes: []*resourceapi.DeviceClass{deviceClass},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaim},
},
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = allocatedClaim.Finalizers
claim.Status = inUseClaim.Status
}
return claim
},
},
},
postbind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimName),
},
},
},
"bound-claim-with-succeeded-binding-conditions": {
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{boundClaim},
want: want{
prebind: result{
assumedClaim: reserve(boundClaim, podWithClaimName),
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(in).
ReservedFor(resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
Obj()
},
},
status: nil,
},
},
},
"bound-claim-with-failed-binding": {
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{failedBindingClaim},
objs: []apiruntime.Object{workerNodeSlice},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim not available on the node`),
},
},
postfilter: result{
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(in).
Allocation(nil).
AllocatedDeviceStatuses(nil).
Obj()
},
},
status: fwk.NewStatus(fwk.Unschedulable, `deallocation of ResourceClaim completed`),
},
},
},
"bound-claim-with-timed-out-binding": {
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
pod: podWithClaimName,
claims: func() []*resourceapi.ResourceClaim {
claim := allocatedClaim.DeepCopy()
claim.Status.Allocation = allocationResultWithBindingConditions.DeepCopy()
// This claim has binding conditions but is timed out.
claim.Status.Allocation.AllocationTimestamp = ptr.To(metav1.NewTime(time.Now().Add(-10 * time.Minute)))
claim.Status.Devices = []resourceapi.AllocatedDeviceStatus{
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
},
}
return []*resourceapi.ResourceClaim{claim}
}(),
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim not available on the node`),
},
},
postfilter: result{
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(in).
Allocation(nil).
AllocatedDeviceStatuses(nil).
Obj()
},
},
status: fwk.NewStatus(fwk.Unschedulable, `deallocation of ResourceClaim completed`),
},
},
},
"prebind-fail-with-binding-timeout": {
patchTestCase: func(tc *testPluginCase) {
// The timestamps must be injected into the test case right
// before it runs.
now := time.Now()
// Set the allocation time so that the claim has not timed out
// yet when the test starts, but times out 10 seconds later,
// while the test executes PreBind.
bindingTimeout := tc.args.BindingTimeout.Duration
timeoutAfter := 10 * time.Second
allocatedAt := now.Add(-bindingTimeout).Add(timeoutAfter)
claim := allocatedClaim.DeepCopy()
claim.Status.Allocation = allocationResultWithBindingConditions.DeepCopy()
// This claim has binding conditions but is not timed out.
claim.Status.Allocation.AllocationTimestamp = ptr.To(metav1.NewTime(allocatedAt))
claim.Status.Devices = []resourceapi.AllocatedDeviceStatus{
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
},
}
tc.claims = []*resourceapi.ResourceClaim{claim}
claim = claim.DeepCopy()
claim.Status.Devices = []resourceapi.AllocatedDeviceStatus{
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
},
}
tc.want.prebind.assumedClaim = reserve(claim, podWithClaimName)
},
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
args: &config.DynamicResourcesArgs{
BindingTimeout: &metav1.Duration{Duration: 600 * time.Second},
},
pod: podWithClaimName,
claims: nil, // Set in patchTestCase.
want: want{
prebind: result{
assumedClaim: nil, // Set in patchTestCase.
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(in).
ReservedFor(resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
Obj()
},
},
status: fwk.AsStatus(errors.New("claim " + claim.Name + " binding timeout")),
},
},
},
"bound-claim-with-mixed-binding-conditions": {
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
pod: podWithClaimName,
claims: func() []*resourceapi.ResourceClaim {
claim := allocatedClaim.DeepCopy()
claim.Status.Allocation = allocationResultWithBindingConditions.DeepCopy()
// This claim has binding conditions but is timed out.
claim.Status.Allocation.AllocationTimestamp = ptr.To(metav1.NewTime(time.Now().Add(-10 * time.Minute)))
claim.Status.Devices = []resourceapi.AllocatedDeviceStatus{
{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Conditions: []metav1.Condition{
{Type: "condition1", Status: metav1.ConditionTrue},
{Type: "condition2", Status: metav1.ConditionFalse},
},
},
}
return []*resourceapi.ResourceClaim{claim}
}(),
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim not available on the node`),
},
},
postfilter: result{
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(in).
Allocation(nil).
AllocatedDeviceStatuses(nil).
Obj()
},
},
status: fwk.NewStatus(fwk.Unschedulable, `deallocation of ResourceClaim completed`),
},
},
},
"bound-claim-without-binding-conditions": {
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
// This test ensures that when DRADeviceBindingConditions is enabled,
// but the claim has no binding conditions or binding failures,
// the plugin proceeds as if all conditions are satisfied.
pod: podWithClaimTemplateInStatus,
claims: []*resourceapi.ResourceClaim{allocatedClaim, otherClaim},
want: want{
prebind: result{
assumedClaim: reserve(allocatedClaim, podWithClaimTemplateInStatus),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Status.ReservedFor = inUseClaim.Status.ReservedFor
}
return claim
},
},
status: nil,
},
},
},
"multi-claims-binding-conditions-all-success": {
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
pod: podWithTwoClaimNames,
claims: []*resourceapi.ResourceClaim{boundClaim, boundClaim2},
classes: []*resourceapi.DeviceClass{deviceClass},
nodes: []*v1.Node{workerNode},
objs: []apiruntime.Object{fabricSlice, fabricSlice2},
want: want{
prebind: result{
assumedClaim: reserve(boundClaim, podWithTwoClaimNames),
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
return st.FromResourceClaim(in).
ReservedFor(resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
Obj()
},
},
status: nil,
},
},
},
"multi-claims-binding-conditions-one-fail": {
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
pod: podWithTwoClaimNames,
claims: []*resourceapi.ResourceClaim{boundClaim, failedBindingClaim2},
classes: []*resourceapi.DeviceClass{deviceClass},
nodes: []*v1.Node{workerNode},
objs: []apiruntime.Object{fabricSlice, fabricSlice2},
want: want{
filter: perNodeResult{
workerNode.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim not available on the node`),
},
},
postfilter: result{
changes: change{
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if in.Name == claimName2 {
return st.FromResourceClaim(in).
Allocation(nil).
AllocatedDeviceStatuses(nil).
Obj()
} else {
return in
}
},
},
status: fwk.NewStatus(fwk.Unschedulable, `deallocation of ResourceClaim completed`),
},
},
},
"single-claim-prioritized-list-scoring": {
enableDRAPrioritizedList: true,
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaimWithPrioritizedListAndSelector},
classes: []*resourceapi.DeviceClass{deviceClass},
nodes: []*v1.Node{workerNode, workerNode2},
objs: []apiruntime.Object{
st.MakeResourceSlice(nodeName, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(true)}}).Obj(),
st.MakeResourceSlice(node2Name, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(false)}}).Obj(),
},
want: want{
scoreResult: perNodeScoreResult{
nodeName: 8,
node2Name: 7,
},
normalizeScoreResult: fwk.NodeScoreList{
{
Name: nodeName,
Score: 100,
},
{
Name: node2Name,
Score: 87,
},
},
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaimWithPrioritizedListAndSelector},
},
prebind: result{
assumedClaim: reserve(allocatedClaimWithPrioritizedListAndSelector, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = allocatedClaimWithPrioritizedListAndSelector.Finalizers
claim.Status = inUseClaimWithPrioritizedListAndSelector.Status
}
return claim
},
},
},
},
},
"multiple-claims-prioritized-list-scoring": {
enableDRAPrioritizedList: true,
pod: podWithTwoClaimNames,
claims: []*resourceapi.ResourceClaim{pendingClaimWithPrioritizedList, pendingClaim2WithPrioritizedListAndMultipleSubrequests},
classes: []*resourceapi.DeviceClass{deviceClass},
nodes: []*v1.Node{workerNode, workerNode2, workerNode3},
objs: []apiruntime.Object{
st.MakeResourceSlice(nodeName, driver).
Device("instance-1").
Device("instance-2").
Device("instance-3").
Device("instance-4").Obj(),
st.MakeResourceSlice(node2Name, driver).
Device("instance-1").
Device("instance-2").Obj(),
st.MakeResourceSlice(node3Name, driver).
Device("instance-1").Obj(),
},
want: want{
filter: perNodeResult{
workerNode3.Name: {
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
},
},
scoreResult: perNodeScoreResult{
workerNode.Name: 15,
workerNode2.Name: 13,
},
normalizeScoreResult: fwk.NodeScoreList{
{
Name: workerNode.Name,
Score: 100,
},
{
Name: workerNode2.Name,
Score: 86,
},
},
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaimWithPrioritizedList, allocatedClaim2WithPrioritizedListAndMultipleSubrequests},
},
prebind: result{
assumedClaim: reserve(allocatedClaimWithPrioritizedList, podWithTwoClaimNames),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = inUseClaimWithPrioritizedList.Finalizers
claim.Status = inUseClaimWithPrioritizedList.Status
}
if claim.Name == claimName2 {
claim = claim.DeepCopy()
claim.Finalizers = inUseClaim2WithPrioritizedListAndMultipleSubrequests.Finalizers
claim.Status = inUseClaim2WithPrioritizedListAndMultipleSubrequests.Status
}
return claim
},
},
},
},
},
"multiple-requests-prioritized-list-scoring": {
enableDRAPrioritizedList: true,
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaimWithMultiplePrioritizedListRequests},
classes: []*resourceapi.DeviceClass{deviceClass},
nodes: []*v1.Node{workerNode, workerNode2, workerNode3},
objs: []apiruntime.Object{
st.MakeResourceSlice(nodeName, driver).
Device("instance-1").
Device("instance-2").
Device("instance-3").
Device("instance-4").Obj(),
st.MakeResourceSlice(node2Name, driver).
Device("instance-1").
Device("instance-2").
Device("instance-3").Obj(),
st.MakeResourceSlice(node3Name, driver).
Device("instance-1").
Device("instance-2").Obj(),
},
want: want{
scoreResult: perNodeScoreResult{
workerNode.Name: 16,
workerNode2.Name: 15,
workerNode3.Name: 14,
},
normalizeScoreResult: fwk.NodeScoreList{
{
Name: workerNode.Name,
Score: 100,
},
{
Name: workerNode2.Name,
Score: 93,
},
{
Name: workerNode3.Name,
Score: 87,
},
},
reserve: result{
inFlightClaims: []metav1.Object{allocatedClaimWithMultiplePrioritizedListRequests},
},
prebind: result{
assumedClaim: reserve(allocatedClaimWithMultiplePrioritizedListRequests, podWithClaimName),
changes: change{
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
if claim.Name == claimName {
claim = claim.DeepCopy()
claim.Finalizers = inUseClaimWithMultiplePrioritizedListRequests.Finalizers
claim.Status = inUseClaimWithMultiplePrioritizedListRequests.Status
}
return claim
},
},
},
},
},
}
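// Each test case is driven through the plugin's extension points in scheduling
// order: PreEnqueue, PreFilter, Filter, Score/NormalizeScore, Reserve, and then
// either Unreserve, PreBindPreFlight/PreBind, or PostFilter. The expected status
// and object changes are verified after each step.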
for name, tc := range testcases {
if len(tc.skipOnWindows) > 0 && goruntime.GOOS == "windows" {
tCtx.Skipf("Skipping '%s' test case on Windows, reason: %s", name, tc.skipOnWindows)
}
tCtx.Run(name, func(tCtx ktesting.TContext) {
if tc.patchTestCase != nil {
tc.patchTestCase(&tc)
}
nodes := tc.nodes
if nodes == nil {
nodes = []*v1.Node{workerNode}
}
feats := feature.Features{
EnableDRAAdminAccess: tc.enableDRAAdminAccess,
EnableDRADeviceBindingConditions: tc.enableDRADeviceBindingConditions,
EnableDRAResourceClaimDeviceStatus: tc.enableDRAResourceClaimDeviceStatus,
EnableDRADeviceTaints: tc.enableDRADeviceTaints,
EnableDRASchedulerFilterTimeout: !tc.disableDRASchedulerFilterTimeout,
EnableDynamicResourceAllocation: !tc.disableDRA,
EnableDRAPrioritizedList: tc.enableDRAPrioritizedList,
EnableDRAExtendedResource: tc.enableDRAExtendedResource,
}
featuregatetesting.SetFeatureGateDuringTest(tCtx, utilfeature.DefaultFeatureGate, features.DRAExtendedResource, tc.enableDRAExtendedResource)
testCtx := setup(tCtx, tc.args, nodes, tc.claims, tc.classes, tc.objs, feats, tc.failPatch, tc.reactors)
initialObjects := testCtx.listAll(tCtx)
var registry compbasemetrics.KubeRegistry
if tc.metrics != nil {
registry = setupMetrics(feats)
}
status := testCtx.p.PreEnqueue(tCtx, tc.pod)
tCtx.Run("PreEnqueue", func(tCtx ktesting.TContext) {
testCtx.verify(tCtx, tc.want.preenqueue, initialObjects, nil, status)
})
if !status.IsSuccess() {
return
}
nodeInfo := framework.NewNodeInfo()
result, status := testCtx.p.PreFilter(tCtx, testCtx.state, tc.pod, []fwk.NodeInfo{nodeInfo})
tCtx.Run("prefilter", func(tCtx ktesting.TContext) {
assert.Equal(tCtx, tc.want.preFilterResult, result)
testCtx.verify(tCtx, tc.want.prefilter, initialObjects, result, status)
})
unschedulable := status.IsRejected()
var potentialNodes []fwk.NodeInfo
initialObjects = testCtx.listAll(tCtx)
testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.filter)
if !unschedulable {
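// Run Filter once per node. Nodes with a Success status become candidates
// for scoring; an Error status aborts the test case, just as an error
// would abort scheduling.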
for _, nodeInfo := range testCtx.nodeInfos {
var status *fwk.Status
tCtx.Run(fmt.Sprintf("filter/%s", nodeInfo.Node().Name), func(tCtx ktesting.TContext) {
initialObjects = testCtx.listAll(tCtx)
ctx := context.Context(tCtx)
if tc.cancelFilter {
c, cancel := context.WithCancelCause(ctx)
ctx = c
cancel(errors.New("test canceling Filter"))
}
status = testCtx.p.Filter(ctx, testCtx.state, tc.pod, nodeInfo)
nodeName := nodeInfo.Node().Name
testCtx.verify(tCtx, tc.want.filter.forNode(nodeName), initialObjects, nil, status)
})
if status.Code() == fwk.Success {
potentialNodes = append(potentialNodes, nodeInfo)
}
if status.Code() == fwk.Error {
// An error aborts scheduling.
return
}
}
if len(potentialNodes) == 0 {
unschedulable = true
}
}
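// Score and NormalizeScore only run when more than one node passed filtering.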
var scores fwk.NodeScoreList
if !unschedulable && len(potentialNodes) > 1 {
initialObjects = testCtx.listAll(tCtx)
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.prescore)
for _, potentialNode := range potentialNodes {
initialObjects = testCtx.listAll(tCtx)
score, status := testCtx.p.Score(tCtx, testCtx.state, tc.pod, potentialNode)
nodeName := potentialNode.Node().Name
tCtx.Run(fmt.Sprintf("score/%s", nodeName), func(tCtx ktesting.TContext) {
assert.Equal(tCtx, tc.want.scoreResult.forNode(nodeName), score)
testCtx.verify(tCtx, tc.want.score.forNode(nodeName), initialObjects, nil, status)
})
scores = append(scores, fwk.NodeScore{Name: nodeName, Score: score})
}
initialObjects = testCtx.listAll(tCtx)
status := testCtx.p.NormalizeScore(tCtx, testCtx.state, tc.pod, scores)
tCtx.Run("normalizeScore", func(tCtx ktesting.TContext) {
assert.Equal(tCtx, tc.want.normalizeScoreResult, scores)
testCtx.verify(tCtx, tc.want.normalizeScore, initialObjects, nil, status)
})
}
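// Select the node with the highest score, or the first candidate when
// scoring was skipped, and run Reserve for it.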
var selectedNodeName string
if !unschedulable && len(potentialNodes) > 0 {
if len(scores) > 0 {
nodeScore := scores[0]
for _, score := range scores {
if score.Score > nodeScore.Score {
nodeScore = score
}
}
selectedNodeName = nodeScore.Name
} else {
selectedNodeName = potentialNodes[0].Node().Name
}
initialObjects = testCtx.listAll(tCtx)
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.reserve)
status := testCtx.p.Reserve(tCtx, testCtx.state, tc.pod, selectedNodeName)
tCtx.Run("reserve", func(tCtx ktesting.TContext) {
testCtx.verify(tCtx, tc.want.reserve, initialObjects, nil, status)
})
if status.Code() != fwk.Success {
unschedulable = true
}
}
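// With a selected node, either roll back via Unreserve (when Reserve failed)
// or continue with PreBindPreFlight and PreBind. Without any candidate node,
// exercise PostFilter instead.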
if selectedNodeName != "" {
if unschedulable {
initialObjects = testCtx.listAll(tCtx)
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.unreserve)
testCtx.p.Unreserve(tCtx, testCtx.state, tc.pod, selectedNodeName)
tCtx.Run("unreserve", func(tCtx ktesting.TContext) {
testCtx.verify(tCtx, tc.want.unreserve, initialObjects, nil, status)
})
} else {
if tc.want.unreserveBeforePreBind != nil {
initialObjects = testCtx.listAll(tCtx)
testCtx.p.Unreserve(tCtx, testCtx.state, tc.pod, selectedNodeName)
tCtx.Run("unreserveBeforePreBind", func(tCtx ktesting.TContext) {
testCtx.verify(tCtx, *tc.want.unreserveBeforePreBind, initialObjects, nil, status)
})
return
}
initialObjects = testCtx.listAll(tCtx)
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.prebind)
preBindPreFlightResult, preBindPreFlightStatus := testCtx.p.PreBindPreFlight(tCtx, testCtx.state, tc.pod, selectedNodeName)
tCtx.Run("preBindPreFlightStatus", func(tContext ktesting.TContext) {
assert.Equal(tCtx, tc.want.preBindPreFlightStatus, preBindPreFlightStatus)
})
tCtx.Run("preBindPreFlightResult", func(tContext ktesting.TContext) {
assert.Equal(tCtx, &fwk.PreBindPreFlightResult{AllowParallel: true}, preBindPreFlightResult)
})
preBindStatus := testCtx.p.PreBind(tCtx, testCtx.state, tc.pod, selectedNodeName)
tCtx.Run("prebind", func(tCtx ktesting.TContext) {
testCtx.verify(tCtx, tc.want.prebind, initialObjects, nil, preBindStatus)
})
if tc.want.unreserveAfterBindFailure != nil {
initialObjects = testCtx.listAll(tCtx)
testCtx.p.Unreserve(tCtx, testCtx.state, tc.pod, selectedNodeName)
tCtx.Run("unreserverAfterBindFailure", func(tCtx ktesting.TContext) {
testCtx.verify(tCtx, *tc.want.unreserveAfterBindFailure, initialObjects, nil, status)
})
} else if status.IsSuccess() {
initialObjects = testCtx.listAll(tCtx)
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.postbind)
}
}
} else if len(potentialNodes) == 0 {
initialObjects = testCtx.listAll(tCtx)
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.postfilter)
result, status := testCtx.p.PostFilter(tCtx, testCtx.state, tc.pod, nil /* filteredNodeStatusMap not used by plugin */)
tCtx.Run("postfilter", func(tCtx ktesting.TContext) {
assert.Equal(tCtx, tc.want.postFilterResult, result)
testCtx.verify(tCtx, tc.want.postfilter, initialObjects, nil, status)
})
}
if tc.metrics != nil {
tc.metrics(tCtx, registry)
}
})
}
}
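// setupMetrics returns a fresh registry with the metrics relevant for the
// enabled features registered and reset, so each test case starts counting
// from zero.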
func setupMetrics(features feature.Features) compbasemetrics.KubeRegistry {
// Since the feature gates are not set globally, we can't use metrics.Register().
// We use a new registry instead of the global one.
testRegistry := compbasemetrics.NewKubeRegistry()
if features.EnableDRAExtendedResource {
testRegistry.MustRegister(metrics.ResourceClaimCreatesTotal)
metrics.ResourceClaimCreatesTotal.Reset()
}
return testRegistry
}
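// testContext bundles the fake client, informers, DRA manager, plugin instance,
// node infos, and cycle state shared by the test helpers.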
type testContext struct {
client *fake.Clientset
informerFactory informers.SharedInformerFactory
draManager *DefaultDRAManager
p *DynamicResources
nodeInfos []fwk.NodeInfo
state fwk.CycleState
}
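// verify compares the status returned by an extension point as well as the
// resulting API objects, assumed claims, and in-flight claims against the
// expected result.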
func (tc *testContext) verify(tCtx ktesting.TContext, expected result, initialObjects []metav1.Object, result interface{}, status *fwk.Status) {
tCtx.Helper()
if expected.status == nil {
assert.Nil(tCtx, status)
} else if actualErr := status.AsError(); actualErr != nil {
// Compare only the error strings.
assert.ErrorContains(tCtx, actualErr, expected.status.AsError().Error())
} else {
assert.Equal(tCtx, expected.status, status)
}
objects := tc.listAll(tCtx)
wantObjects := update(initialObjects, expected.changes)
wantObjects = append(wantObjects, expected.added...)
for _, remove := range expected.removed {
for i, obj := range wantObjects {
// This is a bit relaxed (no GVR comparison, no UID
// comparison) to simplify writing the test cases.
if obj.GetName() == remove.GetName() && obj.GetNamespace() == remove.GetNamespace() {
wantObjects = append(wantObjects[0:i], wantObjects[i+1:]...)
break
}
}
}
sortObjects(wantObjects)
if wantObjects == nil {
wantObjects = []metav1.Object{}
}
if objects == nil {
objects = []metav1.Object{}
}
// Sometimes assert strips the diff too much; compute it ourselves...
ignoreFieldsInResourceClaims := []cmp.Option{
cmpopts.IgnoreFields(metav1.ObjectMeta{}, "UID", "ResourceVersion"),
cmpopts.IgnoreFields(resourceapi.AllocationResult{}, "AllocationTimestamp"),
// For the purposes of the test it does not matter which specific device is allocated.
cmpopts.IgnoreFields(resourceapi.DeviceRequestAllocationResult{}, "Device"),
}
if diff := cmp.Diff(wantObjects, objects, ignoreFieldsInResourceClaims...); diff != "" {
tCtx.Errorf("Stored objects are different (- expected, + actual):\n%s", diff)
}
var expectAssumedClaims []metav1.Object
if expected.assumedClaim != nil {
expectAssumedClaims = append(expectAssumedClaims, expected.assumedClaim)
}
// actualAssumedClaims are claims in the assumed cache whose latest (assumed) object differs from the API object.
// sameAssumedClaims are claims in the assumed cache whose latest object is identical to the API object.
actualAssumedClaims, sameAssumedClaims := tc.listAssumedClaims()
// Fail when no assumed claims are expected but the cache still holds claims whose latest object differs from the API object.
if len(expectAssumedClaims) == 0 && len(actualAssumedClaims) != 0 {
// If the claim API object was deleted, wait for the assumed cache to sync with the informer;
// afterwards the assumed cache should be empty.
err := wait.PollUntilContextTimeout(tCtx, 200*time.Millisecond, time.Minute, true,
func(ctx context.Context) (bool, error) {
actualAssumedClaims, sameAssumedClaims = tc.listAssumedClaims()
return len(actualAssumedClaims) == 0, nil
})
if err != nil || len(actualAssumedClaims) != 0 {
tCtx.Errorf("Assumed claims are different, err=%v, expected: nil, actual:\n%v", err, actualAssumedClaims)
}
}
if len(expectAssumedClaims) > 0 {
// It is not an error as long as the expected claim is present in the assumed cache,
// regardless of whether its latest and API objects differ.
for _, expected := range expectAssumedClaims {
seen := false
for _, actual := range actualAssumedClaims {
if cmp.Equal(expected, actual, ignoreFieldsInResourceClaims...) {
seen = true
}
}
for _, same := range sameAssumedClaims {
if cmp.Equal(expected, same, ignoreFieldsInResourceClaims...) {
seen = true
}
}
if !seen {
tCtx.Errorf("Assumed claims are different, expected: %v not found", expected)
}
}
}
actualInFlightClaims := tc.listInFlightClaims()
if diff := cmp.Diff(expected.inFlightClaims, actualInFlightClaims, ignoreFieldsInResourceClaims...); diff != "" {
tCtx.Errorf("In-flight claims are different (- expected, + actual):\n%s", diff)
}
}
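// listAll returns all ResourceClaims currently stored in the fake API server,
// sorted for deterministic comparison.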
func (tc *testContext) listAll(tCtx ktesting.TContext) (objects []metav1.Object) {
tCtx.Helper()
claims, err := tc.client.ResourceV1().ResourceClaims("").List(tCtx, metav1.ListOptions{})
tCtx.ExpectNoError(err, "list claims")
for _, claim := range claims.Items {
claim := claim
objects = append(objects, &claim)
}
sortObjects(objects)
return
}
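// listAssumedClaims returns the claims in the assumed cache, split into those
// whose assumed (latest) object differs from the API object and those where
// both are identical.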
func (tc *testContext) listAssumedClaims() ([]metav1.Object, []metav1.Object) {
var assumedClaims []metav1.Object
var sameClaims []metav1.Object
for _, obj := range tc.draManager.resourceClaimTracker.cache.List(nil) {
claim := obj.(*resourceapi.ResourceClaim)
obj, _ := tc.draManager.resourceClaimTracker.cache.Get(claim.Namespace + "/" + claim.Name)
apiObj, _ := tc.draManager.resourceClaimTracker.cache.GetAPIObj(claim.Namespace + "/" + claim.Name)
if obj != apiObj {
assumedClaims = append(assumedClaims, claim)
} else {
sameClaims = append(sameClaims, claim)
}
}
sortObjects(assumedClaims)
sortObjects(sameClaims)
return assumedClaims, sameClaims
}
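// listInFlightClaims returns the claims currently tracked as in-flight
// allocations, sorted by namespace and name.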
func (tc *testContext) listInFlightClaims() []metav1.Object {
var inFlightClaims []metav1.Object
tc.draManager.resourceClaimTracker.inFlightAllocations.Range(func(key, value any) bool {
inFlightClaims = append(inFlightClaims, value.(*resourceapi.ResourceClaim))
return true
})
sortObjects(inFlightClaims)
return inFlightClaims
}
// updateAPIServer modifies objects and stores any changed object in the API server.
func (tc *testContext) updateAPIServer(tCtx ktesting.TContext, objects []metav1.Object, updates change) []metav1.Object {
modified := update(objects, updates)
for i := range modified {
obj := modified[i]
if diff := cmp.Diff(objects[i], obj); diff != "" {
tCtx.Logf("Updating %T %q, diff (-old, +new):\n%s", obj, obj.GetName(), diff)
switch obj := obj.(type) {
case *resourceapi.ResourceClaim:
obj, err := tc.client.ResourceV1().ResourceClaims(obj.Namespace).Update(tCtx, obj, metav1.UpdateOptions{})
tCtx.ExpectNoError(err, "prepare update")
modified[i] = obj
default:
tCtx.Fatalf("unsupported object type %T", obj)
}
}
}
return modified
}
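// sortObjects sorts by namespace, then by name, to make object lists comparable.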
func sortObjects(objects []metav1.Object) {
sort.Slice(objects, func(i, j int) bool {
if objects[i].GetNamespace() != objects[j].GetNamespace() {
return objects[i].GetNamespace() < objects[j].GetNamespace()
}
return objects[i].GetName() < objects[j].GetName()
})
}
// update walks through all existing objects, finds the corresponding update
// function based on name and kind, and replaces those objects that have an
// update function. The rest is left unchanged.
func update(objects []metav1.Object, updates change) []metav1.Object {
var updated []metav1.Object
for _, obj := range objects {
switch in := obj.(type) {
case *resourceapi.ResourceClaim:
if updates.claim != nil {
obj = updates.claim(in)
}
}
updated = append(updated, obj)
}
return updated
}
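// setup creates the fake client, informers, resource slice tracker, DRA manager,
// and the DynamicResources plugin for a test case, pre-populating the given
// claims, classes, and other objects.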
func setup(tCtx ktesting.TContext, args *config.DynamicResourcesArgs, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, classes []*resourceapi.DeviceClass, objs []apiruntime.Object, features feature.Features, failPatch bool, apiReactors []cgotesting.Reactor) (result *testContext) {
tCtx.Helper()
tc := &testContext{}
tc.client = fake.NewSimpleClientset(objs...)
reactor := createReactor(tc.client.Tracker(), failPatch)
tc.client.PrependReactor("*", "*", reactor)
// Prepend the test's extra reactors so they run before the default reaction chain.
tc.client.ReactionChain = append(apiReactors, tc.client.ReactionChain...)
tc.informerFactory = informers.NewSharedInformerFactory(tc.client, 0)
resourceSliceTrackerOpts := resourceslicetracker.Options{
EnableDeviceTaintRules: true,
SliceInformer: tc.informerFactory.Resource().V1().ResourceSlices(),
TaintInformer: tc.informerFactory.Resource().V1alpha3().DeviceTaintRules(),
ClassInformer: tc.informerFactory.Resource().V1().DeviceClasses(),
KubeClient: tc.client,
}
resourceSliceTracker, err := resourceslicetracker.StartTracker(tCtx, resourceSliceTrackerOpts)
require.NoError(tCtx, err, "couldn't start resource slice tracker")
claimsCache := assumecache.NewAssumeCache(tCtx.Logger(), tc.informerFactory.Resource().V1().ResourceClaims().Informer(), "resource claim", "", nil)
// NewAssumeCache calls the informer's AddEventHandler method to register
// a handler in order to stay in sync with the informer's store, but
// NewAssumeCache does not return the ResourceEventHandlerRegistration.
// We call AddEventHandler of the assume cache, passing it a noop
// ResourceEventHandler in order to get access to the
// ResourceEventHandlerRegistration returned by the informer.
//
// This is not the registered handler that is used by the DRA
// manager, but it is close enough because the assume cache
// uses a single boolean for "is synced" for all handlers.
registeredHandler := claimsCache.AddEventHandler(cache.ResourceEventHandlerFuncs{})
tc.draManager = NewDRAManager(tCtx, claimsCache, resourceSliceTracker, tc.informerFactory)
if features.EnableDRAExtendedResource {
cache := tc.draManager.DeviceClassResolver().(*extendedresourcecache.ExtendedResourceCache)
if _, err := tc.informerFactory.Resource().V1().DeviceClasses().Informer().AddEventHandler(cache); err != nil {
tCtx.Logger().Error(err, "failed to add device class informer event handler")
}
}
opts := []runtime.Option{
runtime.WithClientSet(tc.client),
runtime.WithInformerFactory(tc.informerFactory),
runtime.WithEventRecorder(&events.FakeRecorder{}),
runtime.WithSharedDRAManager(tc.draManager),
}
fh, err := runtime.NewFramework(tCtx, nil, nil, opts...)
tCtx.ExpectNoError(err, "create scheduler framework")
tCtx.Cleanup(func() {
tCtx.Cancel("test has completed")
runtime.WaitForShutdown(fh)
})
if args == nil {
args = getDefaultDynamicResourcesArgs()
}
pl, err := New(tCtx, args, fh, features)
tCtx.ExpectNoError(err, "create plugin")
tc.p = pl.(*DynamicResources)
// The tests create the objects through the API so that the
// registered reactors get triggered.
for _, claim := range claims {
_, err := tc.client.ResourceV1().ResourceClaims(claim.Namespace).Create(tCtx, claim, metav1.CreateOptions{})
tCtx.ExpectNoError(err, "create resource claim")
}
for _, class := range classes {
_, err := tc.client.ResourceV1().DeviceClasses().Create(tCtx, class, metav1.CreateOptions{})
tCtx.ExpectNoError(err, "create resource class")
}
tc.informerFactory.Start(tCtx.Done())
tCtx.Cleanup(func() {
// Need to cancel before waiting for the shutdown.
tCtx.Cancel("test is done")
// Now we can wait for all goroutines to stop.
tc.informerFactory.Shutdown()
})
tc.informerFactory.WaitForCacheSync(tCtx.Done())
// The above does not tell us whether the registered handler (from NewAssumeCache)
// is synced. We need to wait until the handler's HasSynced returns true;
// this ensures that the assume cache is in sync with the informer's store,
// which has been populated by at least one full LIST of the underlying storage.
cache.WaitForNamedCacheSyncWithContext(tCtx, registeredHandler.HasSynced, resourceSliceTracker.HasSynced)
for _, node := range nodes {
nodeInfo := framework.NewNodeInfo()
nodeInfo.SetNode(node)
tc.nodeInfos = append(tc.nodeInfos, nodeInfo)
}
tc.state = framework.NewCycleState()
return tc
}
// createReactor implements the logic required for the UID and ResourceVersion
// fields to work when using the fake client. Add it with client.PrependReactor
// to your fake client. ResourceVersion handling is required for conflict
// detection during updates, which is covered by some scenarios.
func createReactor(tracker cgotesting.ObjectTracker, failPatch bool) func(action cgotesting.Action) (handled bool, ret apiruntime.Object, err error) {
var nameCounter int
var uidCounter int
var resourceVersionCounter int
var mutex sync.Mutex
return func(action cgotesting.Action) (handled bool, ret apiruntime.Object, err error) {
if failPatch {
if _, ok := action.(cgotesting.PatchAction); ok {
return true, nil, errors.New("patch error")
}
}
createAction, ok := action.(cgotesting.CreateAction)
if !ok {
return false, nil, nil
}
obj, ok := createAction.GetObject().(metav1.Object)
if !ok {
return false, nil, nil
}
mutex.Lock()
defer mutex.Unlock()
switch action.GetVerb() {
case "create":
if obj.GetUID() != "" {
return true, nil, errors.New("UID must not be set on create")
}
if obj.GetResourceVersion() != "" {
return true, nil, errors.New("ResourceVersion must not be set on create")
}
obj.SetUID(types.UID(fmt.Sprintf("UID-%d", uidCounter)))
uidCounter++
obj.SetResourceVersion(fmt.Sprintf("%d", resourceVersionCounter))
resourceVersionCounter++
if obj.GetName() == "" {
obj.SetName(obj.GetGenerateName() + fmt.Sprintf("%d", nameCounter))
nameCounter++
}
case "update":
uid := obj.GetUID()
resourceVersion := obj.GetResourceVersion()
if uid == "" {
return true, nil, errors.New("UID must be set on update")
}
if resourceVersion == "" {
return true, nil, errors.New("ResourceVersion must be set on update")
}
oldObj, err := tracker.Get(action.GetResource(), obj.GetNamespace(), obj.GetName())
if err != nil {
return true, nil, err
}
oldObjMeta, ok := oldObj.(metav1.Object)
if !ok {
return true, nil, errors.New("internal error: unexpected old object type")
}
if oldObjMeta.GetResourceVersion() != resourceVersion {
return true, nil, errors.New("ResourceVersion must match the object that gets updated")
}
obj.SetResourceVersion(fmt.Sprintf("%d", resourceVersionCounter))
resourceVersionCounter++
}
return false, nil, nil
}
}
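// TestIsSchedulableAfterClaimChange exercises the queueing hint for ResourceClaim
// add and update events. Claims are created or updated through the fake API so
// that the assume cache is in sync before the hint is evaluated.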
func TestIsSchedulableAfterClaimChange(t *testing.T) {
testIsSchedulableAfterClaimChange(ktesting.Init(t))
}
func testIsSchedulableAfterClaimChange(tCtx ktesting.TContext) {
testcases := map[string]struct {
pod *v1.Pod
claims []*resourceapi.ResourceClaim
oldObj, newObj interface{}
wantHint fwk.QueueingHint
wantErr bool
}{
"skip-deletes": {
pod: podWithClaimTemplate,
oldObj: allocatedClaim,
newObj: nil,
wantHint: fwk.QueueSkip,
},
"backoff-wrong-new-object": {
pod: podWithClaimTemplate,
newObj: "not-a-claim",
wantErr: true,
},
"skip-wrong-claim": {
pod: podWithClaimTemplate,
newObj: func() *resourceapi.ResourceClaim {
claim := allocatedClaim.DeepCopy()
claim.OwnerReferences[0].UID += "123"
return claim
}(),
wantHint: fwk.QueueSkip,
},
"skip-unrelated-claim": {
pod: podWithClaimTemplate,
claims: []*resourceapi.ResourceClaim{allocatedClaim},
newObj: func() *resourceapi.ResourceClaim {
claim := allocatedClaim.DeepCopy()
claim.Name += "-foo"
claim.UID += "123"
return claim
}(),
wantHint: fwk.QueueSkip,
},
"queue-on-add": {
pod: podWithClaimName,
newObj: pendingClaim,
wantHint: fwk.Queue,
},
"backoff-wrong-old-object": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
oldObj: "not-a-claim",
newObj: pendingClaim,
wantErr: true,
},
"skip-adding-finalizer": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
oldObj: pendingClaim,
newObj: func() *resourceapi.ResourceClaim {
claim := pendingClaim.DeepCopy()
claim.Finalizers = append(claim.Finalizers, "foo")
return claim
}(),
wantHint: fwk.QueueSkip,
},
"queue-on-status-change": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim},
oldObj: pendingClaim,
newObj: func() *resourceapi.ResourceClaim {
claim := pendingClaim.DeepCopy()
claim.Status.Allocation = &resourceapi.AllocationResult{}
return claim
}(),
wantHint: fwk.Queue,
},
"claim-deallocate": {
pod: podWithClaimName,
claims: []*resourceapi.ResourceClaim{pendingClaim, otherAllocatedClaim},
oldObj: otherAllocatedClaim,
newObj: func() *resourceapi.ResourceClaim {
claim := otherAllocatedClaim.DeepCopy()
claim.Status.Allocation = nil
return claim
}(),
wantHint: fwk.Queue,
},
}
for name, tc := range testcases {
tCtx.SyncTest(name, func(tCtx ktesting.TContext) {
features := feature.Features{
EnableDRASchedulerFilterTimeout: true,
EnableDynamicResourceAllocation: true,
}
testCtx := setup(tCtx, nil, nil, tc.claims, nil, nil, features, false, nil)
oldObj := tc.oldObj
newObj := tc.newObj
if claim, ok := tc.newObj.(*resourceapi.ResourceClaim); ok {
// Add or update through the client and wait until the event is processed.
claimKey := claim.Namespace + "/" + claim.Name
if tc.oldObj == nil {
// Some test claims already have a UID. Clear it for create.
createClaim := claim.DeepCopy()
createClaim.UID = ""
storedClaim, err := testCtx.client.ResourceV1().ResourceClaims(createClaim.Namespace).Create(tCtx, createClaim, metav1.CreateOptions{})
if err != nil {
tCtx.Fatalf("create claim: expected no error, got: %v", err)
}
claim = storedClaim
} else {
cachedClaim, err := testCtx.draManager.resourceClaimTracker.cache.Get(claimKey)
if err != nil {
tCtx.Fatalf("retrieve old claim: expected no error, got: %v", err)
}
updateClaim := claim.DeepCopy()
// The test claim doesn't have the UID and ResourceVersion (they are generated dynamically), so copy them from the cached object.
updateClaim.UID = cachedClaim.(*resourceapi.ResourceClaim).UID
updateClaim.ResourceVersion = cachedClaim.(*resourceapi.ResourceClaim).ResourceVersion
storedClaim, err := testCtx.client.ResourceV1().ResourceClaims(updateClaim.Namespace).Update(tCtx, updateClaim, metav1.UpdateOptions{})
if err != nil {
tCtx.Fatalf("update claim: expected no error, got: %v", err)
}
claim = storedClaim
}
// Eventually the assume cache will have it, too.
tCtx.Wait()
cachedClaim, err := testCtx.draManager.resourceClaimTracker.cache.Get(claimKey)
tCtx.ExpectNoError(err, "retrieve claim")
if cachedClaim.(*resourceapi.ResourceClaim).ResourceVersion != claim.ResourceVersion {
tCtx.Errorf("cached claim not updated yet")
}
// This has the actual UID and ResourceVersion,
// which is relevant for
// isSchedulableAfterClaimChange.
newObj = claim
}
gotHint, err := testCtx.p.isSchedulableAfterClaimChange(tCtx.Logger(), tc.pod, oldObj, newObj)
if tc.wantErr {
if err == nil {
tCtx.Fatal("want an error, got none")
}
return
}
if err != nil {
tCtx.Fatalf("want no error, got: %v", err)
}
if tc.wantHint != gotHint {
tCtx.Fatalf("want %#v, got %#v", tc.wantHint.String(), gotHint.String())
}
})
}
}
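// TestIsSchedulableAfterPodChange exercises the queueing hint for Pod updates,
// in particular whether all of the pod's generated claims are reflected in
// pod.Status.ResourceClaimStatuses.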
func TestIsSchedulableAfterPodChange(t *testing.T) {
testIsSchedulableAfterPodChange(ktesting.Init(t))
}
func testIsSchedulableAfterPodChange(tCtx ktesting.TContext) {
testcases := map[string]struct {
objs []apiruntime.Object
pod *v1.Pod
claims []*resourceapi.ResourceClaim
obj interface{}
wantHint fwk.QueueingHint
wantErr bool
}{
"backoff-wrong-new-object": {
pod: podWithClaimTemplate,
obj: "not-a-claim",
wantErr: true,
},
"complete": {
objs: []apiruntime.Object{pendingClaim},
pod: podWithClaimTemplate,
obj: podWithClaimTemplateInStatus,
wantHint: fwk.Queue,
},
"wrong-pod": {
objs: []apiruntime.Object{pendingClaim},
pod: func() *v1.Pod {
pod := podWithClaimTemplate.DeepCopy()
pod.Name += "2"
pod.UID += "2" // This is the relevant difference.
return pod
}(),
obj: podWithClaimTemplateInStatus,
wantHint: fwk.QueueSkip,
},
"missing-claim": {
objs: nil,
pod: podWithClaimTemplate,
obj: podWithClaimTemplateInStatus,
wantHint: fwk.QueueSkip,
},
"incomplete": {
objs: []apiruntime.Object{pendingClaim},
pod: podWithTwoClaimTemplates,
obj: func() *v1.Pod {
pod := podWithTwoClaimTemplates.DeepCopy()
// Only one of two claims created.
pod.Status.ResourceClaimStatuses = []v1.PodResourceClaimStatus{{
Name: pod.Spec.ResourceClaims[0].Name,
ResourceClaimName: &claimName,
}}
return pod
}(),
wantHint: fwk.QueueSkip,
},
}
for name, tc := range testcases {
tCtx.Run(name, func(tCtx ktesting.TContext) {
features := feature.Features{
EnableDRASchedulerFilterTimeout: true,
EnableDynamicResourceAllocation: true,
}
testCtx := setup(tCtx, nil, nil, tc.claims, nil, tc.objs, features, false, nil)
gotHint, err := testCtx.p.isSchedulableAfterPodChange(tCtx.Logger(), tc.pod, nil, tc.obj)
if tc.wantErr {
if err == nil {
tCtx.Fatal("want an error, got none")
}
return
}
if err != nil {
tCtx.Fatalf("want no error, got: %v", err)
}
if tc.wantHint != gotHint {
tCtx.Fatalf("want %#v, got %#v", tc.wantHint.String(), gotHint.String())
}
})
}
}
// mockDeviceClassResolver is a simple mock implementation of fwk.DeviceClassResolver for testing
type mockDeviceClassResolver struct {
mapping map[v1.ResourceName]*resourceapi.DeviceClass
}
func (m *mockDeviceClassResolver) GetDeviceClass(resourceName v1.ResourceName) *resourceapi.DeviceClass {
return m.mapping[resourceName]
}
// TestAllocatorSelection covers the selection of a structured allocation implementation
// based on actual Kubernetes feature gates. This test lives here instead of
// k8s.io/dynamic-resource-allocation/structured because that code has no access
// to feature gate definitions.
func TestAllocatorSelection(t *testing.T) {
for name, tc := range map[string]struct {
features string
expectImplementation string
}{
// The most conservative implementation: only used when explicitly asking
// for the most stable Kubernetes (no alpha or beta features).
"only-GA": {
features: "AllAlpha=false,AllBeta=false",
expectImplementation: "stable",
},
// By default, some beta features are on and the incubating implementation
// is used.
"default": {
features: "",
expectImplementation: "incubating",
},
// Alpha features need the experimental implementation.
"alpha": {
features: "AllAlpha=true,AllBeta=true",
expectImplementation: "experimental",
},
} {
t.Run(name, func(t *testing.T) {
tCtx := ktesting.Init(t)
featureGate := utilfeature.DefaultFeatureGate.DeepCopy()
tCtx.ExpectNoError(featureGate.Set(tc.features), "set features")
fts := feature.NewSchedulerFeaturesFromGates(featureGate)
features := AllocatorFeatures(fts)
// Slightly hacky: most arguments are not valid and the constructor
// is expected not to use them yet.
allocator, err := structured.NewAllocator(tCtx, features, structured.AllocatedState{}, nil, nil, nil)
tCtx.ExpectNoError(err, "create allocator")
allocatorType := fmt.Sprintf("%T", allocator)
if !strings.Contains(allocatorType, tc.expectImplementation) {
tCtx.Fatalf("Expected allocator implementation %q, got %s", tc.expectImplementation, allocatorType)
}
})
}
}
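// Test_computesScore checks the per-node score derived from prioritized-list
// allocations. Judging by the expected values, each allocated request
// contributes 8 minus the zero-based index of the chosen subrequest, so more
// preferred subrequests yield higher scores.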
func Test_computesScore(t *testing.T) {
testcases := map[string]struct {
claims []*resourceapi.ResourceClaim
allocations nodeAllocation
expectedScore int64
expectErr bool
}{
"more-claims-than-allocations": {
claims: []*resourceapi.ResourceClaim{
st.MakeResourceClaim().
NamedRequestWithPrioritizedList("req-1",
st.SubRequest("subreq-1", className, 1),
).
Obj(),
st.MakeResourceClaim().
NamedRequestWithPrioritizedList("req-2",
st.SubRequest("subreq-1", className, 1),
).
Obj(),
},
allocations: nodeAllocation{},
expectErr: true,
},
"single-request-only-subrequest-allocated": {
claims: []*resourceapi.ResourceClaim{
st.MakeResourceClaim().
NamedRequestWithPrioritizedList("req-1",
st.SubRequest("subreq-1", className, 1),
).
Obj(),
},
allocations: nodeAllocation{
allocationResults: []resourceapi.AllocationResult{
{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Request: "req-1/subreq-1",
},
},
},
},
},
},
expectedScore: 8,
},
"single-request-last-subrequest-allocated": {
claims: []*resourceapi.ResourceClaim{
st.MakeResourceClaim().
NamedRequestWithPrioritizedList("req-1",
st.SubRequest("subreq-1", className, 1),
st.SubRequest("subreq-2", className, 1),
st.SubRequest("subreq-3", className, 1),
st.SubRequest("subreq-4", className, 1),
st.SubRequest("subreq-5", className, 1),
st.SubRequest("subreq-6", className, 1),
st.SubRequest("subreq-7", className, 1),
st.SubRequest("subreq-8", className, 1),
).
Obj(),
},
allocations: nodeAllocation{
allocationResults: []resourceapi.AllocationResult{
{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Request: "req-1/subreq-8",
},
},
},
},
},
},
expectedScore: 1,
},
"multiple-requests-with-middle-subrequests-allocated": {
claims: []*resourceapi.ResourceClaim{
st.MakeResourceClaim().
NamedRequestWithPrioritizedList("req-1",
st.SubRequest("subreq-1", className, 1),
st.SubRequest("subreq-2", className, 1),
st.SubRequest("subreq-3", className, 1),
st.SubRequest("subreq-4", className, 1),
).
NamedRequestWithPrioritizedList("req-2",
st.SubRequest("subreq-1", className, 1),
st.SubRequest("subreq-2", className, 1),
st.SubRequest("subreq-3", className, 1),
st.SubRequest("subreq-4", className, 1),
st.SubRequest("subreq-5", className, 1),
).
Obj(),
},
allocations: nodeAllocation{
allocationResults: []resourceapi.AllocationResult{
{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Request: "req-1/subreq-4",
},
{
Request: "req-2/subreq-5",
},
},
},
},
},
},
expectedScore: 9,
},
"multiple-requests-with-top-subrequests-allocated": {
claims: []*resourceapi.ResourceClaim{
st.MakeResourceClaim().
NamedRequestWithPrioritizedList("req-1",
st.SubRequest("subreq-1", className, 1),
st.SubRequest("subreq-2", className, 1),
st.SubRequest("subreq-3", className, 1),
st.SubRequest("subreq-4", className, 1),
st.SubRequest("subreq-5", className, 1),
st.SubRequest("subreq-6", className, 1),
st.SubRequest("subreq-7", className, 1),
st.SubRequest("subreq-8", className, 1),
).
NamedRequestWithPrioritizedList("req-2",
st.SubRequest("subreq-1", className, 1),
).
Obj(),
},
allocations: nodeAllocation{
allocationResults: []resourceapi.AllocationResult{
{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Request: "req-1/subreq-8",
},
{
Request: "req-2/subreq-1",
},
},
},
},
},
},
expectedScore: 9,
},
"multiple-claims-with-last-subrequests-allocated": {
claims: []*resourceapi.ResourceClaim{
st.MakeResourceClaim().
NamedRequestWithPrioritizedList("req-1",
st.SubRequest("subreq-1", className, 1),
st.SubRequest("subreq-2", className, 1),
st.SubRequest("subreq-3", className, 1),
st.SubRequest("subreq-4", className, 1),
st.SubRequest("subreq-5", className, 1),
st.SubRequest("subreq-6", className, 1),
st.SubRequest("subreq-7", className, 1),
st.SubRequest("subreq-8", className, 1),
).
Obj(),
st.MakeResourceClaim().
NamedRequestWithPrioritizedList("req-2",
st.SubRequest("subreq-1", className, 1),
st.SubRequest("subreq-2", className, 1),
st.SubRequest("subreq-3", className, 1),
st.SubRequest("subreq-4", className, 1),
st.SubRequest("subreq-5", className, 1),
st.SubRequest("subreq-6", className, 1),
st.SubRequest("subreq-7", className, 1),
st.SubRequest("subreq-8", className, 1),
).
Obj(),
},
allocations: nodeAllocation{
allocationResults: []resourceapi.AllocationResult{
{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Request: "req-1/subreq-8",
},
},
},
},
{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Request: "req-2/subreq-8",
},
},
},
},
},
},
expectedScore: 2,
},
"multiple-claims-with-top-subrequests-allocated": {
claims: []*resourceapi.ResourceClaim{
st.MakeResourceClaim().
NamedRequestWithPrioritizedList("req-1",
st.SubRequest("subreq-1", className, 1),
).
Obj(),
st.MakeResourceClaim().
NamedRequestWithPrioritizedList("req-2",
st.SubRequest("subreq-1", className, 1),
).
Obj(),
},
allocations: nodeAllocation{
allocationResults: []resourceapi.AllocationResult{
{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Request: "req-1/subreq-1",
},
},
},
},
{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
{
Request: "req-2/subreq-1",
},
},
},
},
},
},
expectedScore: 16,
},
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
iterator := slices.All(tc.claims)
score, err := computeScore(iterator, tc.allocations)
if err != nil {
if !tc.expectErr {
t.Fatalf("unexpected error: %v", err)
}
return
}
if tc.expectErr {
t.Fatal("expected error, got none")
}
assert.Equal(t, tc.expectedScore, score)
})
}
}
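// TestNormalizeScore checks how raw node scores are normalized. The expected
// values are consistent with scaling each score by 100/maxScore using integer
// division, so the best node always ends up with 100.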
func TestNormalizeScore(t *testing.T) {
testcases := map[string]struct {
scores fwk.NodeScoreList
expectedScores fwk.NodeScoreList
}{
"empty": {
scores: fwk.NodeScoreList{},
expectedScores: fwk.NodeScoreList{},
},
"single-score": {
scores: fwk.NodeScoreList{
{
Name: "node-1",
Score: 42,
},
},
expectedScores: fwk.NodeScoreList{
{
Name: "node-1",
Score: 100,
},
},
},
"all-same": {
scores: fwk.NodeScoreList{
{
Name: "node-1",
Score: 8,
},
{
Name: "node-2",
Score: 8,
},
},
expectedScores: fwk.NodeScoreList{
{
Name: "node-1",
Score: 100,
},
{
Name: "node-2",
Score: 100,
},
},
},
"all-same-very-large": {
scores: fwk.NodeScoreList{
{
Name: "node-1",
Score: math.MaxInt32,
},
{
Name: "node-2",
Score: math.MaxInt32,
},
},
expectedScores: fwk.NodeScoreList{
{
Name: "node-1",
Score: 100,
},
{
Name: "node-2",
Score: 100,
},
},
},
"max-and-min-values": {
scores: fwk.NodeScoreList{
{
Name: "node-1",
Score: math.MaxInt32,
},
{
Name: "node-2",
Score: 0,
},
},
expectedScores: fwk.NodeScoreList{
{
Name: "node-1",
Score: 100,
},
{
Name: "node-2",
Score: 0,
},
},
},
"mid-value": {
scores: fwk.NodeScoreList{
{
Name: "node-1",
Score: 99,
},
{
Name: "node-2",
Score: 98,
},
{
Name: "node-3",
Score: 97,
},
},
expectedScores: fwk.NodeScoreList{
{
Name: "node-1",
Score: 100,
},
{
Name: "node-2",
Score: 98,
},
{
Name: "node-3",
Score: 97,
},
},
},
"large-spread-lost-precision": {
scores: fwk.NodeScoreList{
{
Name: "node-1",
Score: math.MaxInt32,
},
{
Name: "node-2",
Score: math.MaxInt32 - 1,
},
{
Name: "node-3",
Score: 1,
},
{
Name: "node-4",
Score: 0,
},
},
expectedScores: fwk.NodeScoreList{
{
Name: "node-1",
Score: 100,
},
{
Name: "node-2",
Score: 99,
},
{
Name: "node-3",
Score: 0,
},
{
Name: "node-4",
Score: 0,
},
},
},
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
pl := &DynamicResources{
enabled: true,
}
scores := tc.scores
_ = pl.NormalizeScore(context.Background(), nil, nil, scores)
assert.Equal(t, tc.expectedScores, scores)
})
}
}