Mirror of https://github.com/kubernetes/kubernetes.git (synced 2026-02-03 20:40:26 -05:00)
Validate native resource claims in dynamic resources plugin
Validation currently returns an error if: 1. Pod-level resources and native resource claims are used together. 2. Multiple pods reference a claim with native resource mappings.
This commit is contained in:
parent 5d7c1fbb73
commit f104d6c1e7

3 changed files with 113 additions and 13 deletions
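For context, a minimal, self-contained sketch (not part of the commit) of the two rejection cases described in the commit message above. The types podSpec and claimState are simplified stand-ins for illustration only, not the real Kubernetes API types:

package main

import "fmt"

// claimState is a simplified stand-in for per-claim allocation state on a node.
type claimState struct {
	consumerPods map[string]bool // UIDs of pods already using the claim on this node
}

// podSpec is a simplified stand-in for the parts of a Pod the validation looks at.
type podSpec struct {
	uid                  string
	hasPodLevelResources bool     // spec.resources is set
	nativeClaimUIDs      []string // native resource claims referenced by the pod
}

// validate mirrors the intent of the commit: reject pod-level resources combined
// with native resource claims, and reject claims already in use by another pod.
func validate(pod podSpec, states map[string]*claimState) error {
	if len(pod.nativeClaimUIDs) == 0 {
		return nil
	}
	if pod.hasPodLevelResources {
		return fmt.Errorf("cannot use pod level resources with native resource claims")
	}
	for _, uid := range pod.nativeClaimUIDs {
		if s, ok := states[uid]; ok && len(s.consumerPods) > 0 && !s.consumerPods[pod.uid] {
			return fmt.Errorf("cannot share native resource claims across pods")
		}
	}
	return nil
}

func main() {
	states := map[string]*claimState{
		"claim-1": {consumerPods: map[string]bool{"pod-a": true}},
	}
	// Another pod referencing an already-consumed claim is rejected.
	fmt.Println(validate(podSpec{uid: "pod-b", nativeClaimUIDs: []string{"claim-1"}}, states))
	// Pod-level resources combined with a native claim are rejected.
	fmt.Println(validate(podSpec{uid: "pod-c", hasPodLevelResources: true, nativeClaimUIDs: []string{"claim-2"}}, states))
}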
@@ -84,7 +84,7 @@ func (pl *DynamicResources) checkNativeResources(ctx context.Context, state *sta
 		return nil, nil
 	}
 
-	totalPodDemand, nativeClaimStatus, status := pl.getPodNativeResourceFootprint(ctx, pod, state, allocations)
+	totalPodDemand, nativeClaimStatus, status := pl.getPodNativeResourceFootprint(logger, nodeInfo, pod, state, allocations)
 	if status != nil {
 		return nil, status
 	}
@@ -149,6 +149,8 @@ func (pl *DynamicResources) buildNativeDRAInfo(pod *v1.Pod, claimByName map[stri
 			continue
 		}
 
+		hasNativeClaims := false
+
 		for _, result := range alloc.Devices.Results {
 			device, err := getDeviceFromManager(pl.draManager, result.Pool, result.Device)
 			if err != nil {
@@ -209,11 +211,14 @@ func (pl *DynamicResources) buildNativeDRAInfo(pod *v1.Pod, claimByName map[stri
 					DriverName: result.Driver,
 				})
 			}
+			hasNativeClaims = true
 
 		}
 	}
 
-	nativeClaimInfo[actualClaim.UID] = currentClaimStatus
+	if hasNativeClaims {
+		nativeClaimInfo[actualClaim.UID] = currentClaimStatus
+	}
 }
 }
@@ -224,8 +229,27 @@ func (pl *DynamicResources) buildNativeDRAInfo(pod *v1.Pod, claimByName map[stri
 	return nativeClaimInfoList, nil
 }
 
-func (pl *DynamicResources) getPodNativeResourceFootprint(ctx context.Context, pod *v1.Pod, state *stateData, allocations map[string]*resourceapi.AllocationResult) (*framework.Resource, []v1.PodNativeResourceClaimStatus, *fwk.Status) {
-	logger := klog.FromContext(ctx)
+func (pl *DynamicResources) validateNativeDRAClaims(pod *v1.Pod, nodeInfo fwk.NodeInfo, nativeResourceClaimStatus []v1.PodNativeResourceClaimStatus) error {
+	if len(nativeResourceClaimStatus) == 0 {
+		return nil
+	}
+	if pod.Spec.Resources != nil {
+		return fmt.Errorf("cannot use pod level resources with native resource claims")
+	}
+	for _, claim := range nativeResourceClaimStatus {
+		claimStates := nodeInfo.GetNativeResourceDRAClaimStates()
+		state, ok := claimStates[claim.ClaimInfo.UID]
+		if ok && state != nil && state.ConsumerPods.Len() > 0 {
+			if !state.ConsumerPods.Has(pod.UID) {
+				// TODO(KEP-5517): Handle shared claims across pods.
+				return fmt.Errorf("cannot share native resource claims across pods")
+			}
+		}
+	}
+	return nil
+}
+
+func (pl *DynamicResources) getPodNativeResourceFootprint(logger klog.Logger, nodeInfo fwk.NodeInfo, pod *v1.Pod, state *stateData, allocations map[string]*resourceapi.AllocationResult) (*framework.Resource, []v1.PodNativeResourceClaimStatus, *fwk.Status) {
 
 	claimByName := make(map[string]*resourceapi.ResourceClaim)
 	for _, claim := range state.claims.allUserClaims() {

@@ -253,6 +277,10 @@ func (pl *DynamicResources) getPodNativeResourceFootprint(ctx context.Context, p
 		return nil, nil, statusError(logger, err)
 	}
 
+	if err := pl.validateNativeDRAClaims(pod, nodeInfo, podNativeDRAStatus); err != nil {
+		return nil, nil, statusError(logger, err)
+	}
+
 	// Calculate Effective Container Requests for PodRequests helper
 	opts := resourcehelper.PodResourcesOptions{
 		SkipPodLevelResources: true,

@@ -26,6 +26,7 @@ import (
 	"time"
 
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/types"
 	utilerrors "k8s.io/apimachinery/pkg/util/errors"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
@@ -205,6 +206,9 @@ type NodeInfo struct {
 
 	// DeclaredFeatures is a set of features published by the node
 	DeclaredFeatures ndf.FeatureSet
+
+	// NativeDRAClaimStates tracks the state of DRA claims on this node.
+	NativeDRAClaimStates map[types.UID]*fwk.NativeDRAClaimAllocationState
 }
 
 func (n *NodeInfo) GetPods() []fwk.PodInfo {
@@ -252,6 +256,18 @@ func (n *NodeInfo) GetNodeDeclaredFeatures() ndf.FeatureSet {
 	return n.DeclaredFeatures
 }
 
+func (n *NodeInfo) IsNativeResourceDRAClaimAllocated(claimUID types.UID) bool {
+	if n.NativeDRAClaimStates == nil {
+		return false
+	}
+	state := n.NativeDRAClaimStates[claimUID]
+	return state != nil && state.ConsumerPods.Len() > 0
+}
+
+func (n *NodeInfo) GetNativeResourceDRAClaimStates() map[types.UID]*fwk.NativeDRAClaimAllocationState {
+	return n.NativeDRAClaimStates
+}
+
 // NodeInfo implements KMetadata, so for example klog.KObjSlice(nodes) works
 // when nodes is a []*NodeInfo.
 var _ klog.KMetadata = &NodeInfo{}
@@ -290,15 +306,16 @@ func (n *NodeInfo) Snapshot() fwk.NodeInfo {
 // SnapshotConcrete returns a copy of this node, Except that ImageStates is copied without the Nodes field.
 func (n *NodeInfo) SnapshotConcrete() *NodeInfo {
 	clone := &NodeInfo{
-		node:             n.node,
-		Requested:        n.Requested.Clone(),
-		NonZeroRequested: n.NonZeroRequested.Clone(),
-		Allocatable:      n.Allocatable.Clone(),
-		UsedPorts:        make(fwk.HostPortInfo),
-		ImageStates:      make(map[string]*fwk.ImageStateSummary),
-		PVCRefCounts:     make(map[string]int),
-		Generation:       n.Generation,
-		DeclaredFeatures: n.DeclaredFeatures.Clone(),
+		node:                 n.node,
+		Requested:            n.Requested.Clone(),
+		NonZeroRequested:     n.NonZeroRequested.Clone(),
+		Allocatable:          n.Allocatable.Clone(),
+		UsedPorts:            make(fwk.HostPortInfo),
+		ImageStates:          make(map[string]*fwk.ImageStateSummary),
+		PVCRefCounts:         make(map[string]int),
+		Generation:           n.Generation,
+		DeclaredFeatures:     n.DeclaredFeatures.Clone(),
+		NativeDRAClaimStates: make(map[types.UID]*fwk.NativeDRAClaimAllocationState),
 	}
 	if len(n.Pods) > 0 {
 		clone.Pods = append([]fwk.PodInfo(nil), n.Pods...)
@@ -329,6 +346,9 @@ func (n *NodeInfo) SnapshotConcrete() *NodeInfo {
 	for key, value := range n.PVCRefCounts {
 		clone.PVCRefCounts[key] = value
 	}
+	for key, value := range n.NativeDRAClaimStates {
+		clone.NativeDRAClaimStates[key] = value.Snapshot()
+	}
 	return clone
 }
 
@@ -441,6 +461,38 @@ func (n *NodeInfo) update(podInfo fwk.PodInfo, sign int64) {
 	n.updatePVCRefCounts(podInfo.GetPod(), sign > 0)
 
 	n.Generation = nextGeneration()
+
+	if utilfeature.DefaultFeatureGate.Enabled(features.DRANativeResources) {
+		n.updateNativeDRAClaimState(podInfo, sign)
+	}
 }
+
+// updateNativeDRAClaimState updates the NodeInfo based on DRA native resource claims in the pod.
+func (n *NodeInfo) updateNativeDRAClaimState(podInfo fwk.PodInfo, sign int64) {
+	pod := podInfo.GetPod()
+
+	if n.NativeDRAClaimStates == nil {
+		n.NativeDRAClaimStates = make(map[types.UID]*fwk.NativeDRAClaimAllocationState)
+	}
+
+	for _, claimStatus := range pod.Status.NativeResourceClaimStatus {
+		claimUID := claimStatus.ClaimInfo.UID
+		if _, exists := n.NativeDRAClaimStates[claimUID]; !exists {
+			n.NativeDRAClaimStates[claimUID] = &fwk.NativeDRAClaimAllocationState{
+				ConsumerPods: sets.Set[types.UID]{},
+			}
+		}
+		state := n.NativeDRAClaimStates[claimUID]
+
+		if sign > 0 { // Pod Added
+			state.ConsumerPods.Insert(pod.UID)
+		} else { // Pod Removed
+			state.ConsumerPods.Delete(pod.UID)
+			if state.ConsumerPods.Len() == 0 {
+				delete(n.NativeDRAClaimStates, claimUID)
+			}
+		}
+	}
+}
 
 // updateUsedPorts updates the UsedPorts of NodeInfo.

@@ -23,6 +23,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/sets"
 	ndf "k8s.io/component-helpers/nodedeclaredfeatures"
 	"k8s.io/klog/v2"
@@ -287,6 +288,10 @@ type NodeInfo interface {
 	Snapshot() NodeInfo
 	// String returns representation of human readable format of this NodeInfo.
 	String() string
+	// IsNativeResourceDRAClaimAllocated checks if the given native resource DRA claim UID has already been allocated on this node.
+	IsNativeResourceDRAClaimAllocated(claimUID types.UID) bool
+	// GetNativeResourceDRAClaimStates returns the native DRA claim allocation states on this node.
+	GetNativeResourceDRAClaimStates() map[types.UID]*NativeDRAClaimAllocationState
 
 	// AddPodInfo adds pod information to this NodeInfo.
 	// Consider using this instead of AddPod if a PodInfo is already computed.
@@ -636,3 +641,18 @@ func (h HostPortInfo) sanitize(ip, protocol *string) {
 		*protocol = string(v1.ProtocolTCP)
 	}
 }
+
+// NativeDRAClaimAllocationState holds information about a native resource DRA claim's allocation on a node.
+type NativeDRAClaimAllocationState struct {
+	// Pods using this claim on this node.
+	ConsumerPods sets.Set[types.UID]
+}
+
+func (s *NativeDRAClaimAllocationState) Snapshot() *NativeDRAClaimAllocationState {
+	if s == nil {
+		return nil
+	}
+	return &NativeDRAClaimAllocationState{
+		ConsumerPods: s.ConsumerPods.Clone(),
+	}
+}
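As a usage note (not part of the commit), the following self-contained sketch approximates how the ConsumerPods set in NativeDRAClaimAllocationState is maintained as pods are added to and removed from a node, using the generic set type from k8s.io/apimachinery/pkg/util/sets; claimAllocationState is a simplified stand-in type:

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
)

// claimAllocationState is a simplified stand-in for NativeDRAClaimAllocationState.
type claimAllocationState struct {
	ConsumerPods sets.Set[types.UID]
}

func main() {
	states := map[types.UID]*claimAllocationState{}
	claim := types.UID("claim-1")

	// Pod added (sign > 0): create the entry on first use and record the consumer.
	if _, ok := states[claim]; !ok {
		states[claim] = &claimAllocationState{ConsumerPods: sets.Set[types.UID]{}}
	}
	states[claim].ConsumerPods.Insert("pod-a")
	fmt.Println(states[claim].ConsumerPods.Len()) // 1

	// Pod removed (sign < 0): drop the consumer and delete the entry when empty.
	states[claim].ConsumerPods.Delete("pod-a")
	if states[claim].ConsumerPods.Len() == 0 {
		delete(states, claim)
	}
	fmt.Println(len(states)) // 0
}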