mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-02-15 08:47:59 -05:00
* First version of batching w/out signatures. * First version of pod signatures. * Integrate batching with signatures. * Fix merge conflicts. * Fixes from self-review. * Test fixes. * Fix a bug that limited batches to size 2 Also add some new high-level logging and simplify the pod affinity signature. * Re-enable batching on perf tests for now. * fwk.NewStatus(fwk.Success) * Review feedback. * Review feedback. * Comment fix. * Two plugin specific unit tests.: * Add cycle state to the sign call, apply to topo spread. Also add unit tests for several plugi signature calls. * Review feedback. * Switch to distinct stats for hint and store calls. * Switch signature from string to []byte * Revert cyclestate in signs. Update node affinity. Node affinity now sorts all of the various nested arrays in the structure. CycleState no longer in signature; revert to signing fewer cases for pod spread. * hack/update-vendor.sh * Disable signatures when extenders are configured. * Update pkg/scheduler/framework/runtime/batch.go Co-authored-by: Maciej Skoczeń <87243939+macsko@users.noreply.github.com> * Update staging/src/k8s.io/kube-scheduler/framework/interface.go Co-authored-by: Maciej Skoczeń <87243939+macsko@users.noreply.github.com> * Review feedback. * Disable node resource signatures when extended DRA enabled. * Review feedback. * Update pkg/scheduler/framework/plugins/imagelocality/image_locality.go Co-authored-by: Maciej Skoczeń <87243939+macsko@users.noreply.github.com> * Update pkg/scheduler/framework/interface.go Co-authored-by: Maciej Skoczeń <87243939+macsko@users.noreply.github.com> * Update pkg/scheduler/framework/plugins/nodedeclaredfeatures/nodedeclaredfeatures.go Co-authored-by: Maciej Skoczeń <87243939+macsko@users.noreply.github.com> * Update pkg/scheduler/framework/runtime/batch.go Co-authored-by: Maciej Skoczeń <87243939+macsko@users.noreply.github.com> * Review feedback. * Fixes for review suggestions. * Add integration tests. * Linter fixes, test fix. 
* Whitespace fix. * Remove broken test. * Unschedulable test. * Remove go.mod changes. --------- Co-authored-by: Maciej Skoczeń <87243939+macsko@users.noreply.github.com>
165 lines
7.3 KiB
Go
165 lines
7.3 KiB
Go
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package nodeunschedulable
|
|
|
|
import (
|
|
"context"
|
|
|
|
v1 "k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
|
v1helper "k8s.io/component-helpers/scheduling/corev1"
|
|
"k8s.io/klog/v2"
|
|
fwk "k8s.io/kube-scheduler/framework"
|
|
"k8s.io/kubernetes/pkg/features"
|
|
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
|
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
|
|
"k8s.io/kubernetes/pkg/scheduler/util"
|
|
)
|
|
|
|
// NodeUnschedulable is a scheduler plugin that rejects nodes whose
// node.Spec.Unschedulable is true, unless the pod tolerates the
// {key=node.kubernetes.io/unschedulable, effect:NoSchedule} taint.
type NodeUnschedulable struct {
	// enableSchedulingQueueHint selects which set of cluster events
	// EventsToRegister reports. New copies it from the feature set it is given.
	enableSchedulingQueueHint bool
}
|
|
|
|
var _ fwk.FilterPlugin = &NodeUnschedulable{}
|
|
var _ fwk.EnqueueExtensions = &NodeUnschedulable{}
|
|
var _ fwk.SignPlugin = &NodeUnschedulable{}
|
|
|
|
// Name is the name of the plugin used in the plugin registry and configurations.
|
|
const Name = names.NodeUnschedulable
|
|
|
|
const (
|
|
// ErrReasonUnknownCondition is used for NodeUnknownCondition predicate error.
|
|
ErrReasonUnknownCondition = "node(s) had unknown conditions"
|
|
// ErrReasonUnschedulable is used for NodeUnschedulable predicate error.
|
|
ErrReasonUnschedulable = "node(s) were unschedulable"
|
|
)
|
|
|
|
// EventsToRegister returns the possible events that may make a Pod
|
|
// failed by this plugin schedulable.
|
|
func (pl *NodeUnschedulable) EventsToRegister(_ context.Context) ([]fwk.ClusterEventWithHint, error) {
|
|
if !pl.enableSchedulingQueueHint {
|
|
return []fwk.ClusterEventWithHint{
|
|
// A note about UpdateNodeLabel event:
|
|
// Ideally, it's supposed to register only Add | UpdateNodeTaint because UpdateNodeLabel will never change the result from this plugin.
|
|
// But, we may miss Node/Add event due to preCheck, and we decided to register UpdateNodeTaint | UpdateNodeLabel for all plugins registering Node/Add.
|
|
// See: https://github.com/kubernetes/kubernetes/issues/109437
|
|
{Event: fwk.ClusterEvent{Resource: fwk.Node, ActionType: fwk.Add | fwk.UpdateNodeTaint | fwk.UpdateNodeLabel}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
|
}, nil
|
|
}
|
|
|
|
return []fwk.ClusterEventWithHint{
|
|
// When QueueingHint is enabled, we don't use preCheck and we don't need to register UpdateNodeLabel event.
|
|
{Event: fwk.ClusterEvent{Resource: fwk.Node, ActionType: fwk.Add | fwk.UpdateNodeTaint}, QueueingHintFn: pl.isSchedulableAfterNodeChange},
|
|
// When the QueueingHint feature is enabled,
|
|
// the scheduling queue uses Pod/Update Queueing Hint
|
|
// to determine whether a Pod's update makes the Pod schedulable or not.
|
|
// https://github.com/kubernetes/kubernetes/pull/122234
|
|
{Event: fwk.ClusterEvent{Resource: fwk.Pod, ActionType: fwk.UpdatePodToleration}, QueueingHintFn: pl.isSchedulableAfterPodTolerationChange},
|
|
}, nil
|
|
}
|
|
|
|
// isSchedulableAfterPodTolerationChange is invoked whenever a pod's toleration changed.
|
|
func (pl *NodeUnschedulable) isSchedulableAfterPodTolerationChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (fwk.QueueingHint, error) {
|
|
_, modifiedPod, err := util.As[*v1.Pod](oldObj, newObj)
|
|
if err != nil {
|
|
return fwk.Queue, err
|
|
}
|
|
|
|
if pod.UID == modifiedPod.UID {
|
|
// Note: we don't need to check oldPod tolerations the taint because:
|
|
// - Taint can be added, but can't be modified nor removed.
|
|
// - If the Pod already has the toleration, it shouldn't have rejected by this plugin in the first place.
|
|
// Meaning, here this Pod has been rejected by this plugin, and hence it shouldn't have the toleration yet.
|
|
if v1helper.TolerationsTolerateTaint(logger, modifiedPod.Spec.Tolerations, &v1.Taint{
|
|
Key: v1.TaintNodeUnschedulable,
|
|
Effect: v1.TaintEffectNoSchedule,
|
|
}, utilfeature.DefaultFeatureGate.Enabled(features.TaintTolerationComparisonOperators)) {
|
|
// This update makes the pod tolerate the unschedulable taint.
|
|
logger.V(5).Info("a new toleration is added for the unschedulable Pod, and it may make it schedulable", "pod", klog.KObj(modifiedPod))
|
|
return fwk.Queue, nil
|
|
}
|
|
logger.V(5).Info("a new toleration is added for the unschedulable Pod, but it's an unrelated toleration", "pod", klog.KObj(modifiedPod))
|
|
return fwk.QueueSkip, nil
|
|
}
|
|
|
|
logger.V(5).Info("a new toleration is added for a Pod, but it's an unrelated Pod and wouldn't change the TaintToleration plugin's decision", "pod", klog.KObj(modifiedPod))
|
|
|
|
return fwk.QueueSkip, nil
|
|
}
|
|
|
|
// isSchedulableAfterNodeChange is invoked for all node events reported by
|
|
// an informer. It checks whether that change made a previously unschedulable
|
|
// pod schedulable.
|
|
func (pl *NodeUnschedulable) isSchedulableAfterNodeChange(logger klog.Logger, pod *v1.Pod, oldObj, newObj interface{}) (fwk.QueueingHint, error) {
|
|
originalNode, modifiedNode, err := util.As[*v1.Node](oldObj, newObj)
|
|
if err != nil {
|
|
return fwk.Queue, err
|
|
}
|
|
|
|
// We queue this Pod when -
|
|
// 1. the node is updated from unschedulable to schedulable.
|
|
// 2. the node is added and is schedulable.
|
|
if (originalNode != nil && originalNode.Spec.Unschedulable && !modifiedNode.Spec.Unschedulable) ||
|
|
(originalNode == nil && !modifiedNode.Spec.Unschedulable) {
|
|
logger.V(5).Info("node was created or updated, pod may be schedulable now", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
|
return fwk.Queue, nil
|
|
}
|
|
|
|
logger.V(5).Info("node was created or updated, but it doesn't make this pod schedulable", "pod", klog.KObj(pod), "node", klog.KObj(modifiedNode))
|
|
return fwk.QueueSkip, nil
|
|
}
|
|
|
|
// Name returns name of the plugin. It is used in logs, etc.
|
|
func (pl *NodeUnschedulable) Name() string {
|
|
return Name
|
|
}
|
|
|
|
// Feasibility and scoring based on the pod's tolerations.
|
|
func (pl *NodeUnschedulable) SignPod(ctx context.Context, pod *v1.Pod) ([]fwk.SignFragment, *fwk.Status) {
|
|
return []fwk.SignFragment{
|
|
{Key: fwk.TolerationsSignerName, Value: fwk.TolerationsSigner(pod)},
|
|
}, nil
|
|
}
|
|
|
|
// Filter invoked at the filter extension point.
|
|
func (pl *NodeUnschedulable) Filter(ctx context.Context, _ fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status {
|
|
node := nodeInfo.Node()
|
|
|
|
if !node.Spec.Unschedulable {
|
|
return nil
|
|
}
|
|
|
|
logger := klog.FromContext(ctx)
|
|
// If pod tolerate unschedulable taint, it's also tolerate `node.Spec.Unschedulable`.
|
|
podToleratesUnschedulable := v1helper.TolerationsTolerateTaint(logger, pod.Spec.Tolerations, &v1.Taint{
|
|
Key: v1.TaintNodeUnschedulable,
|
|
Effect: v1.TaintEffectNoSchedule,
|
|
}, utilfeature.DefaultFeatureGate.Enabled(features.TaintTolerationComparisonOperators))
|
|
if !podToleratesUnschedulable {
|
|
return fwk.NewStatus(fwk.UnschedulableAndUnresolvable, ErrReasonUnschedulable)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// New initializes a new plugin and returns it.
|
|
func New(_ context.Context, _ runtime.Object, _ fwk.Handle, fts feature.Features) (fwk.Plugin, error) {
|
|
return &NodeUnschedulable{enableSchedulingQueueHint: fts.EnableSchedulingQueueHint}, nil
|
|
}
|