kubernetes/pkg/scheduler/framework/runtime/batch_test.go
KEP 5598: Opportunistic Batching (#135231)

/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package runtime
import (
"bytes"
"context"
"fmt"
"log"
"testing"
"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2/ktesting"
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
"k8s.io/kubernetes/pkg/scheduler/framework"
)
// nodeInfoLister is a fake fwk.NodeInfoLister backed by a slice of fwk.NodeInfo for testing.
type nodeInfoLister []fwk.NodeInfo
// Get returns the fake node with the given name, or an error if it is not in the fake node list.
func (nodes nodeInfoLister) Get(nodeName string) (fwk.NodeInfo, error) {
for _, node := range nodes {
if node != nil && node.Node().Name == nodeName {
return node, nil
}
}
return nil, fmt.Errorf("unable to find node: %s", nodeName)
}
// List lists all nodes.
func (nodes nodeInfoLister) List() ([]fwk.NodeInfo, error) {
return nodes, nil
}
// HavePodsWithAffinityList is supposed to list nodes with at least one pod with affinity. For the fake lister
// we just return everything.
func (nodes nodeInfoLister) HavePodsWithAffinityList() ([]fwk.NodeInfo, error) {
return nodes, nil
}
// HavePodsWithRequiredAntiAffinityList is supposed to list nodes with at least one pod with
// required anti-affinity. For the fake lister we just return everything.
func (nodes nodeInfoLister) HavePodsWithRequiredAntiAffinityList() ([]fwk.NodeInfo, error) {
return nodes, nil
}
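// sharedLister is a minimal shared lister for the test framework's snapshot; it serves the fake node
// lister above and a stub storage lister.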
type sharedLister struct {
nodes nodeInfoLister
}
func (s sharedLister) NodeInfos() fwk.NodeInfoLister {
return s.nodes
}
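// storageInfoListerContract is a stub storage lister that reports no PVC as in use by pods.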
type storageInfoListerContract struct{}
func (c *storageInfoListerContract) IsPVCUsedByPods(_ string) bool {
return false
}
func (s sharedLister) StorageInfos() fwk.StorageInfoLister {
return &storageInfoListerContract{}
}
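// batchRegistry registers the batchTest filter plugin along with the mandatory queue sort and bind test plugins.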
var batchRegistry = func() Registry {
r := make(Registry)
err := r.Register("batchTest", newBatchTestPlugin)
if err != nil {
log.Fatalf("Couldn't register batchTest plugin: %v", err)
}
err = r.Register(queueSortPlugin, newQueueSortPlugin)
if err != nil {
log.Fatalf("Couldn't register queue sort plugin: %v", err)
}
err = r.Register(bindPlugin, newBindPlugin)
if err != nil {
log.Fatalf("Couldn't register bind plugin: %v", err)
}
return r
}()
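// BatchTestPlugin is the filter plugin used to control which test pods fit on which nodes.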
type BatchTestPlugin struct{}
func (pl *BatchTestPlugin) Name() string {
return "batchTest"
}
const blockingPodPrefix = 'b'
func blockingPodID(suffix string) string {
return fmt.Sprintf("%c-%s", blockingPodPrefix, suffix)
}
const nonBlockingPodPrefix = 'a'
func nonBlockingPodID(suffix string) string {
return fmt.Sprintf("%c-%s", nonBlockingPodPrefix, suffix)
}
// The test plugin assumes that each node can hold at most one pod whose ID begins with blockingPodPrefix.
// This allows us to construct pods that either block, or do not block, later pods like themselves.
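// For example, a pod with UID blockingPodID("1") ("b-1") is rejected on any node that already hosts
// blockingPodID("2") ("b-2"), while pods named with nonBlockingPodID ("a-...") always pass this filter.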
func (pl *BatchTestPlugin) Filter(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status {
podID := pod.GetUID()
for _, nodePod := range nodeInfo.GetPods() {
npid := nodePod.GetPod().GetUID()
if podID[0] == blockingPodPrefix && npid[0] == blockingPodPrefix {
return fwk.NewStatus(fwk.Unschedulable, "unsched")
}
}
return nil
}
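// newBatchTestPlugin constructs a BatchTestPlugin; the plugin arguments and handle are unused.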
func newBatchTestPlugin(_ context.Context, injArgs runtime.Object, f fwk.Handle) (fwk.Plugin, error) {
return &BatchTestPlugin{}, nil
}
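// newBatchTestFramework builds a framework whose only filter plugin is batchTest, backed by a fake
// shared lister whose node list the tests can replace.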
func newBatchTestFramework(ctx context.Context, r Registry) (framework.Framework, *sharedLister, error) {
plugins := &config.Plugins{}
profile := config.KubeSchedulerProfile{Plugins: plugins}
if _, ok := r[queueSortPlugin]; !ok {
r[queueSortPlugin] = newQueueSortPlugin
}
if _, ok := r[bindPlugin]; !ok {
r[bindPlugin] = newBindPlugin
}
if len(profile.Plugins.QueueSort.Enabled) == 0 {
profile.Plugins.QueueSort.Enabled = append(profile.Plugins.QueueSort.Enabled, config.Plugin{Name: queueSortPlugin})
}
if len(profile.Plugins.Bind.Enabled) == 0 {
profile.Plugins.Bind.Enabled = append(profile.Plugins.Bind.Enabled, config.Plugin{Name: bindPlugin})
}
profile.Plugins.Filter.Enabled = []config.Plugin{{Name: "batchTest"}}
lister := &sharedLister{nodes: nodeInfoLister{}}
ret, err := NewFramework(ctx, r, &profile, WithSnapshotSharedLister(lister))
return ret, lister, err
}
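// testSortedScoredNodes is a trivial framework.SortedScoredNodes backed by a slice of node names.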
type testSortedScoredNodes struct {
Nodes []string
}
var _ framework.SortedScoredNodes = &testSortedScoredNodes{}
func (t *testSortedScoredNodes) Pop() string {
ret := t.Nodes[0]
t.Nodes = t.Nodes[1:]
return ret
}
func (t *testSortedScoredNodes) Len() int {
return len(t.Nodes)
}
func newTestNodes(n []string) *testSortedScoredNodes {
return &testSortedScoredNodes{Nodes: n}
}
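// newTestSigFunc returns a SignatureFunc that ignores the pod and always reports the signature
// currently pointed to by sig, so tests can change it between scheduling cycles.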
func newTestSigFunc(sig *fwk.PodSignature) SignatureFunc {
return func(h fwk.Handle, ctx context.Context, p *v1.Pod, shouldRecordStats bool) fwk.PodSignature {
return *sig
}
}
func TestBatchBasic(t *testing.T) {
// This test first lets the opportunistic batch handle the first pod, and then checks how it behaves with the second pod.
tests := []struct {
name string
firstPodID string
firstSig string
firstPodScheduledSuccessfully bool
// firstChosenNode should be set only if firstPodScheduledSuccessfully is true.
firstChosenNode string
// firstOtherNodes should be set only if firstPodScheduledSuccessfully is true.
firstOtherNodes framework.SortedScoredNodes
// If true, the test case behaves as if another pod, handled by a different profile, was scheduled between the first and second pods.
skipPod bool
secondPodID string
secondSig string
secondChosenNode string
secondOtherNodes framework.SortedScoredNodes
expectedHint string
expectedState *batchState
}{
{
name: "a second pod with the same signature gets a hint",
firstPodID: blockingPodID("1"),
firstSig: "sig",
firstChosenNode: "n3",
firstOtherNodes: newTestNodes([]string{"n1"}),
firstPodScheduledSuccessfully: true,
secondPodID: blockingPodID("2"),
secondSig: "sig",
secondChosenNode: "n1",
expectedHint: "n1",
},
{
name: "a second pod with a different signature doesn't get a hint",
firstPodID: nonBlockingPodID("1"),
firstSig: "sig",
firstChosenNode: "n3",
firstOtherNodes: newTestNodes([]string{"n1"}),
firstPodScheduledSuccessfully: true,
secondPodID: nonBlockingPodID("2"),
secondSig: "sig2",
secondChosenNode: "n1",
expectedHint: "",
},
{
name: "a second pod doesn't get a hint if it's not 1-pod-per-node",
firstPodID: nonBlockingPodID("1"),
firstSig: "sig",
firstChosenNode: "n3",
firstOtherNodes: newTestNodes([]string{"n1"}),
firstPodScheduledSuccessfully: true,
secondPodID: nonBlockingPodID("2"),
secondSig: "sig",
secondChosenNode: "n1",
expectedHint: "",
},
{
name: "pod doesn't get hint if previous pod didn't scheduled",
firstPodID: blockingPodID("1"),
firstSig: "sig",
firstChosenNode: "n3",
firstOtherNodes: newTestNodes([]string{"n1"}),
firstPodScheduledSuccessfully: false,
secondPodID: blockingPodID("2"),
secondSig: "sig",
secondChosenNode: "n1",
expectedHint: "",
},
{
name: "empty list",
firstPodID: blockingPodID("1"),
firstSig: "sig",
firstChosenNode: "n3",
firstOtherNodes: newTestNodes([]string{}),
firstPodScheduledSuccessfully: true,
secondPodID: blockingPodID("2"),
secondSig: "sig",
secondChosenNode: "n4",
expectedHint: "",
},
{
name: "nil list",
firstPodID: blockingPodID("1"),
firstSig: "sig",
firstChosenNode: "n3",
firstOtherNodes: nil,
firstPodScheduledSuccessfully: true,
secondPodID: blockingPodID("2"),
secondSig: "sig",
secondChosenNode: "n4",
expectedHint: "",
},
{
name: "pod doesn't get hint because the previous pod is to a different profile",
firstPodID: blockingPodID("1"),
firstSig: "sig",
firstChosenNode: "n3",
firstOtherNodes: newTestNodes([]string{"n1"}),
firstPodScheduledSuccessfully: true,
skipPod: true,
secondPodID: blockingPodID("2"),
secondSig: "sig",
secondChosenNode: "n1",
expectedHint: "",
},
{
name: "pod uses batch from preceding pod and leaves remaining batch to next pod",
firstPodID: blockingPodID("1"),
firstSig: "sig",
firstChosenNode: "n3",
firstOtherNodes: newTestNodes([]string{"n1", "n2"}),
firstPodScheduledSuccessfully: true,
secondPodID: blockingPodID("2"),
secondSig: "sig",
secondChosenNode: "n1",
expectedHint: "n1",
expectedState: &batchState{
signature: []byte("sig"),
sortedNodes: newTestNodes([]string{"n2"}),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
testFwk, lister, err := newBatchTestFramework(ctx, batchRegistry)
if err != nil {
t.Fatalf("Failed to create framework for testing: %v", err)
}
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod",
UID: types.UID(tt.firstPodID),
},
}
signature := fwk.PodSignature(tt.firstSig)
batch := newOpportunisticBatch(testFwk, newTestSigFunc(&signature))
state := framework.NewCycleState()
// Run the first "pod" through
hint, _ := batch.GetNodeHint(ctx, pod, state, 1)
if hint != "" {
t.Fatalf("Got unexpected hint %s", hint)
}
if tt.firstPodScheduledSuccessfully {
batch.StoreScheduleResults(ctx, []byte(tt.firstSig), hint, tt.firstChosenNode, tt.firstOtherNodes, 1)
}
var cycleCount int64 = 2
if tt.skipPod {
cycleCount = 3
}
// Run the second pod
pod2 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod2",
UID: types.UID(tt.secondPodID),
},
}
lastChosenNode := framework.NewNodeInfo(pod)
lastChosenNode.SetNode(&v1.Node{ObjectMeta: metav1.ObjectMeta{
Name: tt.firstChosenNode,
UID: types.UID(tt.firstChosenNode),
}})
lister.nodes = nodeInfoLister{lastChosenNode}
signature = fwk.PodSignature(tt.secondSig)
hint, _ = batch.GetNodeHint(ctx, pod2, state, cycleCount)
if hint != tt.expectedHint {
t.Fatalf("Got hint '%s' expected '%s' for test '%s'", hint, tt.expectedHint, tt.name)
}
batch.StoreScheduleResults(ctx, []byte(tt.secondSig), hint, tt.secondChosenNode, tt.secondOtherNodes, cycleCount)
batchEmpty := batch.state == nil || batch.state.sortedNodes == nil || batch.state.sortedNodes.Len() == 0
expectedEmpty := tt.expectedState == nil
if batchEmpty != expectedEmpty {
t.Fatalf("Expected empty %t, got empty %t for %s", expectedEmpty, batchEmpty, tt.name)
}
if !expectedEmpty {
if !bytes.Equal(batch.state.signature, []byte(tt.expectedState.signature)) {
t.Fatalf("Got state signature '%s' expected '%s' for test '%s'", batch.state.signature, tt.expectedState.signature, tt.name)
}
nodesDiff := cmp.Diff(tt.expectedState.sortedNodes, batch.state.sortedNodes)
if nodesDiff != "" {
t.Fatalf("Diff between sortedNodes (-want,+got):\n%s", nodesDiff)
}
}
})
}
}