/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"context"
	"encoding/json"
	"fmt"
	"math"
	"math/rand"
	"net/http/httptest"
	"sort"
	"sync"
	"testing"
	"time"

	apps "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/uuid"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/client-go/informers"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/fake"
	clientscheme "k8s.io/client-go/kubernetes/scheme"
	restclient "k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
	utiltesting "k8s.io/client-go/util/testing"
	featuregatetesting "k8s.io/component-base/featuregate/testing"
	"k8s.io/kubernetes/pkg/apis/core"
	_ "k8s.io/kubernetes/pkg/apis/core/install"
	"k8s.io/kubernetes/pkg/controller/testutil"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/securitycontext"
	"k8s.io/kubernetes/test/utils/ktesting"
	testingclock "k8s.io/utils/clock/testing"
	"k8s.io/utils/ptr"

	"github.com/google/go-cmp/cmp"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// NewFakeControllerExpectationsLookup creates a fake store for PodExpectations.
func NewFakeControllerExpectationsLookup(ttl time.Duration) (*ControllerExpectations, *testingclock.FakeClock) {
	fakeTime := time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC)
	fakeClock := testingclock.NewFakeClock(fakeTime)
	ttlPolicy := &cache.TTLPolicy{TTL: ttl, Clock: fakeClock}
	ttlStore := cache.NewFakeExpirationStore(
		ExpKeyFunc, nil, ttlPolicy, fakeClock)
	return &ControllerExpectations{ttlStore}, fakeClock
}
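
// newReplicationController returns a ReplicationController test fixture named "foobar" in the
// default namespace with the requested number of replicas and a fixed "foo": "bar" selector.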
func newReplicationController(replicas int) *v1.ReplicationController {
	rc := &v1.ReplicationController{
		TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
		ObjectMeta: metav1.ObjectMeta{
			UID:             uuid.NewUUID(),
			Name:            "foobar",
			Namespace:       metav1.NamespaceDefault,
			ResourceVersion: "18",
		},
		Spec: v1.ReplicationControllerSpec{
			Replicas: ptr.To[int32](int32(replicas)),
			Selector: map[string]string{"foo": "bar"},
			Template: &v1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: map[string]string{
						"name": "foo",
						"type": "production",
					},
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Image:                  "foo/bar",
							TerminationMessagePath: v1.TerminationMessagePathDefault,
							ImagePullPolicy:        v1.PullIfNotPresent,
							SecurityContext:        securitycontext.ValidSecurityContextWithContainerDefaults(),
						},
					},
					RestartPolicy: v1.RestartPolicyAlways,
					DNSPolicy:     v1.DNSDefault,
					NodeSelector: map[string]string{
						"baz": "blah",
					},
				},
			},
		},
	}
	return rc
}

// newPodList creates count pods with the given phase for the given rc (same selector and
// namespace) and, if store is non-nil, adds them to the store.
func newPodList(store cache.Store, count int, status v1.PodPhase, rc *v1.ReplicationController) *v1.PodList {
	pods := []v1.Pod{}
	for i := 0; i < count; i++ {
		newPod := v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				Name:      fmt.Sprintf("pod%d", i),
				Labels:    rc.Spec.Selector,
				Namespace: rc.Namespace,
			},
			Status: v1.PodStatus{Phase: status},
		}
		if store != nil {
			store.Add(&newPod)
		}
		pods = append(pods, newPod)
	}
	return &v1.PodList{
		Items: pods,
	}
}
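
// newReplicaSet returns a ReplicaSet test fixture with the given name, replica count, and UID in
// the default namespace, selecting pods labeled "foo": "bar".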
func newReplicaSet(name string, replicas int, rsUuid types.UID) *apps.ReplicaSet {
	return &apps.ReplicaSet{
		TypeMeta: metav1.TypeMeta{APIVersion: "v1"},
		ObjectMeta: metav1.ObjectMeta{
			UID:             rsUuid,
			Name:            name,
			Namespace:       metav1.NamespaceDefault,
			ResourceVersion: "18",
		},
		Spec: apps.ReplicaSetSpec{
			Replicas: ptr.To[int32](int32(replicas)),
			Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}},
			Template: v1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: map[string]string{
						"name": "foo",
						"type": "production",
					},
				},
				Spec: v1.PodSpec{
					Containers: []v1.Container{
						{
							Image:                  "foo/bar",
							TerminationMessagePath: v1.TerminationMessagePathDefault,
							ImagePullPolicy:        v1.PullIfNotPresent,
							SecurityContext:        securitycontext.ValidSecurityContextWithContainerDefaults(),
						},
					},
					RestartPolicy: v1.RestartPolicyAlways,
					DNSPolicy:     v1.DNSDefault,
					NodeSelector: map[string]string{
						"baz": "blah",
					},
				},
			},
		},
	}
}
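
// TestControllerExpectations verifies that expectations are reported as satisfied only once the
// expected numbers of pod creations and deletions have been observed, and that stale expectations
// expire after the store's TTL.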
func TestControllerExpectations(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)
	ttl := 30 * time.Second
	e, fakeClock := NewFakeControllerExpectationsLookup(ttl)
	// In practice we can't really have add and delete expectations since we only either create or
	// delete replicas in one rc pass, and the rc goes to sleep soon after until the expectations are
	// either fulfilled or timeout.
	adds, dels := 10, 30
	rc := newReplicationController(1)

	// RC fires off adds and deletes at apiserver, then sets expectations
	rcKey, err := KeyFunc(rc)
	require.NoError(t, err, "Couldn't get key for object %#v: %v", rc, err)

	e.SetExpectations(logger, rcKey, adds, dels)
	var wg sync.WaitGroup
	for i := 0; i < adds+1; i++ {
		wg.Add(1)
		go func() {
			// In prod this can happen either because of a failed create by the rc
			// or after having observed a create via informer
			e.CreationObserved(logger, rcKey)
			wg.Done()
		}()
	}
	wg.Wait()

	// There are still delete expectations
	assert.False(t, e.SatisfiedExpectations(logger, rcKey), "Rc will sync before expectations are met")

	for i := 0; i < dels+1; i++ {
		wg.Add(1)
		go func() {
			e.DeletionObserved(logger, rcKey)
			wg.Done()
		}()
	}
	wg.Wait()

	tests := []struct {
		name                      string
		expectationsToSet         []int
		expireExpectations        bool
		wantPodExpectations       []int64
		wantExpectationsSatisfied bool
	}{
		{
			name:                      "Expectations have been surpassed",
			expireExpectations:        false,
			wantPodExpectations:       []int64{int64(-1), int64(-1)},
			wantExpectationsSatisfied: true,
		},
		{
			name:                      "Old expectations are cleared because of ttl",
			expectationsToSet:         []int{1, 2},
			expireExpectations:        true,
			wantPodExpectations:       []int64{int64(1), int64(2)},
			wantExpectationsSatisfied: false,
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			if len(test.expectationsToSet) > 0 {
				e.SetExpectations(logger, rcKey, test.expectationsToSet[0], test.expectationsToSet[1])
			}
			podExp, exists, err := e.GetExpectations(rcKey)
			require.NoError(t, err, "Could not get expectations for rc, exists %v and err %v", exists, err)
			assert.True(t, exists, "Could not get expectations for rc, exists %v and err %v", exists, err)

			add, del := podExp.GetExpectations()
			assert.Equal(t, test.wantPodExpectations[0], add, "Unexpected pod expectations %#v", podExp)
			assert.Equal(t, test.wantPodExpectations[1], del, "Unexpected pod expectations %#v", podExp)
			assert.Equal(t, test.wantExpectationsSatisfied, e.SatisfiedExpectations(logger, rcKey), "Expectations are met but the rc will not sync")

			if test.expireExpectations {
				fakeClock.Step(ttl + 1)
				assert.True(t, e.SatisfiedExpectations(logger, rcKey), "Expectations should have expired but didn't")
			}
		})
	}
}
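
// TestUIDExpectations verifies that UIDTrackingControllerExpectations for a controller are
// satisfied only after a deletion has been observed for every expected pod UID, and that
// DeleteExpectations removes the tracked UIDs.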
func TestUIDExpectations(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)
	uidExp := NewUIDTrackingControllerExpectations(NewControllerExpectations())
	type test struct {
		name        string
		numReplicas int
	}
	shuffleTests := func(tests []test) {
		for i := range tests {
			j := rand.Intn(i + 1)
			tests[i], tests[j] = tests[j], tests[i]
		}
	}
	getRcDataFrom := func(test test) (string, []string) {
		rc := newReplicationController(test.numReplicas)

		rcName := fmt.Sprintf("rc-%v", test.numReplicas)
		rc.Name = rcName
		rc.Spec.Selector[rcName] = rcName

		podList := newPodList(nil, 5, v1.PodRunning, rc)
		rcKey, err := KeyFunc(rc)
		if err != nil {
			t.Fatalf("Couldn't get key for object %#v: %v", rc, err)
		}

		rcPodNames := []string{}
		for i := range podList.Items {
			p := &podList.Items[i]
			p.Name = fmt.Sprintf("%v-%v", p.Name, rc.Name)
			rcPodNames = append(rcPodNames, PodKey(p))
		}
		uidExp.ExpectDeletions(logger, rcKey, rcPodNames)
		return rcKey, rcPodNames
	}
	tests := []test{
		{name: "Replication controller with 2 replicas", numReplicas: 2},
		{name: "Replication controller with 1 replica", numReplicas: 1},
		{name: "Replication controller with no replicas", numReplicas: 0},
		{name: "Replication controller with 5 replicas", numReplicas: 5},
	}

	shuffleTests(tests)
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			rcKey, rcPodNames := getRcDataFrom(test)
			assert.False(t, uidExp.SatisfiedExpectations(logger, rcKey),
				"Controller %v satisfied expectations before deletion", rcKey)

			for _, p := range rcPodNames {
				uidExp.DeletionObserved(logger, rcKey, p)
			}

			assert.True(t, uidExp.SatisfiedExpectations(logger, rcKey),
				"Controller %v didn't satisfy expectations after deletion", rcKey)

			uidExp.DeleteExpectations(logger, rcKey)

			assert.Nil(t, uidExp.GetUIDs(rcKey),
				"Failed to delete uid expectations for %v", rcKey)
		})
	}
}
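
// TestCreatePodsWithGenerateName verifies that CreatePods and CreatePodsWithGenerateName POST a
// pod built from the controller's pod template, carrying the expected labels, generate name, and
// owner reference.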
func TestCreatePodsWithGenerateName(t *testing.T) {
	ns := metav1.NamespaceDefault
	generateName := "hello-"
	controllerSpec := newReplicationController(1)
	controllerRef := metav1.NewControllerRef(controllerSpec, v1.SchemeGroupVersion.WithKind("ReplicationController"))

	type test struct {
		name            string
		podCreationFunc func(podControl RealPodControl) error
		wantPod         *v1.Pod
	}
	var tests = []test{
		{
			name: "Create pod",
			podCreationFunc: func(podControl RealPodControl) error {
				return podControl.CreatePods(context.TODO(), ns, controllerSpec.Spec.Template, controllerSpec, controllerRef)
			},
			wantPod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Labels:       controllerSpec.Spec.Template.Labels,
					GenerateName: fmt.Sprintf("%s-", controllerSpec.Name),
				},
				Spec: controllerSpec.Spec.Template.Spec,
			},
		},
		{
			name: "Create pod with generate name",
			podCreationFunc: func(podControl RealPodControl) error {
				// Make sure createReplica sends a POST to the apiserver with a pod from the controller's pod template
				return podControl.CreatePodsWithGenerateName(context.TODO(), ns, controllerSpec.Spec.Template, controllerSpec, controllerRef, generateName)
			},
			wantPod: &v1.Pod{
				ObjectMeta: metav1.ObjectMeta{
					Labels:          controllerSpec.Spec.Template.Labels,
					GenerateName:    generateName,
					OwnerReferences: []metav1.OwnerReference{*controllerRef},
				},
				Spec: controllerSpec.Spec.Template.Spec,
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			body := runtime.EncodeOrDie(clientscheme.Codecs.LegacyCodec(v1.SchemeGroupVersion), &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "empty_pod"}})
			fakeHandler := utiltesting.FakeHandler{
				StatusCode:   200,
				ResponseBody: string(body),
			}
			testServer := httptest.NewServer(&fakeHandler)
			defer testServer.Close()
			clientset := clientset.NewForConfigOrDie(&restclient.Config{Host: testServer.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &schema.GroupVersion{Group: "", Version: "v1"}, ContentType: runtime.ContentTypeJSON}})

			podControl := RealPodControl{
				KubeClient: clientset,
				Recorder:   &record.FakeRecorder{},
			}

			err := test.podCreationFunc(podControl)
			require.NoError(t, err, "unexpected error: %v", err)

			fakeHandler.ValidateRequest(t, "/api/v1/namespaces/default/pods", "POST", nil)
			var actualPod = &v1.Pod{}
			err = json.Unmarshal([]byte(fakeHandler.RequestBody), actualPod)
			require.NoError(t, err, "unexpected error: %v", err)
			assert.True(t, apiequality.Semantic.DeepDerivative(test.wantPod, actualPod),
				"Body: %s", fakeHandler.RequestBody)
		})
	}
}
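
// TestDeletePodsAllowsMissing verifies that deleting a pod that does not exist surfaces a
// NotFound error rather than some other failure.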
func TestDeletePodsAllowsMissing(t *testing.T) {
	fakeClient := fake.NewSimpleClientset()
	podControl := RealPodControl{
		KubeClient: fakeClient,
		Recorder:   &record.FakeRecorder{},
	}

	controllerSpec := newReplicationController(1)

	err := podControl.DeletePod(context.TODO(), "namespace-name", "podName", controllerSpec)
	assert.True(t, apierrors.IsNotFound(err))
}
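
// TestCountTerminatingPods verifies that CountTerminatingPods and FilterTerminatingPods count
// only pods that have a deletion timestamp and are still in the Pending or Running phase.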
func TestCountTerminatingPods(t *testing.T) {
	now := metav1.Now()

	// This rc is not needed by the test, only the newPodList to give the pods labels/a namespace.
	rc := newReplicationController(0)
	podList := newPodList(nil, 7, v1.PodRunning, rc)
	podList.Items[0].Status.Phase = v1.PodSucceeded
	podList.Items[1].Status.Phase = v1.PodFailed
	podList.Items[2].Status.Phase = v1.PodPending
	podList.Items[2].SetDeletionTimestamp(&now)
	podList.Items[3].Status.Phase = v1.PodRunning
	podList.Items[3].SetDeletionTimestamp(&now)

	var podPointers []*v1.Pod
	for i := range podList.Items {
		podPointers = append(podPointers, &podList.Items[i])
	}

	terminatingPods := CountTerminatingPods(podPointers)
	assert.Equal(t, int32(2), terminatingPods)

	terminatingList := FilterTerminatingPods(podPointers)
	assert.Len(t, terminatingList, 2)
}
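
// TestClaimedPodFiltering verifies that FilterClaimedPods keeps only pods whose controller owner
// reference points at the ReplicaSet's UID and whose labels match its selector.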
func TestClaimedPodFiltering(t *testing.T) {
	rsUUID := uuid.NewUUID()

	type podData struct {
		podName         string
		ownerReferences []metav1.OwnerReference
		labels          map[string]string
	}
	type test struct {
		name         string
		pods         []podData
		wantPodNames []string
	}
	tests := []test{
		{
			name: "Filters claimed pods",
			pods: []podData{
				// single owner reference
				{podName: "claimed-1", labels: map[string]string{"foo": "bar"}, ownerReferences: []metav1.OwnerReference{
					{UID: rsUUID, Controller: ptr.To(true)},
				}},
				{podName: "wrong-selector-1", labels: map[string]string{"foo": "baz"}, ownerReferences: []metav1.OwnerReference{
					{UID: rsUUID, Controller: ptr.To(true)},
				}},
				{podName: "non-controller-1", labels: map[string]string{"foo": "bar"}, ownerReferences: []metav1.OwnerReference{
					{UID: rsUUID, Controller: nil},
				}},
				{podName: "other-controller-1", labels: map[string]string{"foo": "bar"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID(), Controller: ptr.To(true)},
				}},
				{podName: "other-workload-1", labels: map[string]string{"foo": "bee"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID(), Controller: ptr.To(true)},
				}},
				{podName: "standalone-pod-1", labels: map[string]string{"foo": "beetle"}, ownerReferences: []metav1.OwnerReference{}},
				// additional controller owner reference set to controller=false
				{podName: "claimed-2", labels: map[string]string{"foo": "bar"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID(), Controller: ptr.To(false)},
					{UID: rsUUID, Controller: ptr.To(true)},
				}},
				{podName: "wrong-selector-2", labels: map[string]string{"foo": "baz"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID(), Controller: ptr.To(false)},
					{UID: rsUUID, Controller: ptr.To(true)},
				}},
				{podName: "non-controller-2", labels: map[string]string{"foo": "bar"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID(), Controller: ptr.To(false)},
					{UID: rsUUID, Controller: ptr.To(false)},
				}},
				{podName: "other-controller-2", labels: map[string]string{"foo": "bar"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID(), Controller: ptr.To(false)},
					{UID: uuid.NewUUID(), Controller: ptr.To(true)},
				}},
				{podName: "other-workload-2", labels: map[string]string{"foo": "bee"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID(), Controller: ptr.To(false)},
					{UID: uuid.NewUUID(), Controller: ptr.To(true)},
				}},
				{podName: "standalone-pod-2", labels: map[string]string{"foo": "beetle"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID(), Controller: ptr.To(false)},
				}},
				// additional controller owner reference set to controller=nil
				{podName: "claimed-3", labels: map[string]string{"foo": "bar"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID()},
					{UID: rsUUID, Controller: ptr.To(true)},
				}},
				{podName: "wrong-selector-3", labels: nil, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID()},
					{UID: rsUUID, Controller: ptr.To(true)},
				}},
				{podName: "non-controller-3", labels: map[string]string{"foo": "bar"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID()},
					{UID: rsUUID, Controller: nil},
				}},
				{podName: "other-controller-3", labels: map[string]string{"foo": "bar"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID()},
					{UID: uuid.NewUUID(), Controller: ptr.To(true)},
				}},
				{podName: "other-workload-3", labels: map[string]string{"foo": "bee"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID()},
				}},
				{podName: "standalone-pod-3", labels: map[string]string{"foo": "beetle"}, ownerReferences: []metav1.OwnerReference{
					{UID: uuid.NewUUID()},
				}},
			},
			wantPodNames: []string{"claimed-1", "claimed-2", "claimed-3"},
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// The ReplicaSet under test; it supplies the namespace, selector, and UID that the pods are filtered against.
			rs := newReplicaSet("test-claim", 3, rsUUID)
			var pods []*v1.Pod
			for _, p := range test.pods {
				pods = append(pods, &v1.Pod{
					ObjectMeta: metav1.ObjectMeta{
						Name:            p.podName,
						Namespace:       rs.Namespace,
						Labels:          p.labels,
						OwnerReferences: p.ownerReferences,
					},
					Status: v1.PodStatus{Phase: v1.PodRunning},
				})
			}
			selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector)
			if err != nil {
				t.Fatalf("Couldn't get selector for object %#v: %v", rs, err)
			}
			got := FilterClaimedPods(rs, selector, pods)
			gotNames := sets.NewString()
			for _, pod := range got {
				gotNames.Insert(pod.Name)
			}
			if diff := cmp.Diff(test.wantPodNames, gotNames.List()); diff != "" {
				t.Errorf("Claimed pod names (-want,+got):\n%s", diff)
			}
		})
	}
}
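
// TestActivePodFiltering verifies that FilterActivePods drops pods in the Succeeded or Failed
// phase and keeps the rest.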
func TestActivePodFiltering(t *testing.T) {
	logger, _ := ktesting.NewTestContext(t)
	type podData struct {
		podName  string
		podPhase v1.PodPhase
	}
	type test struct {
		name         string
		pods         []podData
		wantPodNames []string
	}
	tests := []test{
		{
			name: "Filters active pods",
			pods: []podData{
				{podName: "pod-1", podPhase: v1.PodSucceeded},
				{podName: "pod-2", podPhase: v1.PodFailed},
				{podName: "pod-3"},
				{podName: "pod-4"},
				{podName: "pod-5"},
			},
			wantPodNames: []string{"pod-3", "pod-4", "pod-5"},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// This rc is not needed by the test, only the newPodList to give the pods labels/a namespace.
			rc := newReplicationController(0)
			podList := newPodList(nil, 5, v1.PodRunning, rc)
			for idx, testPod := range test.pods {
				podList.Items[idx].Name = testPod.podName
				podList.Items[idx].Status.Phase = testPod.podPhase
			}

			var podPointers []*v1.Pod
			for i := range podList.Items {
				podPointers = append(podPointers, &podList.Items[i])
			}
			got := FilterActivePods(logger, podPointers)
			gotNames := sets.NewString()
			for _, pod := range got {
				gotNames.Insert(pod.Name)
			}
			if diff := cmp.Diff(test.wantPodNames, gotNames.List()); diff != "" {
				t.Errorf("Active pod names (-want,+got):\n%s", diff)
			}
		})
	}
}
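
// TestSortingActivePods verifies that ActivePods sorts pods from least to most "active":
// unscheduled before scheduled-but-pending, pending before unknown or running, not ready before
// ready, ready for less time before ready for longer, higher restart counts before lower, and
// more recently created before older.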
func TestSortingActivePods(t *testing.T) {
	now := metav1.Now()
	then := metav1.Time{Time: now.AddDate(0, -1, 0)}

	restartAlways := v1.ContainerRestartPolicyAlways
	tests := []struct {
		name      string
		pods      []v1.Pod
		wantOrder []string
	}{
		{
			name: "Sorts by active pod",
			pods: []v1.Pod{
				{
					ObjectMeta: metav1.ObjectMeta{Name: "unscheduled"},
					Spec:       v1.PodSpec{NodeName: ""},
					Status:     v1.PodStatus{Phase: v1.PodPending},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "scheduledButPending"},
					Spec:       v1.PodSpec{NodeName: "bar"},
					Status:     v1.PodStatus{Phase: v1.PodPending},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "unknownPhase"},
					Spec:       v1.PodSpec{NodeName: "foo"},
					Status:     v1.PodStatus{Phase: v1.PodUnknown},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "runningButNotReady"},
					Spec:       v1.PodSpec{NodeName: "foo"},
					Status:     v1.PodStatus{Phase: v1.PodRunning},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "runningNoLastTransitionTime"},
					Spec:       v1.PodSpec{NodeName: "foo"},
					Status: v1.PodStatus{
						Phase:             v1.PodRunning,
						Conditions:        []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue}},
						ContainerStatuses: []v1.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}},
					},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "runningWithLastTransitionTime"},
					Spec:       v1.PodSpec{NodeName: "foo"},
					Status: v1.PodStatus{
						Phase:             v1.PodRunning,
						Conditions:        []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue, LastTransitionTime: now}},
						ContainerStatuses: []v1.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}},
					},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "runningLongerTime"},
					Spec:       v1.PodSpec{NodeName: "foo"},
					Status: v1.PodStatus{
						Phase:             v1.PodRunning,
						Conditions:        []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue, LastTransitionTime: then}},
						ContainerStatuses: []v1.ContainerStatus{{RestartCount: 3}, {RestartCount: 0}},
					},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "lowerSidecarContainerRestartCount", CreationTimestamp: now},
					Spec: v1.PodSpec{
						NodeName: "foo",
						InitContainers: []v1.Container{{
							Name:          "sidecar",
							RestartPolicy: &restartAlways,
						}},
					},
					Status: v1.PodStatus{
						Phase:                 v1.PodRunning,
						Conditions:            []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue, LastTransitionTime: then}},
						ContainerStatuses:     []v1.ContainerStatus{{RestartCount: 2}, {RestartCount: 1}},
						InitContainerStatuses: []v1.ContainerStatus{{Name: "sidecar", RestartCount: 2}},
					},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "lowerContainerRestartCount", CreationTimestamp: now},
					Spec:       v1.PodSpec{NodeName: "foo"},
					Status: v1.PodStatus{
						Phase:             v1.PodRunning,
						Conditions:        []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue, LastTransitionTime: then}},
						ContainerStatuses: []v1.ContainerStatus{{RestartCount: 2}, {RestartCount: 1}},
					},
				},
				{
					ObjectMeta: metav1.ObjectMeta{Name: "oldest", CreationTimestamp: then},
					Spec:       v1.PodSpec{NodeName: "foo"},
					Status: v1.PodStatus{
						Phase:             v1.PodRunning,
						Conditions:        []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue, LastTransitionTime: then}},
						ContainerStatuses: []v1.ContainerStatus{{RestartCount: 2}, {RestartCount: 1}},
					},
				},
			},
			wantOrder: []string{
				"unscheduled",
				"scheduledButPending",
				"unknownPhase",
				"runningButNotReady",
				"runningNoLastTransitionTime",
				"runningWithLastTransitionTime",
				"runningLongerTime",
				"lowerSidecarContainerRestartCount",
				"lowerContainerRestartCount",
				"oldest",
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			numPods := len(test.pods)

			for i := 0; i < 20; i++ {
				idx := rand.Perm(numPods)
				randomizedPods := make([]*v1.Pod, numPods)
				for j := 0; j < numPods; j++ {
					randomizedPods[j] = &test.pods[idx[j]]
				}

				sort.Sort(ActivePods(randomizedPods))
				gotOrder := make([]string, len(randomizedPods))
				for i := range randomizedPods {
					gotOrder[i] = randomizedPods[i].Name
				}

				if diff := cmp.Diff(test.wantOrder, gotOrder); diff != "" {
					t.Errorf("Sorted active pod names (-want,+got):\n%s", diff)
				}
			}
		})
	}
}
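
// TestSortingActivePodsWithRanks verifies ActivePodsWithRanks, which extends the ActivePods
// ordering with a per-pod rank: a pod with a higher rank (one colocated with more active pods of
// related ReplicaSets, i.e. a "doubled-up" replica) sorts ahead of an otherwise comparable pod,
// so that scale-down prefers to delete it.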
func TestSortingActivePodsWithRanks(t *testing.T) {
	now := metav1.Now()
	then1Month := metav1.Time{Time: now.AddDate(0, -1, 0)}
	then2Hours := metav1.Time{Time: now.Add(-2 * time.Hour)}
	then5Hours := metav1.Time{Time: now.Add(-5 * time.Hour)}
	then8Hours := metav1.Time{Time: now.Add(-8 * time.Hour)}
	zeroTime := metav1.Time{}
	restartAlways := v1.ContainerRestartPolicyAlways
	pod := func(podName, nodeName string, phase v1.PodPhase, ready bool, restarts int32, sideRestarts int32, readySince metav1.Time, created metav1.Time, annotations map[string]string) *v1.Pod {
		var conditions []v1.PodCondition
		var containerStatuses []v1.ContainerStatus
		var initContainerStatuses []v1.ContainerStatus
		if ready {
			conditions = []v1.PodCondition{{Type: v1.PodReady, Status: v1.ConditionTrue, LastTransitionTime: readySince}}
			containerStatuses = []v1.ContainerStatus{{RestartCount: restarts}}
			initContainerStatuses = []v1.ContainerStatus{{Name: "sidecar", RestartCount: sideRestarts}}
		}
		return &v1.Pod{
			ObjectMeta: metav1.ObjectMeta{
				CreationTimestamp: created,
				Name:              podName,
				Annotations:       annotations,
			},
			Spec: v1.PodSpec{
				NodeName:       nodeName,
				InitContainers: []v1.Container{{Name: "sidecar", RestartPolicy: &restartAlways}},
			},
2019-07-10 18:56:19 -04:00
Status : v1 . PodStatus {
2024-05-19 11:50:25 -04:00
Conditions : conditions ,
ContainerStatuses : containerStatuses ,
InitContainerStatuses : initContainerStatuses ,
Phase : phase ,
2019-07-10 18:56:19 -04:00
} ,
}
}
var (
2024-05-19 11:50:25 -04:00
unscheduledPod = pod ( "unscheduled" , "" , v1 . PodPending , false , 0 , 0 , zeroTime , zeroTime , nil )
scheduledPendingPod = pod ( "pending" , "node" , v1 . PodPending , false , 0 , 0 , zeroTime , zeroTime , nil )
unknownPhasePod = pod ( "unknown-phase" , "node" , v1 . PodUnknown , false , 0 , 0 , zeroTime , zeroTime , nil )
runningNotReadyPod = pod ( "not-ready" , "node" , v1 . PodRunning , false , 0 , 0 , zeroTime , zeroTime , nil )
runningReadyNoLastTransitionTimePod = pod ( "ready-no-last-transition-time" , "node" , v1 . PodRunning , true , 0 , 0 , zeroTime , zeroTime , nil )
runningReadyNow = pod ( "ready-now" , "node" , v1 . PodRunning , true , 0 , 0 , now , now , nil )
runningReadyThen = pod ( "ready-then" , "node" , v1 . PodRunning , true , 0 , 0 , then1Month , then1Month , nil )
runningReadyNowHighRestarts = pod ( "ready-high-restarts" , "node" , v1 . PodRunning , true , 9001 , 0 , now , now , nil )
runningReadyNowHighSideRestarts = pod ( "ready-high-restarts" , "node" , v1 . PodRunning , true , 9001 , 9001 , now , now , nil )
runningReadyNowCreatedThen = pod ( "ready-now-created-then" , "node" , v1 . PodRunning , true , 0 , 0 , now , then1Month , nil )
lowPodDeletionCost = pod ( "low-deletion-cost" , "node" , v1 . PodRunning , true , 0 , 0 , now , then1Month , map [ string ] string { core . PodDeletionCost : "10" } )
highPodDeletionCost = pod ( "high-deletion-cost" , "node" , v1 . PodRunning , true , 0 , 0 , now , then1Month , map [ string ] string { core . PodDeletionCost : "100" } )
unscheduled5Hours = pod ( "unscheduled-5-hours" , "" , v1 . PodPending , false , 0 , 0 , then5Hours , then5Hours , nil )
unscheduled8Hours = pod ( "unscheduled-10-hours" , "" , v1 . PodPending , false , 0 , 0 , then8Hours , then8Hours , nil )
ready2Hours = pod ( "ready-2-hours" , "" , v1 . PodRunning , true , 0 , 0 , then2Hours , then1Month , nil )
ready5Hours = pod ( "ready-5-hours" , "" , v1 . PodRunning , true , 0 , 0 , then5Hours , then1Month , nil )
ready10Hours = pod ( "ready-10-hours" , "" , v1 . PodRunning , true , 0 , 0 , then8Hours , then1Month , nil )
2019-07-10 18:56:19 -04:00
)
2020-11-26 16:34:21 -05:00
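// equalityTests lists pods (or pairs of pods) that are expected to compare as
// equivalent under ActivePodsWithRanks.Less when both are given the same rank.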
equalityTests := [ ] struct {
p1 * v1 . Pod
p2 * v1 . Pod
disableLogarithmicScaleDown bool
} {
{ p1 : unscheduledPod } ,
{ p1 : scheduledPendingPod } ,
{ p1 : unknownPhasePod } ,
{ p1 : runningNotReadyPod } ,
{ p1 : runningReadyNowCreatedThen } ,
{ p1 : runningReadyNow } ,
{ p1 : runningReadyThen } ,
{ p1 : runningReadyNowHighRestarts } ,
{ p1 : runningReadyNowCreatedThen } ,
{ p1 : unscheduled5Hours , p2 : unscheduled8Hours } ,
{ p1 : ready5Hours , p2 : ready10Hours } ,
2019-07-10 18:56:19 -04:00
}
2023-07-10 15:52:28 -04:00
for i , test := range equalityTests {
t . Run ( fmt . Sprintf ( "Equality tests %d" , i ) , func ( t * testing . T ) {
2024-04-23 04:39:47 -04:00
featuregatetesting . SetFeatureGateDuringTest ( t , utilfeature . DefaultFeatureGate , features . LogarithmicScaleDown , ! test . disableLogarithmicScaleDown )
2023-07-10 15:52:28 -04:00
if test . p2 == nil {
test . p2 = test . p1
}
podsWithRanks := ActivePodsWithRanks {
Pods : [ ] * v1 . Pod { test . p1 , test . p2 } ,
Rank : [ ] int { 1 , 1 } ,
Now : now ,
}
if podsWithRanks . Less ( 0 , 1 ) || podsWithRanks . Less ( 1 , 0 ) {
t . Errorf ( "expected pod %q to be equivalent to %q" , test . p1 . Name , test . p2 . Name )
}
} )
2019-07-10 18:56:19 -04:00
}
2023-07-10 15:52:28 -04:00
2019-07-10 18:56:19 -04:00
type podWithRank struct {
pod * v1 . Pod
rank int
}
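// inequalityTests pairs a pod that should sort first (lesser, i.e. preferred
// for deletion) with one that should sort after it (greater), together with
// the ranks fed to ActivePodsWithRanks.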
inequalityTests := [ ] struct {
2020-11-26 16:34:21 -05:00
lesser , greater podWithRank
disablePodDeletioncost bool
disableLogarithmicScaleDown bool
2019-07-10 18:56:19 -04:00
} {
2021-02-17 15:39:42 -05:00
{ lesser : podWithRank { unscheduledPod , 1 } , greater : podWithRank { scheduledPendingPod , 2 } } ,
{ lesser : podWithRank { unscheduledPod , 2 } , greater : podWithRank { scheduledPendingPod , 1 } } ,
{ lesser : podWithRank { scheduledPendingPod , 1 } , greater : podWithRank { unknownPhasePod , 2 } } ,
{ lesser : podWithRank { unknownPhasePod , 1 } , greater : podWithRank { runningNotReadyPod , 2 } } ,
{ lesser : podWithRank { runningNotReadyPod , 1 } , greater : podWithRank { runningReadyNoLastTransitionTimePod , 1 } } ,
{ lesser : podWithRank { runningReadyNoLastTransitionTimePod , 1 } , greater : podWithRank { runningReadyNow , 1 } } ,
{ lesser : podWithRank { runningReadyNow , 2 } , greater : podWithRank { runningReadyNoLastTransitionTimePod , 1 } } ,
{ lesser : podWithRank { runningReadyNow , 1 } , greater : podWithRank { runningReadyThen , 1 } } ,
{ lesser : podWithRank { runningReadyNow , 2 } , greater : podWithRank { runningReadyThen , 1 } } ,
{ lesser : podWithRank { runningReadyNowHighRestarts , 1 } , greater : podWithRank { runningReadyNow , 1 } } ,
2024-05-19 11:50:25 -04:00
{ lesser : podWithRank { runningReadyNowHighSideRestarts , 1 } , greater : podWithRank { runningReadyNowHighRestarts , 1 } } ,
2021-02-17 15:39:42 -05:00
{ lesser : podWithRank { runningReadyNow , 2 } , greater : podWithRank { runningReadyNowHighRestarts , 1 } } ,
{ lesser : podWithRank { runningReadyNow , 1 } , greater : podWithRank { runningReadyNowCreatedThen , 1 } } ,
{ lesser : podWithRank { runningReadyNowCreatedThen , 2 } , greater : podWithRank { runningReadyNow , 1 } } ,
{ lesser : podWithRank { lowPodDeletionCost , 2 } , greater : podWithRank { highPodDeletionCost , 1 } } ,
{ lesser : podWithRank { highPodDeletionCost , 2 } , greater : podWithRank { lowPodDeletionCost , 1 } , disablePodDeletioncost : true } ,
2020-11-26 16:34:21 -05:00
{ lesser : podWithRank { ready2Hours , 1 } , greater : podWithRank { ready5Hours , 1 } } ,
2019-07-10 18:56:19 -04:00
}
2023-07-10 15:52:28 -04:00
2021-02-17 15:39:42 -05:00
for i , test := range inequalityTests {
2023-07-10 15:52:28 -04:00
t . Run ( fmt . Sprintf ( "Inequality tests %d" , i ) , func ( t * testing . T ) {
2025-10-01 23:55:46 -04:00
featuregatetesting . SetFeatureGatesDuringTest ( t , utilfeature . DefaultFeatureGate , featuregatetesting . FeatureOverrides {
features . PodDeletionCost : ! test . disablePodDeletioncost ,
features . LogarithmicScaleDown : ! test . disableLogarithmicScaleDown ,
} )
2021-02-17 15:39:42 -05:00
podsWithRanks := ActivePodsWithRanks {
Pods : [ ] * v1 . Pod { test . lesser . pod , test . greater . pod } ,
Rank : [ ] int { test . lesser . rank , test . greater . rank } ,
2020-11-26 16:34:21 -05:00
Now : now ,
2021-02-17 15:39:42 -05:00
}
if ! podsWithRanks . Less ( 0 , 1 ) {
t . Errorf ( "expected pod %q with rank %v to be less than %q with rank %v" , podsWithRanks . Pods [ 0 ] . Name , podsWithRanks . Rank [ 0 ] , podsWithRanks . Pods [ 1 ] . Name , podsWithRanks . Rank [ 1 ] )
}
if podsWithRanks . Less ( 1 , 0 ) {
t . Errorf ( "expected pod %q with rank %v not to be less than %v with rank %v" , podsWithRanks . Pods [ 1 ] . Name , podsWithRanks . Rank [ 1 ] , podsWithRanks . Pods [ 0 ] . Name , podsWithRanks . Rank [ 0 ] )
}
} )
2019-07-10 18:56:19 -04:00
}
}
2025-06-05 08:48:30 -04:00
func TestNextPodAvailabilityCheck ( t * testing . T ) {
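// newPodWithReadyCond builds a pod whose PodReady condition has the given
// status and whose transition happened beforeSec seconds before the supplied
// reference time.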
newPodWithReadyCond := func ( now metav1 . Time , ready bool , beforeSec int ) * v1 . Pod {
conditionStatus := v1 . ConditionFalse
if ready {
conditionStatus = v1 . ConditionTrue
}
return & v1 . Pod {
Status : v1 . PodStatus {
Conditions : [ ] v1 . PodCondition {
{
Type : v1 . PodReady ,
LastTransitionTime : metav1 . NewTime ( now . Add ( - 1 * time . Duration ( beforeSec ) * time . Second ) ) ,
Status : conditionStatus ,
} ,
} ,
} ,
}
}
now := metav1 . Now ( )
tests := [ ] struct {
name string
pod * v1 . Pod
minReadySeconds int32
expected * time . Duration
} {
{
name : "not ready" ,
pod : newPodWithReadyCond ( now , false , 0 ) ,
minReadySeconds : 0 ,
expected : nil ,
} ,
{
name : "no minReadySeconds defined" ,
pod : newPodWithReadyCond ( now , true , 0 ) ,
minReadySeconds : 0 ,
expected : nil ,
} ,
{
name : "lastTransitionTime is zero" ,
pod : func ( ) * v1 . Pod {
pod := newPodWithReadyCond ( now , true , 0 )
pod . Status . Conditions [ 0 ] . LastTransitionTime = metav1 . Time { }
return pod
} ( ) ,
minReadySeconds : 1 ,
expected : nil ,
} ,
{
name : "just became ready - available in 1s" ,
pod : newPodWithReadyCond ( now , true , 0 ) ,
minReadySeconds : 1 ,
expected : ptr . To ( time . Second ) ,
} ,
{
name : "ready for 20s - available in 10s" ,
pod : newPodWithReadyCond ( now , true , 20 ) ,
minReadySeconds : 30 ,
expected : ptr . To ( 10 * time . Second ) ,
} ,
{
name : "available" ,
pod : newPodWithReadyCond ( now , true , 51 ) ,
minReadySeconds : 50 ,
expected : nil ,
} ,
}
for _ , test := range tests {
t . Run ( test . name , func ( t * testing . T ) {
nextAvailable := nextPodAvailabilityCheck ( test . pod , test . minReadySeconds , now . Time )
if ! ptr . Equal ( nextAvailable , test . expected ) {
t . Errorf ( "expected next pod availability check: %v, got: %v" , test . expected , nextAvailable )
}
} )
}
}
func TestFindMinNextPodAvailabilitySimpleCheck ( t * testing . T ) {
now := metav1 . Now ( )
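// The pod helper relies on testutil.NewPod returning a pod whose first status
// condition is PodReady=True; it only adjusts the transition time for ready
// pods or flips the condition to False for unready ones.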
pod := func ( name string , ready bool , beforeSec int ) * v1 . Pod {
p := testutil . NewPod ( name , "node0" )
if ready {
p . Status . Conditions [ 0 ] . LastTransitionTime = metav1 . NewTime ( now . Add ( - 1 * time . Duration ( beforeSec ) * time . Second ) )
} else {
p . Status . Conditions [ 0 ] . Status = v1 . ConditionFalse
}
return p
}
tests := [ ] struct {
name string
pods [ ] * v1 . Pod
minReadySeconds int32
expected * time . Duration
expectedPod * string
} {
{
name : "no pods" ,
pods : nil ,
minReadySeconds : 0 ,
expected : nil ,
expectedPod : nil ,
} ,
{
name : "unready pods" ,
pods : [ ] * v1 . Pod {
pod ( "pod1" , false , 0 ) ,
pod ( "pod2" , false , 0 ) ,
} ,
minReadySeconds : 0 ,
expected : nil ,
expectedPod : nil ,
} ,
{
name : "ready pods with no minReadySeconds" ,
pods : [ ] * v1 . Pod {
pod ( "pod1" , true , 0 ) ,
pod ( "pod2" , true , 0 ) ,
} ,
minReadySeconds : 0 ,
expected : nil ,
expectedPod : nil ,
} ,
{
name : "unready and ready pods should find min next availability check" ,
pods : [ ] * v1 . Pod {
pod ( "pod1" , false , 0 ) ,
pod ( "pod2" , true , 2 ) ,
pod ( "pod3" , true , 0 ) ,
pod ( "pod4" , true , 4 ) ,
pod ( "pod5" , false , 0 ) ,
} ,
minReadySeconds : 10 ,
expected : ptr . To ( 6 * time . Second ) ,
expectedPod : ptr . To ( "pod4" ) ,
} ,
{
name : "unready and available pods do not require min next availability check" , // only after pods become ready we can schedule one
pods : [ ] * v1 . Pod {
pod ( "pod1" , false , 0 ) ,
pod ( "pod2" , true , 15 ) ,
pod ( "pod3" , true , 11 ) ,
pod ( "pod4" , true , 10 ) ,
pod ( "pod5" , false , 0 ) ,
} ,
minReadySeconds : 10 ,
expected : nil ,
expectedPod : nil ,
} ,
}
for _ , test := range tests {
t . Run ( test . name , func ( t * testing . T ) {
nextAvailable , checkPod := findMinNextPodAvailabilitySimpleCheck ( test . pods , test . minReadySeconds , now . Time )
var checkPodName * string
if checkPod != nil {
checkPodName = ptr . To ( checkPod . Name )
}
if ! ptr . Equal ( nextAvailable , test . expected ) {
t . Errorf ( "expected next min pod availability check: %v, got: %v" , test . expected , nextAvailable )
}
if ! ptr . Equal ( checkPodName , test . expectedPod ) {
t . Errorf ( "expected next min pod availability check for pod: %v, got: %v" , test . expectedPod , checkPodName )
}
// using the same now for status evaluation and the clock should return the same result as findMinNextPodAvailabilitySimpleCheck
nextAvailable = FindMinNextPodAvailabilityCheck ( test . pods , test . minReadySeconds , now . Time , testingclock . NewFakeClock ( now . Time ) )
if ! ptr . Equal ( nextAvailable , test . expected ) {
t . Errorf ( "expected next min pod availability check when status evaluation and clock is now: %v, got: %v" , test . expected , nextAvailable )
}
} )
}
}
func TestFindMinNextPodAvailability ( t * testing . T ) {
now := metav1 . Now ( )
pod := func ( name string , ready bool , beforeSec int ) * v1 . Pod {
p := testutil . NewPod ( name , "node0" )
if ready {
p . Status . Conditions [ 0 ] . LastTransitionTime = metav1 . NewTime ( now . Add ( - 1 * time . Duration ( beforeSec ) * time . Second ) )
} else {
p . Status . Conditions [ 0 ] . Status = v1 . ConditionFalse
}
return p
}
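// statusEvaluationDelaySeconds simulates how far the controller's clock has
// advanced past the time at which the pod statuses were evaluated, so the
// returned wait shrinks accordingly (clamped at zero, never negative).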
tests := [ ] struct {
name string
pods [ ] * v1 . Pod
minReadySeconds int32
statusEvaluationDelaySeconds int
expected * time . Duration
} {
{
name : "unready and ready pods should find min next availability check considering status evaluation/update delay" ,
pods : [ ] * v1 . Pod {
pod ( "pod1" , false , 0 ) ,
pod ( "pod2" , true , 2 ) ,
pod ( "pod3" , true , 0 ) ,
pod ( "pod4" , true , 4 ) ,
pod ( "pod5" , false , 0 ) ,
} ,
minReadySeconds : 10 ,
statusEvaluationDelaySeconds : 2 , // total is 4+2 since the pod4 became ready
expected : ptr . To ( 4 * time . Second ) ,
} ,
{
name : "unready and ready pods should find min next availability check even if the status evaluation delay is longer than minReadySeconds" ,
pods : [ ] * v1 . Pod {
pod ( "pod1" , false , 0 ) ,
pod ( "pod2" , true , 2 ) ,
pod ( "pod3" , true , 0 ) ,
pod ( "pod4" , true , 4 ) ,
pod ( "pod5" , false , 0 ) ,
} ,
minReadySeconds : 10 ,
statusEvaluationDelaySeconds : 7 , // total is 4+7 since the pod4 became ready
expected : ptr . To ( 0 * time . Second ) ,
} ,
}
for _ , test := range tests {
t . Run ( test . name , func ( t * testing . T ) {
oldNow := now . Time
newNow := testingclock . NewFakePassiveClock ( now . Add ( time . Duration ( test . statusEvaluationDelaySeconds ) * time . Second ) )
nextAvailable := FindMinNextPodAvailabilityCheck ( test . pods , test . minReadySeconds , oldNow , newNow )
if ! ptr . Equal ( nextAvailable , test . expected ) {
t . Errorf ( "expected next min pod availability check: %v, got: %v" , test . expected , nextAvailable )
}
} )
}
}
2017-02-08 17:18:13 -05:00
func TestActiveReplicaSetsFiltering ( t * testing . T ) {
2023-07-10 15:52:28 -04:00
rsUuid := uuid . NewUUID ( )
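// FilterActiveReplicaSets is expected to drop nil entries and ReplicaSets
// scaled to zero replicas, preserving the order of the remaining ReplicaSets.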
tests := [ ] struct {
name string
replicaSets [ ] * apps . ReplicaSet
wantReplicaSets [ ] * apps . ReplicaSet
} {
{
name : "Filters active replica sets" ,
replicaSets : [ ] * apps . ReplicaSet {
newReplicaSet ( "zero" , 0 , rsUuid ) ,
nil ,
newReplicaSet ( "foo" , 1 , rsUuid ) ,
newReplicaSet ( "bar" , 2 , rsUuid ) ,
} ,
wantReplicaSets : [ ] * apps . ReplicaSet {
newReplicaSet ( "foo" , 1 , rsUuid ) ,
newReplicaSet ( "bar" , 2 , rsUuid ) ,
} ,
} ,
2017-02-08 17:18:13 -05:00
}
2023-07-10 15:52:28 -04:00
for _ , test := range tests {
t . Run ( test . name , func ( t * testing . T ) {
gotReplicaSets := FilterActiveReplicaSets ( test . replicaSets )
if diff := cmp . Diff ( test . wantReplicaSets , gotReplicaSets ) ; diff != "" {
t . Errorf ( "Active replica set names (-want,+got):\n%s" , diff )
}
} )
2023-07-07 00:01:06 -04:00
}
2017-02-08 17:18:13 -05:00
}
2017-05-17 19:53:46 -04:00
func TestComputeHash ( t * testing . T ) {
2017-08-13 08:10:25 -04:00
collisionCount := int32 ( 1 )
otherCollisionCount := int32 ( 2 )
maxCollisionCount := int32 ( math . MaxInt32 )
2017-05-17 19:53:46 -04:00
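// ComputeHash mixes the collision count into the pod template hash, so hashing
// the same template with different collision counts should yield different values.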
tests := [ ] struct {
name string
template * v1 . PodTemplateSpec
2017-08-13 08:10:25 -04:00
collisionCount * int32
otherCollisionCount * int32
2017-05-17 19:53:46 -04:00
} {
{
name : "simple" ,
template : & v1 . PodTemplateSpec { } ,
2017-08-13 08:10:25 -04:00
collisionCount : & collisionCount ,
otherCollisionCount : & otherCollisionCount ,
2017-05-17 19:53:46 -04:00
} ,
{
name : "using math.MaxInt64" ,
template : & v1 . PodTemplateSpec { } ,
collisionCount : nil ,
2017-08-13 08:10:25 -04:00
otherCollisionCount : & maxCollisionCount ,
2017-05-17 19:53:46 -04:00
} ,
}
for _ , test := range tests {
hash := ComputeHash ( test . template , test . collisionCount )
otherHash := ComputeHash ( test . template , test . otherCollisionCount )
2017-09-20 01:24:07 -04:00
assert . NotEqual ( t , hash , otherHash , "expected different hashes but got the same: %d" , hash )
2017-05-17 19:53:46 -04:00
}
}
2017-08-07 07:29:39 -04:00
func TestRemoveTaintOffNode ( t * testing . T ) {
tests := [ ] struct {
name string
nodeHandler * testutil . FakeNodeHandler
nodeName string
taintsToRemove [ ] * v1 . Taint
expectedTaints [ ] v1 . Taint
requestCount int
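// requestCount is the total number of API calls the fake node handler is
// expected to record: the test's own two Gets plus, when a taint actually has
// to be removed, the fresh Get and Patch issued by RemoveTaintOffNode.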
} {
{
name : "remove one taint from node" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToRemove : [ ] * v1 . Taint {
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
expectedTaints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
} ,
requestCount : 4 ,
} ,
{
name : "remove multiple taints from node" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
{ Key : "key3" , Value : "value3" , Effect : "NoSchedule" } ,
{ Key : "key4" , Value : "value4" , Effect : "NoExecute" } ,
} ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToRemove : [ ] * v1 . Taint {
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
{ Key : "key3" , Value : "value3" , Effect : "NoSchedule" } ,
} ,
expectedTaints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key4" , Value : "value4" , Effect : "NoExecute" } ,
} ,
requestCount : 4 ,
} ,
{
name : "remove no-exist taints from node" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToRemove : [ ] * v1 . Taint {
{ Key : "key3" , Value : "value3" , Effect : "NoSchedule" } ,
} ,
expectedTaints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
requestCount : 2 ,
} ,
{
name : "remove taint from node without taints" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToRemove : [ ] * v1 . Taint {
{ Key : "key3" , Value : "value3" , Effect : "NoSchedule" } ,
} ,
expectedTaints : nil ,
requestCount : 2 ,
} ,
{
name : "remove empty taint list from node without taints" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToRemove : [ ] * v1 . Taint { } ,
expectedTaints : nil ,
requestCount : 2 ,
} ,
{
name : "remove empty taint list from node" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToRemove : [ ] * v1 . Taint { } ,
expectedTaints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
requestCount : 2 ,
} ,
}
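	// For each case: fetch the node, strip the requested taints with
	// RemoveTaintOffNode, then re-fetch the node and verify both the remaining
	// taints and the total number of API requests recorded by the fake handler
	// (the two explicit Gets below are part of that count).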
	for _, test := range tests {
		node, _ := test.nodeHandler.Get(context.TODO(), test.nodeName, metav1.GetOptions{})

		err := RemoveTaintOffNode(context.TODO(), test.nodeHandler, test.nodeName, node, test.taintsToRemove...)
		require.NoError(t, err, "%s: RemoveTaintOffNode() error = %v", test.name, err)

		node, _ = test.nodeHandler.Get(context.TODO(), test.nodeName, metav1.GetOptions{})
		assert.EqualValues(t, test.expectedTaints, node.Spec.Taints,
			"%s: failed to remove taint off node: expected %+v, got %+v",
			test.name, test.expectedTaints, node.Spec.Taints)

		assert.Equal(t, test.requestCount, test.nodeHandler.RequestCount,
			"%s: unexpected request count: expected %+v, got %+v",
			test.name, test.requestCount, test.nodeHandler.RequestCount)
	}
}
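
// TestAddOrUpdateTaintOnNode covers adding new, duplicate, and empty taint
// lists, retrying after a concurrent node update, and surfacing a NotFound
// error when the target node does not exist.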
func TestAddOrUpdateTaintOnNode ( t * testing . T ) {
tests := [ ] struct {
name string
nodeHandler * testutil . FakeNodeHandler
nodeName string
taintsToAdd [ ] * v1 . Taint
expectedTaints [ ] v1 . Taint
requestCount int
expectedErr error
} {
{
name : "add one taint on node" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
} ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToAdd : [ ] * v1 . Taint {
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
expectedTaints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
requestCount : 3 ,
} ,
{
name : "add multiple taints to node" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToAdd : [ ] * v1 . Taint {
{ Key : "key3" , Value : "value3" , Effect : "NoSchedule" } ,
{ Key : "key4" , Value : "value4" , Effect : "NoExecute" } ,
} ,
expectedTaints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
{ Key : "key3" , Value : "value3" , Effect : "NoSchedule" } ,
{ Key : "key4" , Value : "value4" , Effect : "NoExecute" } ,
} ,
requestCount : 3 ,
} ,
{
name : "add exist taints to node" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToAdd : [ ] * v1 . Taint {
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
expectedTaints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
requestCount : 2 ,
} ,
{
name : "add taint to node without taints" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToAdd : [ ] * v1 . Taint {
{ Key : "key3" , Value : "value3" , Effect : "NoSchedule" } ,
} ,
expectedTaints : [ ] v1 . Taint {
{ Key : "key3" , Value : "value3" , Effect : "NoSchedule" } ,
} ,
requestCount : 3 ,
} ,
{
name : "add empty taint list to node without taints" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToAdd : [ ] * v1 . Taint { } ,
expectedTaints : nil ,
requestCount : 1 ,
} ,
{
name : "add empty taint list to node" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node1" ,
taintsToAdd : [ ] * v1 . Taint { } ,
expectedTaints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
requestCount : 1 ,
} ,
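		// This case simulates a concurrent modification: AsyncCalls swaps in a
		// node with a newer ResourceVersion and no taints, so the helper has to
		// re-read the node and retry, which is why the expected request count is
		// higher and only the newly added taint remains.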
{
name : "add taint to changed node" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
ResourceVersion : "1" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
} ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
AsyncCalls : [ ] func ( * testutil . FakeNodeHandler ) { func ( m * testutil . FakeNodeHandler ) {
if len ( m . UpdatedNodes ) == 0 {
m . UpdatedNodes = append ( m . UpdatedNodes , & v1 . Node {
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
ResourceVersion : "2" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint { } ,
} } )
}
} } ,
} ,
nodeName : "node1" ,
taintsToAdd : [ ] * v1 . Taint { { Key : "key2" , Value : "value2" , Effect : "NoExecute" } } ,
expectedTaints : [ ] v1 . Taint {
{ Key : "key2" , Value : "value2" , Effect : "NoExecute" } ,
} ,
requestCount : 5 ,
} ,
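		// Targeting a node that does not exist should surface the apiserver's
		// NotFound error unchanged; the taint and request-count assertions are
		// skipped for this case.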
{
name : "add taint to non-exist node" ,
nodeHandler : & testutil . FakeNodeHandler {
Existing : [ ] * v1 . Node {
{
ObjectMeta : metav1 . ObjectMeta {
Name : "node1" ,
ResourceVersion : "1" ,
} ,
Spec : v1 . NodeSpec {
Taints : [ ] v1 . Taint {
{ Key : "key1" , Value : "value1" , Effect : "NoSchedule" } ,
} ,
} ,
} ,
} ,
Clientset : fake . NewSimpleClientset ( & v1 . PodList { Items : [ ] v1 . Pod { * testutil . NewPod ( "pod0" , "node0" ) } } ) ,
} ,
nodeName : "node2" ,
taintsToAdd : [ ] * v1 . Taint { { Key : "key2" , Value : "value2" , Effect : "NoExecute" } } ,
expectedErr : apierrors . NewNotFound ( schema . GroupResource { Resource : "nodes" } , "node2" ) ,
} ,
}
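	// Apply the taints, then check the node's resulting taint list and the
	// request count; cases that expect an error only assert on the error.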
	for _, test := range tests {
		err := AddOrUpdateTaintOnNode(context.TODO(), test.nodeHandler, test.nodeName, test.taintsToAdd...)
		if test.expectedErr != nil {
			assert.Equal(t, test.expectedErr, err, "%s: AddOrUpdateTaintOnNode() returned an unexpected error", test.name)
			continue
		}
		require.NoError(t, err, "%s: AddOrUpdateTaintOnNode() error = %v", test.name, err)

		node, _ := test.nodeHandler.Get(context.TODO(), test.nodeName, metav1.GetOptions{})
		assert.EqualValues(t, test.expectedTaints, node.Spec.Taints,
			"%s: failed to add taint to node: expected %+v, got %+v",
			test.name, test.expectedTaints, node.Spec.Taints)

		assert.Equal(t, test.requestCount, test.nodeHandler.RequestCount,
			"%s: unexpected request count: expected %+v, got %+v",
			test.name, test.requestCount, test.nodeHandler.RequestCount)
	}
}
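
// TestFilterPodsByOwner checks that FilterPodsByOwner returns the Pods
// controlled by the given owner and, unless a case sets ownedOnly, the orphan
// Pods in the owner's namespace, while ignoring Pods whose controller
// reference points at a different UID, kind, or name.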
func TestFilterPodsByOwner ( t * testing . T ) {
newPod := func ( name , ns string , owner * metav1 . OwnerReference ) * v1 . Pod {
pod := & v1 . Pod {
ObjectMeta : metav1 . ObjectMeta {
Name : name ,
Namespace : ns ,
} ,
}
if owner != nil {
pod . OwnerReferences = append ( pod . OwnerReferences , * owner )
}
return pod
}
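	// ownerKind and ownerName stand in for the owning controller's kind and
	// name. FilterPodsByOwner is called with ownerKind below, so Pods whose
	// controller reference names a different kind or name must not match.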
ownerKind := "OwnerKind"
ownerName := "ownerName"
cases := map [ string ] struct {
owner * metav1 . ObjectMeta
ownedOnly bool
allPods [ ] * v1 . Pod
wantPodsKeys sets . Set [ string ]
} {
"multiple Pods, some are owned by the owner" : {
owner : & metav1 . ObjectMeta {
Namespace : "ns1" ,
Name : ownerName ,
UID : "abc" ,
} ,
allPods : [ ] * v1 . Pod {
newPod ( "a" , "ns1" , & metav1 . OwnerReference {
UID : "abc" ,
Kind : ownerKind ,
Name : ownerName ,
Controller : ptr . To ( true ) ,
} ) ,
newPod ( "b" , "ns1" , & metav1 . OwnerReference {
UID : "def" ,
Kind : ownerKind ,
Name : ownerName ,
Controller : ptr . To ( true ) ,
} ) ,
newPod ( "c" , "ns1" , & metav1 . OwnerReference {
UID : "abc" ,
Kind : ownerKind ,
Name : ownerName ,
Controller : ptr . To ( true ) ,
} ) ,
} ,
wantPodsKeys : sets . New ( "ns1/a" , "ns1/c" ) ,
} ,
"orphan Pods in multiple namespaces" : {
owner : & metav1 . ObjectMeta {
Namespace : "ns1" ,
Name : ownerName ,
UID : "abc" ,
} ,
allPods : [ ] * v1 . Pod {
newPod ( "a" , "ns1" , nil ) ,
newPod ( "b" , "ns2" , nil ) ,
} ,
wantPodsKeys : sets . New ( "ns1/a" ) ,
} ,
"owned Pods and orphan Pods in the owner's namespace" : {
owner : & metav1 . ObjectMeta {
Namespace : "ns1" ,
Name : ownerName ,
UID : "abc" ,
} ,
allPods : [ ] * v1 . Pod {
newPod ( "a" , "ns1" , nil ) ,
newPod ( "b" , "ns2" , nil ) ,
newPod ( "c" , "ns1" , & metav1 . OwnerReference {
UID : "abc" ,
Kind : ownerKind ,
Name : ownerName ,
Controller : ptr . To ( true ) ,
} ) ,
} ,
wantPodsKeys : sets . New ( "ns1/a" , "ns1/c" ) ,
} ,
"exclude orphan Pods and Pods whose namespace, UID, kind, name, or controller flag does not match" : {
owner : & metav1 . ObjectMeta {
Namespace : "ns1" ,
Name : ownerName ,
UID : "abc" ,
} ,
allPods : [ ] * v1 . Pod {
newPod ( "a" , "ns1" , nil ) ,
newPod ( "other-ns-orphan" , "ns2" , nil ) ,
newPod ( "other-ns-owned" , "ns2" , & metav1 . OwnerReference {
UID : "abc" ,
Kind : ownerKind ,
Name : ownerName ,
Controller : ptr . To ( true ) ,
} ) ,
newPod ( "c" , "ns1" , & metav1 . OwnerReference {
UID : "abc" ,
Kind : ownerKind ,
Name : ownerName ,
Controller : ptr . To ( true ) ,
} ) ,
newPod ( "other-uid" , "ns1" , & metav1 . OwnerReference {
UID : "other-uid" ,
Kind : ownerKind ,
Name : ownerName ,
Controller : ptr . To ( true ) ,
} ) ,
newPod ( "other-kind" , "ns1" , & metav1 . OwnerReference {
UID : "abc" ,
Kind : "OtherKind" ,
Name : ownerName ,
Controller : ptr . To ( true ) ,
} ) ,
newPod ( "other-name" , "ns1" , & metav1 . OwnerReference {
UID : "abc" ,
Kind : ownerKind ,
Name : "otherName" ,
Controller : ptr . To ( true ) ,
} ) ,
newPod ( "non-controller" , "ns1" , & metav1 . OwnerReference {
UID : "abc" ,
Kind : ownerKind ,
Name : ownerName ,
Controller : ptr . To ( false ) ,
} ) ,
} ,
ownedOnly : true ,
wantPodsKeys : sets . New ( "ns1/c" ) ,
} ,
}
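	// Each case registers the pod-by-controller index with
	// AddPodControllerIndexer, seeds the informer's indexer directly (no
	// apiserver round trips), and compares the namespace/name keys of the Pods
	// returned by FilterPodsByOwner against the expected set.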
	for name, tc := range cases {
		t.Run(name, func(t *testing.T) {
			fakeClient := fake.NewSimpleClientset()
			sharedInformers := informers.NewSharedInformerFactory(fakeClient, 0)
			podInformer := sharedInformers.Core().V1().Pods()

			if err := AddPodControllerIndexer(podInformer.Informer()); err != nil {
				t.Fatalf("failed to register indexer: %v", err)
			}
			podIndexer := podInformer.Informer().GetIndexer()
			for _, pod := range tc.allPods {
				if err := podIndexer.Add(pod); err != nil {
					t.Fatalf("failed adding Pod to indexer: %v", err)
				}
			}

			gotPods, err := FilterPodsByOwner(podIndexer, tc.owner, ownerKind, !tc.ownedOnly)
			if err != nil {
				t.Fatal(err)
			}

			gotPodKeys := sets.New[string]()
			for _, pod := range gotPods {
				gotPodKeys.Insert(pod.Namespace + "/" + pod.Name)
			}
			if diff := cmp.Diff(tc.wantPodsKeys, gotPodKeys); diff != "" {
				t.Errorf("unexpected Pods returned (-want,+got):\n%s", diff)
			}
		})
	}
}
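
// A minimal sketch of how these helpers fit together in a controller, based
// only on how this test wires them up (the boolean's meaning and the owner
// object below are assumptions, not an authoritative description of the API):
//
//	podInformer := sharedInformers.Core().V1().Pods()
//	if err := AddPodControllerIndexer(podInformer.Informer()); err != nil {
//		return err
//	}
//	// Inside the sync loop: list Pods for an owner of kind "OwnerKind",
//	// including orphan Pods in the owner's namespace.
//	pods, err := FilterPodsByOwner(
//		podInformer.Informer().GetIndexer(), &ownerObjectMeta, "OwnerKind", true)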