2014-06-28 18:35:51 -04:00
/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
2015-05-08 07:01:09 -04:00
package predicates
2014-06-28 18:35:51 -04:00
import (
2016-10-11 09:31:47 -04:00
"errors"
2014-09-26 19:28:30 -04:00
"fmt"
2016-06-10 20:15:50 -04:00
"math/rand"
"strconv"
2016-07-21 10:16:24 -04:00
"sync"
2016-06-10 20:15:50 -04:00
"time"
2014-09-26 19:28:30 -04:00
2016-05-04 02:50:31 -04:00
"github.com/golang/glog"
2017-02-21 15:00:57 -05:00
apierrors "k8s.io/apimachinery/pkg/api/errors"
2017-01-11 09:09:48 -05:00
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2017-01-27 10:20:40 -05:00
"k8s.io/client-go/util/workqueue"
2016-11-18 15:52:35 -05:00
"k8s.io/kubernetes/pkg/api/v1"
2017-02-21 15:00:57 -05:00
corelisters "k8s.io/kubernetes/pkg/client/listers/core/v1"
2016-06-20 21:28:42 -04:00
"k8s.io/kubernetes/pkg/kubelet/qos"
2015-08-05 18:03:47 -04:00
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
2016-05-04 02:50:31 -04:00
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
2016-01-28 15:14:45 -05:00
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
2014-06-28 18:35:51 -04:00
)
2016-10-12 12:03:01 -04:00
// PredicateMetadataModifier mutates a predicateMetadata with data precomputed
// for one particular predicate.
type PredicateMetadataModifier func(pm *predicateMetadata)

// predicatePrecomputeRegisterLock guards predicatePrecomputations, which maps
// a predicate name to its registered precomputation step.
var predicatePrecomputeRegisterLock sync.Mutex
var predicatePrecomputations = make(map[string]PredicateMetadataModifier)

// RegisterPredicatePrecomputation registers a precomputation step that runs
// when predicate metadata is built for the named predicate.
func RegisterPredicatePrecomputation(predicateName string, precomp PredicateMetadataModifier) {
	predicatePrecomputeRegisterLock.Lock()
	defer predicatePrecomputeRegisterLock.Unlock()
	predicatePrecomputations[predicateName] = precomp
}
// Other types for predicate functions...
2014-09-25 16:55:42 -04:00
type NodeInfo interface {
2016-11-18 15:52:35 -05:00
GetNodeInfo ( nodeID string ) ( * v1 . Node , error )
2014-09-26 19:28:30 -04:00
}
2015-11-29 14:00:49 -05:00
type PersistentVolumeInfo interface {
2016-11-18 15:52:35 -05:00
GetPersistentVolumeInfo ( pvID string ) ( * v1 . PersistentVolume , error )
2015-11-29 14:00:49 -05:00
}
2017-02-21 15:00:57 -05:00
// CachedPersistentVolumeInfo implements PersistentVolumeInfo on top of a
// shared-informer PV lister cache.
type CachedPersistentVolumeInfo struct {
	corelisters.PersistentVolumeLister
}

// GetPersistentVolumeInfo returns the cached PersistentVolume named pvID.
func (c *CachedPersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*v1.PersistentVolume, error) {
	return c.Get(pvID)
}
2015-11-29 14:00:49 -05:00
type PersistentVolumeClaimInfo interface {
2016-11-18 15:52:35 -05:00
GetPersistentVolumeClaimInfo ( namespace string , name string ) ( * v1 . PersistentVolumeClaim , error )
2016-10-14 14:26:25 -04:00
}
// CachedPersistentVolumeClaimInfo implements PersistentVolumeClaimInfo
type CachedPersistentVolumeClaimInfo struct {
2017-02-21 15:00:57 -05:00
corelisters . PersistentVolumeClaimLister
2016-10-14 14:26:25 -04:00
}
// GetPersistentVolumeClaimInfo fetches the claim in specified namespace with specified name
2016-11-18 15:52:35 -05:00
func ( c * CachedPersistentVolumeClaimInfo ) GetPersistentVolumeClaimInfo ( namespace string , name string ) ( * v1 . PersistentVolumeClaim , error ) {
2016-10-14 14:26:25 -04:00
return c . PersistentVolumeClaims ( namespace ) . Get ( name )
2015-11-29 14:00:49 -05:00
}
2015-11-26 03:57:26 -05:00
type CachedNodeInfo struct {
2017-02-21 15:00:57 -05:00
corelisters . NodeLister
2015-11-26 03:57:26 -05:00
}
// GetNodeInfo returns cached data for the node 'id'.
2016-11-18 15:52:35 -05:00
func ( c * CachedNodeInfo ) GetNodeInfo ( id string ) ( * v1 . Node , error ) {
2017-02-21 15:00:57 -05:00
node , err := c . Get ( id )
2015-11-26 03:57:26 -05:00
2017-02-21 15:00:57 -05:00
if apierrors . IsNotFound ( err ) {
return nil , fmt . Errorf ( "node '%v' not found" , id )
2015-11-26 03:57:26 -05:00
}
2017-02-21 15:00:57 -05:00
if err != nil {
return nil , fmt . Errorf ( "error retrieving node '%v' from cache: %v" , id , err )
2015-11-26 03:57:26 -05:00
}
2017-02-21 15:00:57 -05:00
return node , nil
2015-11-26 03:57:26 -05:00
}
2016-11-23 05:15:17 -05:00
// Note that predicateMetadata and matchingPodAntiAffinityTerm need to be declared in the same file
2016-10-12 12:03:01 -04:00
// due to the way declarations are processed in predicate declaration unit tests.
2016-07-21 10:16:24 -04:00
type matchingPodAntiAffinityTerm struct {
2016-11-18 15:52:35 -05:00
term * v1 . PodAffinityTerm
node * v1 . Node
2016-07-21 10:16:24 -04:00
}
2016-10-12 12:03:01 -04:00
type predicateMetadata struct {
2016-11-18 15:52:35 -05:00
pod * v1 . Pod
2016-10-12 12:03:01 -04:00
podBestEffort bool
podRequest * schedulercache . Resource
podPorts map [ int ] bool
matchingAntiAffinityTerms [ ] matchingPodAntiAffinityTerm
2016-11-18 15:52:35 -05:00
serviceAffinityMatchingPodList [ ] * v1 . Pod
serviceAffinityMatchingPodServices [ ] * v1 . Service
2016-07-08 02:12:44 -04:00
}
2016-11-18 15:52:35 -05:00
func isVolumeConflict ( volume v1 . Volume , pod * v1 . Pod ) bool {
2015-12-09 14:45:56 -05:00
// fast path if there is no conflict checking targets.
2016-12-09 11:17:13 -05:00
if volume . GCEPersistentDisk == nil && volume . AWSElasticBlockStore == nil && volume . RBD == nil && volume . ISCSI == nil {
2015-12-09 14:45:56 -05:00
return false
}
2015-12-09 14:19:57 -05:00
for _ , existingVolume := range pod . Spec . Volumes {
2015-12-09 14:45:56 -05:00
// Same GCE disk mounted by multiple pods conflicts unless all pods mount it read-only.
2015-12-09 14:19:57 -05:00
if volume . GCEPersistentDisk != nil && existingVolume . GCEPersistentDisk != nil {
disk , existingDisk := volume . GCEPersistentDisk , existingVolume . GCEPersistentDisk
if disk . PDName == existingDisk . PDName && ! ( disk . ReadOnly && existingDisk . ReadOnly ) {
2015-03-06 09:26:39 -05:00
return true
}
}
2014-10-13 00:34:23 -04:00
2015-12-09 14:19:57 -05:00
if volume . AWSElasticBlockStore != nil && existingVolume . AWSElasticBlockStore != nil {
if volume . AWSElasticBlockStore . VolumeID == existingVolume . AWSElasticBlockStore . VolumeID {
2015-03-06 09:26:39 -05:00
return true
}
2014-10-13 00:34:23 -04:00
}
2015-12-09 14:19:57 -05:00
2016-12-09 11:17:13 -05:00
if volume . ISCSI != nil && existingVolume . ISCSI != nil {
2017-02-15 00:45:22 -05:00
iqn := volume . ISCSI . IQN
eiqn := existingVolume . ISCSI . IQN
// two ISCSI volumes are same, if they share the same iqn. As iscsi volumes are of type
2016-12-09 11:17:13 -05:00
// RWO or ROX, we could permit only one RW mount. Same iscsi volume mounted by multiple Pods
// conflict unless all other pods mount as read only.
2017-02-15 00:45:22 -05:00
if iqn == eiqn && ! ( volume . ISCSI . ReadOnly && existingVolume . ISCSI . ReadOnly ) {
2016-12-09 11:17:13 -05:00
return true
}
}
2015-12-09 14:19:57 -05:00
if volume . RBD != nil && existingVolume . RBD != nil {
mon , pool , image := volume . RBD . CephMonitors , volume . RBD . RBDPool , volume . RBD . RBDImage
emon , epool , eimage := existingVolume . RBD . CephMonitors , existingVolume . RBD . RBDPool , existingVolume . RBD . RBDImage
2016-07-25 11:29:07 -04:00
// two RBDs images are the same if they share the same Ceph monitor, are in the same RADOS Pool, and have the same image name
// only one read-write mount is permitted for the same RBD image.
// same RBD image mounted by multiple Pods conflicts unless all Pods mount the image read-only
if haveSame ( mon , emon ) && pool == epool && image == eimage && ! ( volume . RBD . ReadOnly && existingVolume . RBD . ReadOnly ) {
2015-12-09 14:19:57 -05:00
return true
2015-10-20 13:24:23 -04:00
}
}
}
2015-12-09 14:19:57 -05:00
2014-10-13 00:34:23 -04:00
return false
}
// NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
2015-10-22 09:28:30 -04:00
// are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
2015-11-02 10:18:39 -05:00
// can't be scheduled there.
// This is GCE, Amazon EBS, and Ceph RBD specific for now:
// - GCE PD allows multiple mounts as long as they're all read-only
// - AWS EBS forbids any two pods mounting the same volume ID
// - Ceph RBD forbids if any two pods share at least same monitor, and match pool and image.
2016-12-09 11:17:13 -05:00
// - ISCSI forbids if any two pods share at least same IQN, LUN and Target
2014-10-13 00:34:23 -04:00
// TODO: migrate this into some per-volume specific code?
2016-11-18 15:52:35 -05:00
func NoDiskConflict ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2015-12-09 14:19:57 -05:00
for _ , v := range pod . Spec . Volumes {
2016-01-28 15:14:45 -05:00
for _ , ev := range nodeInfo . Pods ( ) {
2015-12-09 14:19:57 -05:00
if isVolumeConflict ( v , ev ) {
2016-08-09 08:01:46 -04:00
return false , [ ] algorithm . PredicateFailureReason { ErrDiskConflict } , nil
2014-10-13 00:34:23 -04:00
}
}
}
2016-08-09 08:01:46 -04:00
return true , nil , nil
2014-10-13 00:34:23 -04:00
}
2016-01-14 15:45:08 -05:00
// MaxPDVolumeCountChecker enforces a per-node cap on the number of volumes
// (of one filtered type) that may be attached simultaneously.
type MaxPDVolumeCountChecker struct {
	filter     VolumeFilter
	maxVolumes int
	pvInfo     PersistentVolumeInfo
	pvcInfo    PersistentVolumeClaimInfo
}
// VolumeFilter contains information on how to filter PD Volumes when checking PD Volume caps
type VolumeFilter struct {
// Filter normal volumes
2016-11-18 15:52:35 -05:00
FilterVolume func ( vol * v1 . Volume ) ( id string , relevant bool )
FilterPersistentVolume func ( pv * v1 . PersistentVolume ) ( id string , relevant bool )
2016-01-14 15:45:08 -05:00
}
// NewMaxPDVolumeCountPredicate creates a predicate which evaluates whether a pod can fit based on the
// number of volumes which match a filter that it requests, and those that are already present. The
// maximum number is configurable to accommodate different systems.
//
// The predicate looks for both volumes used directly, as well as PVC volumes that are backed by relevant volume
// types, counts the number of unique volumes, and rejects the new pod if it would place the total count over
// the maximum.
func NewMaxPDVolumeCountPredicate(filter VolumeFilter, maxVolumes int, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
	checker := &MaxPDVolumeCountChecker{
		filter:     filter,
		maxVolumes: maxVolumes,
		pvInfo:     pvInfo,
		pvcInfo:    pvcInfo,
	}
	return checker.predicate
}
2016-11-18 15:52:35 -05:00
func ( c * MaxPDVolumeCountChecker ) filterVolumes ( volumes [ ] v1 . Volume , namespace string , filteredVolumes map [ string ] bool ) error {
2016-12-27 09:49:04 -05:00
for i := range volumes {
vol := & volumes [ i ]
if id , ok := c . filter . FilterVolume ( vol ) ; ok {
2016-01-14 15:45:08 -05:00
filteredVolumes [ id ] = true
} else if vol . PersistentVolumeClaim != nil {
pvcName := vol . PersistentVolumeClaim . ClaimName
if pvcName == "" {
2016-05-17 10:01:37 -04:00
return fmt . Errorf ( "PersistentVolumeClaim had no name" )
2016-01-14 15:45:08 -05:00
}
pvc , err := c . pvcInfo . GetPersistentVolumeClaimInfo ( namespace , pvcName )
if err != nil {
2016-06-10 20:15:50 -04:00
// if the PVC is not found, log the error and count the PV towards the PV limit
// generate a random volume ID since its required for de-dup
2016-06-13 16:44:12 -04:00
utilruntime . HandleError ( fmt . Errorf ( "Unable to look up PVC info for %s/%s, assuming PVC matches predicate when counting limits: %v" , namespace , pvcName , err ) )
2016-06-10 20:15:50 -04:00
source := rand . NewSource ( time . Now ( ) . UnixNano ( ) )
generatedID := "missingPVC" + strconv . Itoa ( rand . New ( source ) . Intn ( 1000000 ) )
filteredVolumes [ generatedID ] = true
return nil
2016-01-14 15:45:08 -05:00
}
2016-06-03 01:02:26 -04:00
if pvc == nil {
return fmt . Errorf ( "PersistentVolumeClaim not found: %q" , pvcName )
}
2016-01-14 15:45:08 -05:00
pvName := pvc . Spec . VolumeName
if pvName == "" {
return fmt . Errorf ( "PersistentVolumeClaim is not bound: %q" , pvcName )
}
pv , err := c . pvInfo . GetPersistentVolumeInfo ( pvName )
if err != nil {
2016-06-10 20:15:50 -04:00
// if the PV is not found, log the error
// and count the PV towards the PV limit
2017-03-30 22:17:19 -04:00
// generate a random volume ID since it is required for de-dup
2016-06-13 16:44:12 -04:00
utilruntime . HandleError ( fmt . Errorf ( "Unable to look up PV info for %s/%s/%s, assuming PV matches predicate when counting limits: %v" , namespace , pvcName , pvName , err ) )
2016-06-10 20:15:50 -04:00
source := rand . NewSource ( time . Now ( ) . UnixNano ( ) )
generatedID := "missingPV" + strconv . Itoa ( rand . New ( source ) . Intn ( 1000000 ) )
filteredVolumes [ generatedID ] = true
return nil
2016-01-14 15:45:08 -05:00
}
2016-06-03 01:02:26 -04:00
if pv == nil {
return fmt . Errorf ( "PersistentVolume not found: %q" , pvName )
}
2016-01-14 15:45:08 -05:00
if id , ok := c . filter . FilterPersistentVolume ( pv ) ; ok {
filteredVolumes [ id ] = true
}
}
}
return nil
}
2016-11-18 15:52:35 -05:00
func ( c * MaxPDVolumeCountChecker ) predicate ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-07-07 05:55:41 -04:00
// If a pod doesn't have any volume attached to it, the predicate will always be true.
// Thus we make a fast path for it, to avoid unnecessary computations in this case.
if len ( pod . Spec . Volumes ) == 0 {
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-07-07 05:55:41 -04:00
}
2016-01-14 15:45:08 -05:00
newVolumes := make ( map [ string ] bool )
if err := c . filterVolumes ( pod . Spec . Volumes , pod . Namespace , newVolumes ) ; err != nil {
2016-08-09 08:01:46 -04:00
return false , nil , err
2016-01-14 15:45:08 -05:00
}
// quick return
if len ( newVolumes ) == 0 {
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-01-14 15:45:08 -05:00
}
// count unique volumes
existingVolumes := make ( map [ string ] bool )
2016-01-28 15:14:45 -05:00
for _ , existingPod := range nodeInfo . Pods ( ) {
2016-01-14 15:45:08 -05:00
if err := c . filterVolumes ( existingPod . Spec . Volumes , existingPod . Namespace , existingVolumes ) ; err != nil {
2016-08-09 08:01:46 -04:00
return false , nil , err
2016-01-14 15:45:08 -05:00
}
}
numExistingVolumes := len ( existingVolumes )
// filter out already-mounted volumes
for k := range existingVolumes {
if _ , ok := newVolumes [ k ] ; ok {
delete ( newVolumes , k )
}
}
numNewVolumes := len ( newVolumes )
if numExistingVolumes + numNewVolumes > c . maxVolumes {
2016-01-05 20:10:59 -05:00
// violates MaxEBSVolumeCount or MaxGCEPDVolumeCount
2016-08-09 08:01:46 -04:00
return false , [ ] algorithm . PredicateFailureReason { ErrMaxVolumeCountExceeded } , nil
2016-01-14 15:45:08 -05:00
}
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-01-14 15:45:08 -05:00
}
// EBSVolumeFilter is a VolumeFilter for filtering AWS ElasticBlockStore Volumes
var EBSVolumeFilter VolumeFilter = VolumeFilter {
2016-11-18 15:52:35 -05:00
FilterVolume : func ( vol * v1 . Volume ) ( string , bool ) {
2016-01-14 15:45:08 -05:00
if vol . AWSElasticBlockStore != nil {
return vol . AWSElasticBlockStore . VolumeID , true
}
return "" , false
} ,
2016-11-18 15:52:35 -05:00
FilterPersistentVolume : func ( pv * v1 . PersistentVolume ) ( string , bool ) {
2016-01-14 15:45:08 -05:00
if pv . Spec . AWSElasticBlockStore != nil {
return pv . Spec . AWSElasticBlockStore . VolumeID , true
}
return "" , false
} ,
}
// GCEPDVolumeFilter is a VolumeFilter for filtering GCE PersistentDisk Volumes
var GCEPDVolumeFilter VolumeFilter = VolumeFilter {
2016-11-18 15:52:35 -05:00
FilterVolume : func ( vol * v1 . Volume ) ( string , bool ) {
2016-01-14 15:45:08 -05:00
if vol . GCEPersistentDisk != nil {
return vol . GCEPersistentDisk . PDName , true
}
return "" , false
} ,
2016-11-18 15:52:35 -05:00
FilterPersistentVolume : func ( pv * v1 . PersistentVolume ) ( string , bool ) {
2016-01-14 15:45:08 -05:00
if pv . Spec . GCEPersistentDisk != nil {
return pv . Spec . GCEPersistentDisk . PDName , true
}
return "" , false
} ,
}
2017-02-02 11:32:02 -05:00
// AzureDiskVolumeFilter is a VolumeFilter for filtering Azure Disk Volumes,
// keyed by disk name.
var AzureDiskVolumeFilter = VolumeFilter{
	FilterVolume: func(vol *v1.Volume) (string, bool) {
		if vol.AzureDisk == nil {
			return "", false
		}
		return vol.AzureDisk.DiskName, true
	},
	FilterPersistentVolume: func(pv *v1.PersistentVolume) (string, bool) {
		if pv.Spec.AzureDisk == nil {
			return "", false
		}
		return pv.Spec.AzureDisk.DiskName, true
	},
}
2015-11-29 14:00:49 -05:00
type VolumeZoneChecker struct {
2016-04-21 04:24:12 -04:00
pvInfo PersistentVolumeInfo
pvcInfo PersistentVolumeClaimInfo
2015-11-29 14:00:49 -05:00
}
// VolumeZonePredicate evaluates if a pod can fit due to the volumes it requests, given
// that some volumes may have zone scheduling constraints. The requirement is that any
// volume zone-labels must match the equivalent zone-labels on the node. It is OK for
// the node to have more zone-label constraints (for example, a hypothetical replicated
// volume might allow region-wide access)
//
// Currently this is only supported with PersistentVolumeClaims, and looks to the labels
// only on the bound PersistentVolume.
//
// Working with volumes declared inline in the pod specification (i.e. not
// using a PersistentVolume) is likely to be harder, as it would require
// determining the zone of a volume during scheduling, and that is likely to
// require calling out to the cloud provider. It seems that we are moving away
// from inline volume declarations anyway.
2016-04-21 04:24:12 -04:00
func NewVolumeZonePredicate ( pvInfo PersistentVolumeInfo , pvcInfo PersistentVolumeClaimInfo ) algorithm . FitPredicate {
2015-11-29 14:00:49 -05:00
c := & VolumeZoneChecker {
2016-04-21 04:24:12 -04:00
pvInfo : pvInfo ,
pvcInfo : pvcInfo ,
2015-11-29 14:00:49 -05:00
}
return c . predicate
}
2016-11-18 15:52:35 -05:00
func ( c * VolumeZoneChecker ) predicate ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-07-07 05:55:41 -04:00
// If a pod doesn't have any volume attached to it, the predicate will always be true.
// Thus we make a fast path for it, to avoid unnecessary computations in this case.
if len ( pod . Spec . Volumes ) == 0 {
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-07-07 05:55:41 -04:00
}
2016-04-21 04:24:12 -04:00
node := nodeInfo . Node ( )
2015-11-29 14:00:49 -05:00
if node == nil {
2016-08-09 08:01:46 -04:00
return false , nil , fmt . Errorf ( "node not found" )
2015-11-29 14:00:49 -05:00
}
nodeConstraints := make ( map [ string ] string )
for k , v := range node . ObjectMeta . Labels {
2016-12-03 13:57:26 -05:00
if k != metav1 . LabelZoneFailureDomain && k != metav1 . LabelZoneRegion {
2015-11-29 14:00:49 -05:00
continue
}
nodeConstraints [ k ] = v
}
if len ( nodeConstraints ) == 0 {
// The node has no zone constraints, so we're OK to schedule.
// In practice, when using zones, all nodes must be labeled with zone labels.
// We want to fast-path this case though.
2016-08-09 08:01:46 -04:00
return true , nil , nil
2015-11-29 14:00:49 -05:00
}
namespace := pod . Namespace
manifest := & ( pod . Spec )
for i := range manifest . Volumes {
volume := & manifest . Volumes [ i ]
if volume . PersistentVolumeClaim != nil {
pvcName := volume . PersistentVolumeClaim . ClaimName
if pvcName == "" {
2016-08-09 08:01:46 -04:00
return false , nil , fmt . Errorf ( "PersistentVolumeClaim had no name" )
2015-11-29 14:00:49 -05:00
}
pvc , err := c . pvcInfo . GetPersistentVolumeClaimInfo ( namespace , pvcName )
if err != nil {
2016-08-09 08:01:46 -04:00
return false , nil , err
2015-11-29 14:00:49 -05:00
}
if pvc == nil {
2016-08-09 08:01:46 -04:00
return false , nil , fmt . Errorf ( "PersistentVolumeClaim was not found: %q" , pvcName )
2015-11-29 14:00:49 -05:00
}
pvName := pvc . Spec . VolumeName
if pvName == "" {
2016-08-09 08:01:46 -04:00
return false , nil , fmt . Errorf ( "PersistentVolumeClaim is not bound: %q" , pvcName )
2015-11-29 14:00:49 -05:00
}
pv , err := c . pvInfo . GetPersistentVolumeInfo ( pvName )
if err != nil {
2016-08-09 08:01:46 -04:00
return false , nil , err
2015-11-29 14:00:49 -05:00
}
if pv == nil {
2016-08-09 08:01:46 -04:00
return false , nil , fmt . Errorf ( "PersistentVolume not found: %q" , pvName )
2015-11-29 14:00:49 -05:00
}
for k , v := range pv . ObjectMeta . Labels {
2016-12-03 13:57:26 -05:00
if k != metav1 . LabelZoneFailureDomain && k != metav1 . LabelZoneRegion {
2015-11-29 14:00:49 -05:00
continue
}
nodeV , _ := nodeConstraints [ k ]
if v != nodeV {
2016-11-28 10:31:49 -05:00
glog . V ( 10 ) . Infof ( "Won't schedule pod %q onto node %q due to volume %q (mismatch on %q)" , pod . Name , node . Name , pvName , k )
2016-08-09 08:01:46 -04:00
return false , [ ] algorithm . PredicateFailureReason { ErrVolumeZoneConflict } , nil
2015-11-29 14:00:49 -05:00
}
}
}
}
2016-08-09 08:01:46 -04:00
return true , nil , nil
2015-11-29 14:00:49 -05:00
}
2017-03-06 20:42:55 -05:00
// Returns a *schedulercache.Resource that covers the largest width in each
// resource dimension. Because init-containers run sequentially, we collect the
// max in each dimension iteratively. In contrast, we sum the resource vectors
// for regular containers since they run simultaneously.
//
// Example:
//
// Pod:
// InitContainers
// IC1:
// CPU: 2
// Memory: 1G
// IC2:
// CPU: 2
// Memory: 3G
// Containers
// C1:
// CPU: 2
// Memory: 1G
// C2:
// CPU: 1
// Memory: 1G
//
// Result: CPU: 3, Memory: 3G
2016-11-18 15:52:35 -05:00
func GetResourceRequest ( pod * v1 . Pod ) * schedulercache . Resource {
2016-07-19 06:21:09 -04:00
result := schedulercache . Resource { }
2015-07-30 15:59:22 -04:00
for _ , container := range pod . Spec . Containers {
2016-09-26 11:11:31 -04:00
for rName , rQuantity := range container . Resources . Requests {
switch rName {
2016-11-18 15:52:35 -05:00
case v1 . ResourceMemory :
2016-09-26 11:11:31 -04:00
result . Memory += rQuantity . Value ( )
2016-11-18 15:52:35 -05:00
case v1 . ResourceCPU :
2016-09-26 11:11:31 -04:00
result . MilliCPU += rQuantity . MilliValue ( )
2016-11-18 15:52:35 -05:00
case v1 . ResourceNvidiaGPU :
2016-09-26 11:11:31 -04:00
result . NvidiaGPU += rQuantity . Value ( )
default :
2016-11-18 15:52:35 -05:00
if v1 . IsOpaqueIntResourceName ( rName ) {
2016-11-11 23:19:26 -05:00
result . AddOpaque ( rName , rQuantity . Value ( ) )
2016-09-26 11:11:31 -04:00
}
}
}
2014-09-25 16:55:42 -04:00
}
2016-04-08 11:20:24 -04:00
// take max_resource(sum_pod, any_init_container)
for _ , container := range pod . Spec . InitContainers {
2016-09-26 11:11:31 -04:00
for rName , rQuantity := range container . Resources . Requests {
switch rName {
2016-11-18 15:52:35 -05:00
case v1 . ResourceMemory :
2016-09-26 11:11:31 -04:00
if mem := rQuantity . Value ( ) ; mem > result . Memory {
result . Memory = mem
}
2016-11-18 15:52:35 -05:00
case v1 . ResourceCPU :
2016-09-26 11:11:31 -04:00
if cpu := rQuantity . MilliValue ( ) ; cpu > result . MilliCPU {
result . MilliCPU = cpu
}
2016-11-18 15:52:35 -05:00
case v1 . ResourceNvidiaGPU :
2016-09-26 11:11:31 -04:00
if gpu := rQuantity . Value ( ) ; gpu > result . NvidiaGPU {
result . NvidiaGPU = gpu
}
default :
2016-11-18 15:52:35 -05:00
if v1 . IsOpaqueIntResourceName ( rName ) {
2016-09-26 11:11:31 -04:00
value := rQuantity . Value ( )
if value > result . OpaqueIntResources [ rName ] {
2017-03-06 20:42:55 -05:00
result . SetOpaque ( rName , value )
2016-09-26 11:11:31 -04:00
}
}
}
2016-04-08 11:20:24 -04:00
}
}
2016-07-08 08:59:32 -04:00
return & result
2014-09-25 16:55:42 -04:00
}
2016-11-18 15:52:35 -05:00
func podName ( pod * v1 . Pod ) string {
2015-10-19 18:00:41 -04:00
return pod . Namespace + "/" + pod . Name
}
2016-11-18 15:52:35 -05:00
func PodFitsResources ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-04-21 04:24:12 -04:00
node := nodeInfo . Node ( )
if node == nil {
2016-08-09 08:01:46 -04:00
return false , nil , fmt . Errorf ( "node not found" )
2016-04-21 04:24:12 -04:00
}
2016-08-09 08:01:46 -04:00
var predicateFails [ ] algorithm . PredicateFailureReason
2016-07-11 03:46:04 -04:00
allowedPodNumber := nodeInfo . AllowedPodNumber ( )
if len ( nodeInfo . Pods ( ) ) + 1 > allowedPodNumber {
2016-11-18 15:52:35 -05:00
predicateFails = append ( predicateFails , NewInsufficientResourceError ( v1 . ResourcePods , 1 , int64 ( len ( nodeInfo . Pods ( ) ) ) , int64 ( allowedPodNumber ) ) )
2016-04-22 12:58:49 -04:00
}
2016-07-08 08:59:32 -04:00
2016-07-19 06:21:09 -04:00
var podRequest * schedulercache . Resource
2016-07-12 05:43:54 -04:00
if predicateMeta , ok := meta . ( * predicateMetadata ) ; ok {
2016-07-08 08:59:32 -04:00
podRequest = predicateMeta . podRequest
} else {
// We couldn't parse metadata - fallback to computing it.
2016-10-11 11:13:35 -04:00
podRequest = GetResourceRequest ( pod )
2016-07-08 08:59:32 -04:00
}
2016-09-26 11:11:31 -04:00
if podRequest . MilliCPU == 0 && podRequest . Memory == 0 && podRequest . NvidiaGPU == 0 && len ( podRequest . OpaqueIntResources ) == 0 {
2016-08-09 08:01:46 -04:00
return len ( predicateFails ) == 0 , predicateFails , nil
2015-12-09 16:24:54 -05:00
}
2016-07-12 10:30:26 -04:00
allocatable := nodeInfo . AllocatableResource ( )
2016-07-19 06:21:09 -04:00
if allocatable . MilliCPU < podRequest . MilliCPU + nodeInfo . RequestedResource ( ) . MilliCPU {
2016-11-18 15:52:35 -05:00
predicateFails = append ( predicateFails , NewInsufficientResourceError ( v1 . ResourceCPU , podRequest . MilliCPU , nodeInfo . RequestedResource ( ) . MilliCPU , allocatable . MilliCPU ) )
2015-07-23 21:27:29 -04:00
}
2016-07-19 06:21:09 -04:00
if allocatable . Memory < podRequest . Memory + nodeInfo . RequestedResource ( ) . Memory {
2016-11-18 15:52:35 -05:00
predicateFails = append ( predicateFails , NewInsufficientResourceError ( v1 . ResourceMemory , podRequest . Memory , nodeInfo . RequestedResource ( ) . Memory , allocatable . Memory ) )
2014-09-25 16:55:42 -04:00
}
2016-07-19 06:21:09 -04:00
if allocatable . NvidiaGPU < podRequest . NvidiaGPU + nodeInfo . RequestedResource ( ) . NvidiaGPU {
2016-11-18 15:52:35 -05:00
predicateFails = append ( predicateFails , NewInsufficientResourceError ( v1 . ResourceNvidiaGPU , podRequest . NvidiaGPU , nodeInfo . RequestedResource ( ) . NvidiaGPU , allocatable . NvidiaGPU ) )
2016-04-26 20:54:19 -04:00
}
2016-09-26 11:11:31 -04:00
for rName , rQuant := range podRequest . OpaqueIntResources {
if allocatable . OpaqueIntResources [ rName ] < rQuant + nodeInfo . RequestedResource ( ) . OpaqueIntResources [ rName ] {
predicateFails = append ( predicateFails , NewInsufficientResourceError ( rName , podRequest . OpaqueIntResources [ rName ] , nodeInfo . RequestedResource ( ) . OpaqueIntResources [ rName ] , allocatable . OpaqueIntResources [ rName ] ) )
}
}
2016-11-22 05:14:12 -05:00
if glog . V ( 10 ) && len ( predicateFails ) == 0 {
2016-07-11 03:46:04 -04:00
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog . Infof ( "Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods." ,
podName ( pod ) , node . Name , len ( nodeInfo . Pods ( ) ) , allowedPodNumber )
}
2016-08-09 08:01:46 -04:00
return len ( predicateFails ) == 0 , predicateFails , nil
2014-09-25 16:55:42 -04:00
}
2016-02-22 22:53:10 -05:00
// nodeMatchesNodeSelectorTerms checks if a node's labels satisfy a list of node selector terms,
2016-08-09 08:01:46 -04:00
// terms are ORed, and an empty list of terms will match nothing.
2016-11-18 15:52:35 -05:00
func nodeMatchesNodeSelectorTerms ( node * v1 . Node , nodeSelectorTerms [ ] v1 . NodeSelectorTerm ) bool {
2016-01-26 18:03:18 -05:00
for _ , req := range nodeSelectorTerms {
2016-11-18 15:52:35 -05:00
nodeSelector , err := v1 . NodeSelectorRequirementsAsSelector ( req . MatchExpressions )
2016-01-26 18:03:18 -05:00
if err != nil {
glog . V ( 10 ) . Infof ( "Failed to parse MatchExpressions: %+v, regarding as not match." , req . MatchExpressions )
return false
}
if nodeSelector . Matches ( labels . Set ( node . Labels ) ) {
return true
}
}
return false
}
// The pod can only schedule onto nodes that satisfy requirements in both NodeAffinity and nodeSelector.
2016-11-18 15:52:35 -05:00
func podMatchesNodeLabels ( pod * v1 . Pod , node * v1 . Node ) bool {
2016-01-26 18:03:18 -05:00
// Check if node.Labels match pod.Spec.NodeSelector.
if len ( pod . Spec . NodeSelector ) > 0 {
selector := labels . SelectorFromSet ( pod . Spec . NodeSelector )
if ! selector . Matches ( labels . Set ( node . Labels ) ) {
return false
}
}
// 1. nil NodeSelector matches all nodes (i.e. does not filter out any nodes)
// 2. nil []NodeSelectorTerm (equivalent to non-nil empty NodeSelector) matches no nodes
// 3. zero-length non-nil []NodeSelectorTerm matches no nodes also, just for simplicity
// 4. nil []NodeSelectorRequirement (equivalent to non-nil empty NodeSelectorTerm) matches no nodes
// 5. zero-length non-nil []NodeSelectorRequirement matches no nodes also, just for simplicity
// 6. non-nil empty NodeSelectorRequirement is not allowed
nodeAffinityMatches := true
2017-02-16 12:38:03 -05:00
affinity := schedulercache . ReconcileAffinity ( pod )
2016-07-22 06:48:35 -04:00
if affinity != nil && affinity . NodeAffinity != nil {
2016-01-26 18:03:18 -05:00
nodeAffinity := affinity . NodeAffinity
// if no required NodeAffinity requirements, will do no-op, means select all nodes.
2016-02-11 02:06:33 -05:00
// TODO: Replace next line with subsequent commented-out line when implement RequiredDuringSchedulingRequiredDuringExecution.
if nodeAffinity . RequiredDuringSchedulingIgnoredDuringExecution == nil {
// if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution == nil && nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
2016-01-26 18:03:18 -05:00
return true
}
// Match node selector for requiredDuringSchedulingRequiredDuringExecution.
2016-02-11 02:06:33 -05:00
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
// if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution != nil {
// nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution.NodeSelectorTerms
// glog.V(10).Infof("Match for RequiredDuringSchedulingRequiredDuringExecution node selector terms %+v", nodeSelectorTerms)
2016-02-22 22:53:10 -05:00
// nodeAffinityMatches = nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms)
2016-02-11 02:06:33 -05:00
// }
// Match node selector for requiredDuringSchedulingIgnoredDuringExecution.
2016-01-26 18:03:18 -05:00
if nodeAffinity . RequiredDuringSchedulingIgnoredDuringExecution != nil {
nodeSelectorTerms := nodeAffinity . RequiredDuringSchedulingIgnoredDuringExecution . NodeSelectorTerms
glog . V ( 10 ) . Infof ( "Match for RequiredDuringSchedulingIgnoredDuringExecution node selector terms %+v" , nodeSelectorTerms )
2016-02-22 22:53:10 -05:00
nodeAffinityMatches = nodeAffinityMatches && nodeMatchesNodeSelectorTerms ( node , nodeSelectorTerms )
2016-01-26 18:03:18 -05:00
}
2015-03-20 12:52:32 -04:00
}
2016-01-26 18:03:18 -05:00
return nodeAffinityMatches
2015-03-20 12:52:32 -04:00
}
2016-11-18 15:52:35 -05:00
func PodSelectorMatches ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-04-21 04:24:12 -04:00
node := nodeInfo . Node ( )
if node == nil {
2016-08-09 08:01:46 -04:00
return false , nil , fmt . Errorf ( "node not found" )
2014-10-21 20:13:52 -04:00
}
2016-07-08 05:03:51 -04:00
if podMatchesNodeLabels ( pod , node ) {
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-01-05 20:10:59 -05:00
}
2016-08-09 08:01:46 -04:00
return false , [ ] algorithm . PredicateFailureReason { ErrNodeSelectorNotMatch } , nil
2014-10-21 20:13:52 -04:00
}
2016-11-18 15:52:35 -05:00
func PodFitsHost ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2015-05-22 19:40:57 -04:00
if len ( pod . Spec . NodeName ) == 0 {
2016-08-09 08:01:46 -04:00
return true , nil , nil
2014-12-18 17:12:58 -05:00
}
2016-04-28 10:51:17 -04:00
node := nodeInfo . Node ( )
if node == nil {
2016-08-09 08:01:46 -04:00
return false , nil , fmt . Errorf ( "node not found" )
2016-04-28 10:51:17 -04:00
}
if pod . Spec . NodeName == node . Name {
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-01-05 20:10:59 -05:00
}
2016-08-09 08:01:46 -04:00
return false , [ ] algorithm . PredicateFailureReason { ErrPodNotMatchHostName } , nil
2014-12-18 17:12:58 -05:00
}
2014-12-22 16:54:41 -05:00
// NodeLabelChecker holds the configuration for the node-label predicate:
// the label keys to check, and whether those keys must be present
// (presence=true) or must be absent (presence=false) on a node.
type NodeLabelChecker struct {
	labels   []string
	presence bool
}
2016-04-21 04:24:12 -04:00
func NewNodeLabelPredicate ( labels [ ] string , presence bool ) algorithm . FitPredicate {
2014-12-22 16:54:41 -05:00
labelChecker := & NodeLabelChecker {
labels : labels ,
presence : presence ,
}
return labelChecker . CheckNodeLabelPresence
}
2015-09-10 04:40:22 -04:00
// CheckNodeLabelPresence checks whether all of the specified labels exists on a node or not, regardless of their value
// If "presence" is false, then returns false if any of the requested labels matches any of the node's labels,
2015-01-05 17:51:22 -05:00
// otherwise returns true.
2015-09-10 04:40:22 -04:00
// If "presence" is true, then returns false if any of the requested labels does not match any of the node's labels,
2015-01-05 17:51:22 -05:00
// otherwise returns true.
//
2015-09-10 04:40:22 -04:00
// Consider the cases where the nodes are placed in regions/zones/racks and these are identified by labels
// In some cases, it is required that only nodes that are part of ANY of the defined regions/zones/racks be selected
2014-12-22 16:54:41 -05:00
//
2015-09-10 04:40:22 -04:00
// Alternately, eliminating nodes that have a certain label, regardless of value, is also useful
// A node may have a label with "retiring" as key and the date as the value
// and it may be desirable to avoid scheduling new pods on this node
2016-11-18 15:52:35 -05:00
func ( n * NodeLabelChecker ) CheckNodeLabelPresence ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-04-21 04:24:12 -04:00
node := nodeInfo . Node ( )
if node == nil {
2016-08-09 08:01:46 -04:00
return false , nil , fmt . Errorf ( "node not found" )
2014-12-22 16:54:41 -05:00
}
2016-04-21 04:24:12 -04:00
var exists bool
2015-09-10 04:40:22 -04:00
nodeLabels := labels . Set ( node . Labels )
2014-12-22 16:54:41 -05:00
for _ , label := range n . labels {
2015-09-10 04:40:22 -04:00
exists = nodeLabels . Has ( label )
2014-12-22 16:54:41 -05:00
if ( exists && ! n . presence ) || ( ! exists && n . presence ) {
2016-08-09 08:01:46 -04:00
return false , [ ] algorithm . PredicateFailureReason { ErrNodeLabelPresenceViolated } , nil
2014-12-22 16:54:41 -05:00
}
}
2016-08-09 08:01:46 -04:00
return true , nil , nil
2014-12-22 16:54:41 -05:00
}
2014-12-22 18:55:31 -05:00
// ServiceAffinity holds the collaborators and configuration used by the
// service-affinity predicate: listers for pods and services, a NodeInfo
// accessor for resolving node objects by name, and the node label keys whose
// values must be homogeneous across all pods of a service.
type ServiceAffinity struct {
	podLister     algorithm.PodLister
	serviceLister algorithm.ServiceLister
	nodeInfo      NodeInfo
	labels        []string
}
2016-10-12 12:03:01 -04:00
// serviceAffinityPrecomputation should be run once by the scheduler before looping through the Predicate. It is a helper function that
// only should be referenced by NewServiceAffinityPredicate.
func ( s * ServiceAffinity ) serviceAffinityPrecomputation ( pm * predicateMetadata ) {
if pm . pod == nil {
2017-02-07 09:52:35 -05:00
glog . Errorf ( "Cannot precompute service affinity, a pod is required to calculate service affinity." )
2016-10-12 12:03:01 -04:00
return
}
var errSvc , errList error
// Store services which match the pod.
pm . serviceAffinityMatchingPodServices , errSvc = s . serviceLister . GetPodServices ( pm . pod )
selector := CreateSelectorFromLabels ( pm . pod . Labels )
// consider only the pods that belong to the same namespace
allMatches , errList := s . podLister . List ( selector )
// In the future maybe we will return them as part of the function.
if errSvc != nil || errList != nil {
glog . Errorf ( "Some Error were found while precomputing svc affinity: \nservices:%v , \npods:%v" , errSvc , errList )
}
pm . serviceAffinityMatchingPodList = FilterPodsByNamespace ( allMatches , pm . pod . Namespace )
}
func NewServiceAffinityPredicate ( podLister algorithm . PodLister , serviceLister algorithm . ServiceLister , nodeInfo NodeInfo , labels [ ] string ) ( algorithm . FitPredicate , PredicateMetadataModifier ) {
2014-12-22 18:55:31 -05:00
affinity := & ServiceAffinity {
podLister : podLister ,
serviceLister : serviceLister ,
nodeInfo : nodeInfo ,
labels : labels ,
}
2016-10-12 12:03:01 -04:00
return affinity . checkServiceAffinity , affinity . serviceAffinityPrecomputation
2014-12-22 18:55:31 -05:00
}
2016-10-12 12:03:01 -04:00
// checkServiceAffinity is a predicate which matches nodes in such a way to force that
// ServiceAffinity.labels are homogenous for pods that are scheduled to a node.
// (i.e. it returns true IFF this pod can be added to this node such that all other pods in
// the same service are running on nodes with the exact same ServiceAffinity.label values).
//
// For example:
// If the first pod of a service was scheduled to a node with label "region=foo",
// all the other subsequent pods belonging to the same service will be scheduled on
// nodes with the same "region=foo" label.
//
// Details:
//
// If (the svc affinity labels are not a subset of pod's label selectors)
// The pod has all information necessary to check affinity, the pod's label selector is sufficient to calculate
// the match.
// Otherwise:
// Create an "implicit selector" which guarantees pods will land on nodes with similar values
// for the affinity labels.
//
// To do this, we "reverse engineer" a selector by introspecting existing pods running under the same service+namespace.
// These backfilled labels in the selector "L" are defined like so:
// - L is a label that the ServiceAffinity object needs as a matching constraint.
// - L is not defined in the pod itself already.
// - and SOME pod, from a service, in the same namespace, ALREADY scheduled onto a node, has a matching value.
//
// WARNING: This Predicate is NOT guaranteed to work if some of the predicateMetadata data isn't precomputed...
// For that reason it is not exported, i.e. it is highly coupled to the implementation of the FitPredicate construction.
func (s *ServiceAffinity) checkServiceAffinity(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
	var services []*v1.Service
	var pods []*v1.Pod
	if pm, ok := meta.(*predicateMetadata); ok && (pm.serviceAffinityMatchingPodList != nil || pm.serviceAffinityMatchingPodServices != nil) {
		services = pm.serviceAffinityMatchingPodServices
		pods = pm.serviceAffinityMatchingPodList
	} else {
		// Make the predicate resilient in case metadata is missing.
		pm = &predicateMetadata{pod: pod}
		s.serviceAffinityPrecomputation(pm)
		pods, services = pm.serviceAffinityMatchingPodList, pm.serviceAffinityMatchingPodServices
	}
	node := nodeInfo.Node()
	if node == nil {
		return false, nil, fmt.Errorf("node not found")
	}
	// Check if the pod being scheduled has the affinity labels specified in its NodeSelector.
	affinityLabels := FindLabelsInSet(s.labels, labels.Set(pod.Spec.NodeSelector))
	// Step 1: If we don't have all constraints, introspect nodes to find the missing constraints.
	if len(s.labels) > len(affinityLabels) {
		if len(services) > 0 {
			if len(pods) > 0 {
				nodeWithAffinityLabels, err := s.nodeInfo.GetNodeInfo(pods[0].Spec.NodeName)
				if err != nil {
					return false, nil, err
				}
				// Backfill the missing affinity label values from the node that
				// an already-scheduled sibling pod landed on.
				AddUnsetLabelsToMap(affinityLabels, s.labels, labels.Set(nodeWithAffinityLabels.Labels))
			}
		}
	}
	// Step 2: Finally complete the affinity predicate based on whatever set of predicates we were able to find.
	if CreateSelectorFromLabels(affinityLabels).Matches(labels.Set(node.Labels)) {
		return true, nil, nil
	}
	return false, []algorithm.PredicateFailureReason{ErrServiceAffinityViolated}, nil
}
2016-11-18 15:52:35 -05:00
func PodFitsHostPorts ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-07-12 05:43:54 -04:00
var wantPorts map [ int ] bool
if predicateMeta , ok := meta . ( * predicateMetadata ) ; ok {
wantPorts = predicateMeta . podPorts
} else {
// We couldn't parse metadata - fallback to computing it.
2016-10-11 11:13:35 -04:00
wantPorts = GetUsedPorts ( pod )
2016-07-12 05:43:54 -04:00
}
2016-01-09 23:03:52 -05:00
if len ( wantPorts ) == 0 {
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-01-09 23:03:52 -05:00
}
2016-07-12 05:43:54 -04:00
2017-03-04 08:21:06 -05:00
existingPorts := nodeInfo . UsedPorts ( )
2014-11-05 00:21:26 -05:00
for wport := range wantPorts {
2016-07-12 05:43:54 -04:00
if wport != 0 && existingPorts [ wport ] {
2016-08-09 08:01:46 -04:00
return false , [ ] algorithm . PredicateFailureReason { ErrPodNotFitsHostPorts } , nil
2014-09-23 19:14:54 -04:00
}
2014-06-28 18:35:51 -04:00
}
2016-08-09 08:01:46 -04:00
return true , nil , nil
2014-06-28 18:35:51 -04:00
}
2016-11-18 15:52:35 -05:00
func GetUsedPorts ( pods ... * v1 . Pod ) map [ int ] bool {
2014-11-05 00:21:26 -05:00
ports := make ( map [ int ] bool )
for _ , pod := range pods {
2016-07-11 04:32:29 -04:00
for j := range pod . Spec . Containers {
container := & pod . Spec . Containers [ j ]
for k := range container . Ports {
podPort := & container . Ports [ k ]
2016-04-21 04:24:12 -04:00
// "0" is explicitly ignored in PodFitsHostPorts,
// which is the only function that uses this value.
if podPort . HostPort != 0 {
2016-04-27 00:35:14 -04:00
ports [ int ( podPort . HostPort ) ] = true
2016-04-21 04:24:12 -04:00
}
2014-06-28 18:35:51 -04:00
}
}
}
2014-11-05 00:21:26 -05:00
return ports
2014-06-28 18:35:51 -04:00
}
2015-10-20 13:24:23 -04:00
// haveSame reports whether the two string slices share at least one common
// element.
func haveSame(a1, a2 []string) bool {
	seen := make(map[string]bool, len(a1))
	for _, v := range a1 {
		seen[v] = true
	}
	for _, v := range a2 {
		if seen[v] {
			return true
		}
	}
	return false
}
2016-01-05 20:10:59 -05:00
2016-11-18 15:52:35 -05:00
func GeneralPredicates ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2017-02-23 20:14:46 -05:00
var predicateFails [ ] algorithm . PredicateFailureReason
fit , reasons , err := noncriticalPredicates ( pod , meta , nodeInfo )
if err != nil {
return false , predicateFails , err
}
if ! fit {
predicateFails = append ( predicateFails , reasons ... )
}
fit , reasons , err = EssentialPredicates ( pod , meta , nodeInfo )
if err != nil {
return false , predicateFails , err
}
if ! fit {
predicateFails = append ( predicateFails , reasons ... )
}
return len ( predicateFails ) == 0 , predicateFails , nil
}
// noncriticalPredicates are the predicates that only non-critical pods need
func noncriticalPredicates ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-08-09 08:01:46 -04:00
var predicateFails [ ] algorithm . PredicateFailureReason
fit , reasons , err := PodFitsResources ( pod , meta , nodeInfo )
if err != nil {
return false , predicateFails , err
}
2016-01-05 20:10:59 -05:00
if ! fit {
2016-08-09 08:01:46 -04:00
predicateFails = append ( predicateFails , reasons ... )
2016-01-05 20:10:59 -05:00
}
2016-04-22 12:58:49 -04:00
2017-02-23 20:14:46 -05:00
return len ( predicateFails ) == 0 , predicateFails , nil
}
// EssentialPredicates are the predicates that all pods, including critical pods, need
func EssentialPredicates ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
var predicateFails [ ] algorithm . PredicateFailureReason
fit , reasons , err := PodFitsHost ( pod , meta , nodeInfo )
2016-08-09 08:01:46 -04:00
if err != nil {
return false , predicateFails , err
}
2016-01-05 20:10:59 -05:00
if ! fit {
2016-08-09 08:01:46 -04:00
predicateFails = append ( predicateFails , reasons ... )
}
2017-02-23 20:14:46 -05:00
// TODO: PodFitsHostPorts is essential for now, but kubelet should ideally
// preempt pods to free up host ports too
2016-08-09 08:01:46 -04:00
fit , reasons , err = PodFitsHostPorts ( pod , meta , nodeInfo )
if err != nil {
return false , predicateFails , err
2016-01-05 20:10:59 -05:00
}
if ! fit {
2016-08-09 08:01:46 -04:00
predicateFails = append ( predicateFails , reasons ... )
}
fit , reasons , err = PodSelectorMatches ( pod , meta , nodeInfo )
if err != nil {
return false , predicateFails , err
2016-01-05 20:10:59 -05:00
}
2016-04-21 04:24:12 -04:00
if ! fit {
2016-08-09 08:01:46 -04:00
predicateFails = append ( predicateFails , reasons ... )
2016-01-05 20:10:59 -05:00
}
2016-08-09 08:01:46 -04:00
return len ( predicateFails ) == 0 , predicateFails , nil
2016-01-05 20:10:59 -05:00
}
2016-05-04 02:50:31 -04:00
// PodAffinityChecker bundles the dependencies needed by the inter-pod
// (anti-)affinity predicate: a NodeInfo accessor for resolving nodes by name
// and a pod lister for enumerating existing pods.
type PodAffinityChecker struct {
	info      NodeInfo
	podLister algorithm.PodLister
}
2016-10-11 09:31:47 -04:00
func NewPodAffinityPredicate ( info NodeInfo , podLister algorithm . PodLister ) algorithm . FitPredicate {
2016-05-04 02:50:31 -04:00
checker := & PodAffinityChecker {
2016-10-11 09:31:47 -04:00
info : info ,
podLister : podLister ,
2016-05-04 02:50:31 -04:00
}
return checker . InterPodAffinityMatches
}
2016-11-18 15:52:35 -05:00
func ( c * PodAffinityChecker ) InterPodAffinityMatches ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-04-28 10:51:17 -04:00
node := nodeInfo . Node ( )
if node == nil {
2016-08-09 08:01:46 -04:00
return false , nil , fmt . Errorf ( "node not found" )
2016-05-04 02:50:31 -04:00
}
2016-07-21 10:16:24 -04:00
if ! c . satisfiesExistingPodsAntiAffinity ( pod , meta , node ) {
2016-08-09 08:01:46 -04:00
return false , [ ] algorithm . PredicateFailureReason { ErrPodAffinityNotMatch } , nil
2016-05-04 02:50:31 -04:00
}
2016-07-21 10:16:24 -04:00
// Now check if <pod> requirements will be satisfied on this node.
2017-02-16 12:38:03 -05:00
affinity := schedulercache . ReconcileAffinity ( pod )
2016-07-21 10:16:24 -04:00
if affinity == nil || ( affinity . PodAffinity == nil && affinity . PodAntiAffinity == nil ) {
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-07-22 06:48:35 -04:00
}
2016-07-21 10:16:24 -04:00
if ! c . satisfiesPodsAffinityAntiAffinity ( pod , node , affinity ) {
2016-08-09 08:01:46 -04:00
return false , [ ] algorithm . PredicateFailureReason { ErrPodAffinityNotMatch } , nil
2016-05-04 02:50:31 -04:00
}
2016-07-18 08:30:52 -04:00
2016-07-21 10:16:24 -04:00
if glog . V ( 10 ) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog . Infof ( "Schedule Pod %+v on Node %+v is allowed, pod (anti)affinity constraints satisfied" ,
podName ( pod ) , node . Name )
}
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-05-04 02:50:31 -04:00
}
2016-10-11 09:31:47 -04:00
// anyPodMatchesPodAffinityTerm checks if any of given pods can match the specific podAffinityTerm.
2016-07-18 08:30:52 -04:00
// First return value indicates whether a matching pod exists on a node that matches the topology key,
// while the second return value indicates whether a matching pod exists anywhere.
// TODO: Do we really need any pod matching, or all pods matching? I think the latter.
2016-11-18 15:52:35 -05:00
func ( c * PodAffinityChecker ) anyPodMatchesPodAffinityTerm ( pod * v1 . Pod , allPods [ ] * v1 . Pod , node * v1 . Node , term * v1 . PodAffinityTerm ) ( bool , bool , error ) {
2016-10-11 09:31:47 -04:00
if len ( term . TopologyKey ) == 0 {
return false , false , errors . New ( "Empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity" )
}
2016-07-18 08:30:52 -04:00
matchingPodExists := false
2016-12-08 05:58:13 -05:00
namespaces := priorityutil . GetNamespacesFromPodAffinityTerm ( pod , term )
selector , err := metav1 . LabelSelectorAsSelector ( term . LabelSelector )
if err != nil {
return false , false , err
}
2016-07-21 10:16:24 -04:00
for _ , existingPod := range allPods {
2016-12-08 05:58:13 -05:00
match := priorityutil . PodMatchesTermsNamespaceAndSelector ( existingPod , namespaces , selector )
2016-07-18 08:30:52 -04:00
if match {
matchingPodExists = true
2016-07-21 10:16:24 -04:00
existingPodNode , err := c . info . GetNodeInfo ( existingPod . Spec . NodeName )
if err != nil {
return false , matchingPodExists , err
}
2016-10-11 09:31:47 -04:00
if priorityutil . NodesHaveSameTopologyKey ( node , existingPodNode , term . TopologyKey ) {
2016-07-18 08:30:52 -04:00
return true , matchingPodExists , nil
}
}
}
return false , matchingPodExists , nil
}
2016-11-18 15:52:35 -05:00
func getPodAffinityTerms ( podAffinity * v1 . PodAffinity ) ( terms [ ] v1 . PodAffinityTerm ) {
2016-07-18 08:30:52 -04:00
if podAffinity != nil {
if len ( podAffinity . RequiredDuringSchedulingIgnoredDuringExecution ) != 0 {
terms = podAffinity . RequiredDuringSchedulingIgnoredDuringExecution
}
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
//if len(podAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
// terms = append(terms, podAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
//}
2016-05-04 02:50:31 -04:00
}
2016-07-18 08:30:52 -04:00
return terms
}
2016-11-18 15:52:35 -05:00
func getPodAntiAffinityTerms ( podAntiAffinity * v1 . PodAntiAffinity ) ( terms [ ] v1 . PodAffinityTerm ) {
2016-07-18 08:30:52 -04:00
if podAntiAffinity != nil {
if len ( podAntiAffinity . RequiredDuringSchedulingIgnoredDuringExecution ) != 0 {
terms = podAntiAffinity . RequiredDuringSchedulingIgnoredDuringExecution
}
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
//if len(podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
// terms = append(terms, podAntiAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
//}
}
return terms
2016-05-04 02:50:31 -04:00
}
2016-11-18 15:52:35 -05:00
func getMatchingAntiAffinityTerms ( pod * v1 . Pod , nodeInfoMap map [ string ] * schedulercache . NodeInfo ) ( [ ] matchingPodAntiAffinityTerm , error ) {
2016-07-21 10:16:24 -04:00
allNodeNames := make ( [ ] string , 0 , len ( nodeInfoMap ) )
for name := range nodeInfoMap {
allNodeNames = append ( allNodeNames , name )
}
var lock sync . Mutex
var result [ ] matchingPodAntiAffinityTerm
var firstError error
appendResult := func ( toAppend [ ] matchingPodAntiAffinityTerm ) {
lock . Lock ( )
defer lock . Unlock ( )
result = append ( result , toAppend ... )
}
catchError := func ( err error ) {
lock . Lock ( )
defer lock . Unlock ( )
if firstError == nil {
firstError = err
2016-05-04 02:50:31 -04:00
}
2016-07-21 10:16:24 -04:00
}
2016-05-04 02:50:31 -04:00
2016-07-21 10:16:24 -04:00
processNode := func ( i int ) {
nodeInfo := nodeInfoMap [ allNodeNames [ i ] ]
node := nodeInfo . Node ( )
if node == nil {
catchError ( fmt . Errorf ( "node not found" ) )
return
}
var nodeResult [ ] matchingPodAntiAffinityTerm
for _ , existingPod := range nodeInfo . PodsWithAffinity ( ) {
2017-02-16 12:38:03 -05:00
affinity := schedulercache . ReconcileAffinity ( existingPod )
2016-07-21 10:16:24 -04:00
if affinity == nil {
continue
2016-05-04 02:50:31 -04:00
}
2016-07-21 10:16:24 -04:00
for _ , term := range getPodAntiAffinityTerms ( affinity . PodAntiAffinity ) {
2016-12-08 05:58:13 -05:00
namespaces := priorityutil . GetNamespacesFromPodAffinityTerm ( pod , & term )
selector , err := metav1 . LabelSelectorAsSelector ( term . LabelSelector )
2016-07-21 10:16:24 -04:00
if err != nil {
catchError ( err )
return
}
2016-12-08 05:58:13 -05:00
match := priorityutil . PodMatchesTermsNamespaceAndSelector ( pod , namespaces , selector )
2016-07-21 10:16:24 -04:00
if match {
nodeResult = append ( nodeResult , matchingPodAntiAffinityTerm { term : & term , node : node } )
}
}
}
if len ( nodeResult ) > 0 {
appendResult ( nodeResult )
2016-05-04 02:50:31 -04:00
}
}
2016-07-21 10:16:24 -04:00
workqueue . Parallelize ( 16 , len ( allNodeNames ) , processNode )
return result , firstError
2016-05-04 02:50:31 -04:00
}
2016-11-18 15:52:35 -05:00
func ( c * PodAffinityChecker ) getMatchingAntiAffinityTerms ( pod * v1 . Pod , allPods [ ] * v1 . Pod ) ( [ ] matchingPodAntiAffinityTerm , error ) {
2016-07-21 10:16:24 -04:00
var result [ ] matchingPodAntiAffinityTerm
for _ , existingPod := range allPods {
2017-02-16 12:38:03 -05:00
affinity := schedulercache . ReconcileAffinity ( existingPod )
2016-08-22 13:10:49 -04:00
if affinity != nil && affinity . PodAntiAffinity != nil {
2016-07-21 10:16:24 -04:00
existingPodNode , err := c . info . GetNodeInfo ( existingPod . Spec . NodeName )
if err != nil {
return nil , err
}
for _ , term := range getPodAntiAffinityTerms ( affinity . PodAntiAffinity ) {
2016-12-08 05:58:13 -05:00
namespaces := priorityutil . GetNamespacesFromPodAffinityTerm ( existingPod , & term )
selector , err := metav1 . LabelSelectorAsSelector ( term . LabelSelector )
2016-07-21 10:16:24 -04:00
if err != nil {
return nil , err
}
2016-12-08 05:58:13 -05:00
match := priorityutil . PodMatchesTermsNamespaceAndSelector ( pod , namespaces , selector )
2016-07-21 10:16:24 -04:00
if match {
result = append ( result , matchingPodAntiAffinityTerm { term : & term , node : existingPodNode } )
}
}
2016-05-04 02:50:31 -04:00
}
}
2016-07-21 10:16:24 -04:00
return result , nil
}
2016-05-04 02:50:31 -04:00
2016-07-21 10:16:24 -04:00
// Checks if scheduling the pod onto this node would break any anti-affinity
// rules indicated by the existing pods.
//
// When predicate metadata is available, the precomputed matching anti-affinity
// terms are used; otherwise they are recomputed from a full pod listing. Any
// failure while gathering the terms is treated conservatively as "does not
// satisfy" (returns false).
func (c *PodAffinityChecker) satisfiesExistingPodsAntiAffinity(pod *v1.Pod, meta interface{}, node *v1.Node) bool {
	var matchingTerms []matchingPodAntiAffinityTerm
	if predicateMeta, ok := meta.(*predicateMetadata); ok {
		matchingTerms = predicateMeta.matchingAntiAffinityTerms
	} else {
		// Metadata is missing; fall back to listing every pod and recomputing.
		allPods, err := c.podLister.List(labels.Everything())
		if err != nil {
			glog.V(10).Infof("Failed to get all pods, %+v", err)
			return false
		}
		if matchingTerms, err = c.getMatchingAntiAffinityTerms(pod, allPods); err != nil {
			glog.V(10).Infof("Failed to get all terms that pod %+v matches, err: %+v", podName(pod), err)
			return false
		}
	}
	for _, term := range matchingTerms {
		if len(term.term.TopologyKey) == 0 {
			glog.V(10).Infof("Empty topologyKey is not allowed except for PreferredDuringScheduling pod anti-affinity")
			return false
		}
		// The candidate node shares the topology of a node hosting a pod whose
		// anti-affinity term matches this pod, so placement here is forbidden.
		if priorityutil.NodesHaveSameTopologyKey(node, term.node, term.term.TopologyKey) {
			glog.V(10).Infof("Cannot schedule pod %+v onto node %v,because of PodAntiAffinityTerm %v",
				podName(pod), node.Name, term.term)
			return false
		}
	}
	if glog.V(10) {
		// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
		// not logged. There is visible performance gain from it.
		glog.Infof("Schedule Pod %+v on Node %+v is allowed, existing pods anti-affinity rules satisfied.",
			podName(pod), node.Name)
	}
	return true
}
// Checks if scheduling the pod onto this node would break any rules of this pod.
2016-11-18 15:52:35 -05:00
func ( c * PodAffinityChecker ) satisfiesPodsAffinityAntiAffinity ( pod * v1 . Pod , node * v1 . Node , affinity * v1 . Affinity ) bool {
2016-07-21 10:16:24 -04:00
allPods , err := c . podLister . List ( labels . Everything ( ) )
if err != nil {
return false
}
// Check all affinity terms.
for _ , term := range getPodAffinityTerms ( affinity . PodAffinity ) {
termMatches , matchingPodExists , err := c . anyPodMatchesPodAffinityTerm ( pod , allPods , node , & term )
2016-07-18 08:30:52 -04:00
if err != nil {
2016-07-21 10:16:24 -04:00
glog . V ( 10 ) . Infof ( "Cannot schedule pod %+v onto node %v,because of PodAffinityTerm %v, err: %v" ,
podName ( pod ) , node . Name , term , err )
2016-07-18 08:30:52 -04:00
return false
}
2016-07-21 10:16:24 -04:00
if ! termMatches {
2016-11-01 06:34:42 -04:00
// If the requirement matches a pod's own labels are namespace, and there are
2016-07-21 10:16:24 -04:00
// no other such pods, then disregard the requirement. This is necessary to
// not block forever because the first pod of the collection can't be scheduled.
2016-12-11 10:49:45 -05:00
if matchingPodExists {
glog . V ( 10 ) . Infof ( "Cannot schedule pod %+v onto node %v,because of PodAffinityTerm %v, err: %v" ,
podName ( pod ) , node . Name , term , err )
return false
}
2016-12-08 05:58:13 -05:00
namespaces := priorityutil . GetNamespacesFromPodAffinityTerm ( pod , & term )
selector , err := metav1 . LabelSelectorAsSelector ( term . LabelSelector )
if err != nil {
glog . V ( 10 ) . Infof ( "Cannot parse selector on term %v for pod %v. Details %v" ,
term , podName ( pod ) , err )
return false
}
match := priorityutil . PodMatchesTermsNamespaceAndSelector ( pod , namespaces , selector )
2016-12-11 10:49:45 -05:00
if ! match {
2016-07-21 10:16:24 -04:00
glog . V ( 10 ) . Infof ( "Cannot schedule pod %+v onto node %v,because of PodAffinityTerm %v, err: %v" ,
podName ( pod ) , node . Name , term , err )
2016-07-18 08:30:52 -04:00
return false
2016-05-04 02:50:31 -04:00
}
}
}
2016-07-21 10:16:24 -04:00
// Check all anti-affinity terms.
for _ , term := range getPodAntiAffinityTerms ( affinity . PodAntiAffinity ) {
termMatches , _ , err := c . anyPodMatchesPodAffinityTerm ( pod , allPods , node , & term )
if err != nil || termMatches {
glog . V ( 10 ) . Infof ( "Cannot schedule pod %+v onto node %v,because of PodAntiAffinityTerm %v, err: %v" ,
podName ( pod ) , node . Name , term , err )
return false
}
}
if glog . V ( 10 ) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog . Infof ( "Schedule Pod %+v on Node %+v is allowed, pod afinnity/anti-affinity constraints satisfied." ,
podName ( pod ) , node . Name )
}
2016-05-04 02:50:31 -04:00
return true
}
2016-11-18 15:52:35 -05:00
func PodToleratesNodeTaints ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-10-21 05:33:09 -04:00
taints , err := nodeInfo . Taints ( )
2016-03-30 23:42:57 -04:00
if err != nil {
2016-08-09 08:01:46 -04:00
return false , nil , err
2016-03-30 23:42:57 -04:00
}
2017-02-20 11:43:05 -05:00
if v1 . TolerationsTolerateTaintsWithFilter ( pod . Spec . Tolerations , taints , func ( t * v1 . Taint ) bool {
2017-02-21 13:03:13 -05:00
// PodToleratesNodeTaints is only interested in NoSchedule and NoExecute taints.
2017-02-07 07:47:57 -05:00
return t . Effect == v1 . TaintEffectNoSchedule || t . Effect == v1 . TaintEffectNoExecute
2017-01-14 03:18:14 -05:00
} ) {
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-03-30 23:42:57 -04:00
}
2016-10-07 15:10:55 -04:00
return false , [ ] algorithm . PredicateFailureReason { ErrTaintsTolerationsNotMatch } , nil
2016-03-30 23:42:57 -04:00
}
2016-05-12 08:01:33 -04:00
// Determine if a pod is scheduled with best-effort QoS
2016-11-18 15:52:35 -05:00
func isPodBestEffort ( pod * v1 . Pod ) bool {
2016-12-19 16:02:01 -05:00
return qos . GetPodQOS ( pod ) == v1 . PodQOSBestEffort
2016-05-12 08:01:33 -04:00
}
// CheckNodeMemoryPressurePredicate checks if a pod can be scheduled on a node
// reporting memory pressure condition.
2016-11-18 15:52:35 -05:00
func CheckNodeMemoryPressurePredicate ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-07-08 02:25:49 -04:00
var podBestEffort bool
2016-07-21 10:16:24 -04:00
if predicateMeta , ok := meta . ( * predicateMetadata ) ; ok {
2016-07-08 02:25:49 -04:00
podBestEffort = predicateMeta . podBestEffort
} else {
// We couldn't parse metadata - fallback to computing it.
podBestEffort = isPodBestEffort ( pod )
}
2016-05-12 08:01:33 -04:00
// pod is not BestEffort pod
2016-07-08 02:25:49 -04:00
if ! podBestEffort {
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-05-12 08:01:33 -04:00
}
2016-10-21 05:23:22 -04:00
// is node under presure?
if nodeInfo . MemoryPressureCondition ( ) == v1 . ConditionTrue {
return false , [ ] algorithm . PredicateFailureReason { ErrNodeUnderMemoryPressure } , nil
2016-05-12 08:01:33 -04:00
}
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-05-12 08:01:33 -04:00
}
2016-07-22 15:23:34 -04:00
// CheckNodeDiskPressurePredicate checks if a pod can be scheduled on a node
// reporting disk pressure condition.
2016-11-18 15:52:35 -05:00
func CheckNodeDiskPressurePredicate ( pod * v1 . Pod , meta interface { } , nodeInfo * schedulercache . NodeInfo ) ( bool , [ ] algorithm . PredicateFailureReason , error ) {
2016-10-21 05:23:22 -04:00
// is node under presure?
if nodeInfo . DiskPressureCondition ( ) == v1 . ConditionTrue {
return false , [ ] algorithm . PredicateFailureReason { ErrNodeUnderDiskPressure } , nil
2016-07-22 15:23:34 -04:00
}
2016-08-09 08:01:46 -04:00
return true , nil , nil
2016-07-22 15:23:34 -04:00
}