2017-05-30 15:15:38 -04:00
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"sync"
2020-06-17 11:48:42 -04:00
"time"
2017-05-30 15:15:38 -04:00
2018-08-08 08:41:00 -04:00
corev1 "k8s.io/api/core/v1"
2021-08-31 02:39:55 -04:00
"k8s.io/component-helpers/storage/ephemeral"
2018-08-09 09:27:23 -04:00
pvutil "k8s.io/kubernetes/pkg/api/v1/persistentvolume"
2018-08-08 08:41:00 -04:00
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
2017-05-30 15:15:38 -04:00
"k8s.io/kubernetes/third_party/forked/gonum/graph"
"k8s.io/kubernetes/third_party/forked/gonum/graph/simple"
)
// namedVertex is a graph.Node implementation that records which API object it
// stands for: the object's type (vertexType), its namespace, and its name.
type namedVertex struct {
	name, namespace string
	id              int
	vertexType      vertexType
}
// newNamedVertex builds a vertex of the given type for the API object identified
// by namespace and name, and tags it with the supplied graph node id.
func newNamedVertex(vertexType vertexType, namespace, name string, id int) *namedVertex {
	v := new(namedVertex)
	v.id = id
	v.vertexType = vertexType
	v.namespace = namespace
	v.name = name
	return v
}
// ID returns the graph node id this vertex was constructed with.
func (n *namedVertex) ID() int {
	return n.id
}
// String renders the vertex as "<type>:<name>" when it has no namespace and as
// "<type>:<namespace>/<name>" otherwise, where <type> is looked up in vertexTypes.
func (n *namedVertex) String() string {
	label := vertexTypes[n.vertexType] + ":"
	if n.namespace != "" {
		label += n.namespace + "/"
	}
	return label + n.name
}
// destinationEdge is a graph edge from F to T that also carries a denormalized
// reference to the final destination vertex the edge leads to.
// This should only be used when there is a single leaf vertex reachable from T.
type destinationEdge struct {
	F, T, Destination graph.Node
}
// newDestinationEdge returns an edge from -> to that remembers destination as
// its final destination vertex.
func newDestinationEdge(from, to, destination graph.Node) graph.Edge {
	edge := &destinationEdge{
		F:           from,
		T:           to,
		Destination: destination,
	}
	return edge
}
// From returns the vertex the edge starts at.
func (e *destinationEdge) From() graph.Node {
	return e.F
}
// To returns the vertex the edge points at.
func (e *destinationEdge) To() graph.Node {
	return e.T
}
// Weight always reports 0; destination edges carry no weight.
func (e *destinationEdge) Weight() float64 {
	return 0
}
// DestinationID returns the ID of the denormalized destination vertex.
func (e *destinationEdge) DestinationID() int {
	return e.Destination.ID()
}
// Graph holds graph vertices and a way to look up a vertex for a particular API type/namespace/name.
// All edges point toward the vertices representing Kubernetes nodes:
//
// node <- pod
// pod <- secret,configmap,pvc
// pvc <- pv
// pv <- secret
type Graph struct {
lock sync . RWMutex
graph * simple . DirectedAcyclicGraph
// vertices is a map of type -> namespace -> name -> vertex
vertices map [ vertexType ] namespaceVertexMapping
2018-04-19 22:22:25 -04:00
// destinationEdgeIndex is a map of vertex -> set of destination IDs
destinationEdgeIndex map [ int ] * intSet
// destinationEdgeThreshold is the minimum number of distinct destination IDs at which to maintain an index
destinationEdgeThreshold int
2017-05-30 15:15:38 -04:00
}
// namespaceVertexMapping is a map of namespace -> name -> vertex.
// Graph.vertices holds one of these per vertex type. Vertices created without a
// namespace (this file does so for node, pv, and volume attachment vertices) are
// stored under the empty namespace "".
type namespaceVertexMapping map [ string ] nameVertexMapping
// nameVertexMapping is a map of name -> vertex for the vertices of one type within one namespace.
type nameVertexMapping map [ string ] * namedVertex
func NewGraph ( ) * Graph {
return & Graph {
vertices : map [ vertexType ] namespaceVertexMapping { } ,
graph : simple . NewDirectedAcyclicGraph ( 0 , 0 ) ,
2018-04-19 22:22:25 -04:00
destinationEdgeIndex : map [ int ] * intSet { } ,
// experimentally determined to be the point at which iteration adds an order of magnitude to the authz check.
// since maintaining indexes costs time/memory while processing graph changes, we don't want to make this too low.
destinationEdgeThreshold : 200 ,
2017-05-30 15:15:38 -04:00
}
}
// vertexType indicates the type of the API object the vertex represents.
// represented as a byte to minimize space used in the vertices.
type vertexType byte

// The known vertex types. Values are assigned sequentially from 0 via iota, so
// the declaration order here determines each type's numeric value.
const (
	configMapVertexType vertexType = iota
	nodeVertexType
	podVertexType
	pvcVertexType
	pvVertexType
	secretVertexType
	vaVertexType
	serviceAccountVertexType
)

// vertexTypes maps each vertex type to the label used when rendering a vertex
// as a string (see namedVertex.String).
var vertexTypes = map[vertexType]string{
	configMapVertexType:      "configmap",
	nodeVertexType:           "node",
	podVertexType:            "pod",
	pvcVertexType:            "pvc",
	pvVertexType:             "pv",
	secretVertexType:         "secret",
	vaVertexType:             "volumeattachment",
	serviceAccountVertexType: "serviceAccount",
}
// must be called under a write lock
// getOrCreateVertex_locked returns the vertex registered for the given
// type/namespace/name, creating and registering a new one when none exists yet.
func (g *Graph) getOrCreateVertex_locked(vertexType vertexType, namespace, name string) *namedVertex {
	existing, found := g.getVertex_rlocked(vertexType, namespace, name)
	if !found {
		return g.createVertex_locked(vertexType, namespace, name)
	}
	return existing
}
// must be called under a read lock
// getVertex_rlocked looks up the vertex for the given type/namespace/name.
// The boolean result reports whether such a vertex is registered.
func (g *Graph) getVertex_rlocked(vertexType vertexType, namespace, name string) (*namedVertex, bool) {
	// indexing a missing (nil) inner map is safe for reads and simply yields "not found"
	byName := g.vertices[vertexType][namespace]
	found, ok := byName[name]
	return found, ok
}
// must be called under a write lock
// createVertex_locked allocates a new vertex for the given type/namespace/name,
// registers it in the lookup maps (creating the intermediate maps on demand),
// adds it to the underlying graph, and returns it.
// It does not check whether a vertex with the same identity already exists.
func (g *Graph) createVertex_locked(vertexType vertexType, namespace, name string) *namedVertex {
	byNamespace, ok := g.vertices[vertexType]
	if !ok {
		byNamespace = namespaceVertexMapping{}
		g.vertices[vertexType] = byNamespace
	}

	byName, ok := byNamespace[namespace]
	if !ok {
		byName = nameVertexMapping{}
		byNamespace[namespace] = byName
	}

	created := newNamedVertex(vertexType, namespace, name, g.graph.NewNodeID())
	byName[name] = created
	g.graph.AddNode(created)
	return created
}
// must be called under write lock
func ( g * Graph ) deleteVertex_locked ( vertexType vertexType , namespace , name string ) {
vertex , exists := g . getVertex_rlocked ( vertexType , namespace , name )
if ! exists {
return
}
// find existing neighbors with a single edge (meaning we are their only neighbor)
neighborsToRemove := [ ] graph . Node { }
2020-02-10 13:24:27 -05:00
edgesToRemoveFromIndexes := [ ] graph . Edge { }
2017-05-30 15:15:38 -04:00
g . graph . VisitFrom ( vertex , func ( neighbor graph . Node ) bool {
// this downstream neighbor has only one edge (which must be from us), so remove them as well
if g . graph . Degree ( neighbor ) == 1 {
neighborsToRemove = append ( neighborsToRemove , neighbor )
}
return true
} )
g . graph . VisitTo ( vertex , func ( neighbor graph . Node ) bool {
if g . graph . Degree ( neighbor ) == 1 {
2018-04-19 16:59:09 -04:00
// this upstream neighbor has only one edge (which must be to us), so remove them as well
2017-05-30 15:15:38 -04:00
neighborsToRemove = append ( neighborsToRemove , neighbor )
2018-04-19 22:22:25 -04:00
} else {
2020-02-10 13:24:27 -05:00
// decrement the destination edge index on this neighbor if the edge between us was a destination edge
edgesToRemoveFromIndexes = append ( edgesToRemoveFromIndexes , g . graph . EdgeBetween ( vertex , neighbor ) )
2017-05-30 15:15:38 -04:00
}
return true
} )
// remove the vertex
2018-04-19 16:59:09 -04:00
g . removeVertex_locked ( vertex )
2017-05-30 15:15:38 -04:00
// remove neighbors that are now edgeless
for _ , neighbor := range neighborsToRemove {
2018-04-19 16:59:09 -04:00
g . removeVertex_locked ( neighbor . ( * namedVertex ) )
2017-05-30 15:15:38 -04:00
}
2018-04-19 22:22:25 -04:00
2020-02-10 13:24:27 -05:00
// remove edges from destination indexes for neighbors that dropped outbound edges
for _ , edge := range edgesToRemoveFromIndexes {
g . removeEdgeFromDestinationIndex_locked ( edge )
2018-04-19 22:22:25 -04:00
}
2017-05-30 15:15:38 -04:00
}
2018-02-20 14:28:28 -05:00
// must be called under write lock
// deletes edges from a given vertex type to a specific vertex
// will delete each orphaned "from" vertex, but will never delete the "to" vertex
func ( g * Graph ) deleteEdges_locked ( fromType , toType vertexType , toNamespace , toName string ) {
// get the "to" side
toVert , exists := g . getVertex_rlocked ( toType , toNamespace , toName )
if ! exists {
return
}
// delete all edges between vertices of fromType and toVert
2018-04-19 16:59:09 -04:00
neighborsToRemove := [ ] * namedVertex { }
2020-02-10 13:24:27 -05:00
edgesToRemove := [ ] graph . Edge { }
2018-04-19 16:59:09 -04:00
g . graph . VisitTo ( toVert , func ( from graph . Node ) bool {
fromVert := from . ( * namedVertex )
if fromVert . vertexType != fromType {
return true
2018-02-20 14:28:28 -05:00
}
2020-02-10 13:24:27 -05:00
// this neighbor has only one edge (which must be to us), so remove them as well
if g . graph . Degree ( fromVert ) == 1 {
2018-04-19 16:59:09 -04:00
neighborsToRemove = append ( neighborsToRemove , fromVert )
2018-04-19 22:22:25 -04:00
} else {
2020-02-10 13:24:27 -05:00
edgesToRemove = append ( edgesToRemove , g . graph . EdgeBetween ( from , toVert ) )
2018-04-19 16:59:09 -04:00
}
return true
} )
2018-02-20 14:28:28 -05:00
// clean up orphaned verts
2018-04-19 16:59:09 -04:00
for _ , v := range neighborsToRemove {
g . removeVertex_locked ( v )
}
2018-04-19 22:22:25 -04:00
2020-02-10 13:24:27 -05:00
// remove edges and decrement destination indexes for neighbors that dropped outbound edges
for _ , edge := range edgesToRemove {
g . graph . RemoveEdge ( edge )
g . removeEdgeFromDestinationIndex_locked ( edge )
}
}
// A fastpath for recomputeDestinationIndex_locked for "removing edge" case.
func ( g * Graph ) removeEdgeFromDestinationIndex_locked ( e graph . Edge ) {
n := e . From ( )
// don't maintain indices for nodes with few edges
edgeCount := g . graph . Degree ( n )
if edgeCount < g . destinationEdgeThreshold {
delete ( g . destinationEdgeIndex , n . ID ( ) )
return
}
// decrement the nodeID->destinationID refcount in the index, if the index exists
index := g . destinationEdgeIndex [ n . ID ( ) ]
if index == nil {
return
}
if destinationEdge , ok := e . ( * destinationEdge ) ; ok {
index . decrement ( destinationEdge . DestinationID ( ) )
2018-04-19 22:22:25 -04:00
}
2018-04-19 16:59:09 -04:00
}
2020-01-30 08:23:25 -05:00
// A fastpath for recomputeDestinationIndex_locked for "adding edge case".
func ( g * Graph ) addEdgeToDestinationIndex_locked ( e graph . Edge ) {
n := e . From ( )
index := g . destinationEdgeIndex [ n . ID ( ) ]
if index == nil {
// There is no index, use the full index computation method
g . recomputeDestinationIndex_locked ( n )
return
}
// fast-add the new edge to an existing index
if destinationEdge , ok := e . ( * destinationEdge ) ; ok {
2020-02-10 13:24:13 -05:00
index . increment ( destinationEdge . DestinationID ( ) )
2020-01-30 08:23:25 -05:00
}
}
2018-04-19 16:59:09 -04:00
// must be called under write lock
// removeVertex_locked removes the specified vertex from the graph and from the maintained indices.
// It does nothing to indexes of neighbor vertices.
func ( g * Graph ) removeVertex_locked ( v * namedVertex ) {
g . graph . RemoveNode ( v )
2018-04-19 22:22:25 -04:00
delete ( g . destinationEdgeIndex , v . ID ( ) )
2018-04-19 16:59:09 -04:00
delete ( g . vertices [ v . vertexType ] [ v . namespace ] , v . name )
if len ( g . vertices [ v . vertexType ] [ v . namespace ] ) == 0 {
delete ( g . vertices [ v . vertexType ] , v . namespace )
2018-02-20 14:28:28 -05:00
}
}
2018-04-19 22:22:25 -04:00
// must be called under write lock
// recomputeDestinationIndex_locked recomputes the index of destination ids for the specified vertex
func ( g * Graph ) recomputeDestinationIndex_locked ( n graph . Node ) {
// don't maintain indices for nodes with few edges
edgeCount := g . graph . Degree ( n )
if edgeCount < g . destinationEdgeThreshold {
delete ( g . destinationEdgeIndex , n . ID ( ) )
return
}
// get or create the index
index := g . destinationEdgeIndex [ n . ID ( ) ]
if index == nil {
index = newIntSet ( )
} else {
2020-02-10 13:24:13 -05:00
index . reset ( )
2018-04-19 22:22:25 -04:00
}
// populate the index
g . graph . VisitFrom ( n , func ( dest graph . Node ) bool {
if destinationEdge , ok := g . graph . EdgeBetween ( n , dest ) . ( * destinationEdge ) ; ok {
2020-02-10 13:24:13 -05:00
index . increment ( destinationEdge . DestinationID ( ) )
2018-04-19 22:22:25 -04:00
}
return true
} )
2020-02-10 13:24:27 -05:00
g . destinationEdgeIndex [ n . ID ( ) ] = index
2018-04-19 22:22:25 -04:00
}
2017-05-30 15:15:38 -04:00
// AddPod should only be called once spec.NodeName is populated.
// It sets up edges for the following relationships (which are immutable for a pod once bound to a node):
//
// pod -> node
//
// secret -> pod
// configmap -> pod
// pvc -> pod
2017-11-02 13:26:04 -04:00
// svcacct -> pod
2018-08-08 08:41:00 -04:00
func ( g * Graph ) AddPod ( pod * corev1 . Pod ) {
2020-06-17 11:48:42 -04:00
start := time . Now ( )
defer func ( ) {
graphActionsDuration . WithLabelValues ( "AddPod" ) . Observe ( time . Since ( start ) . Seconds ( ) )
} ( )
2017-05-30 15:15:38 -04:00
g . lock . Lock ( )
defer g . lock . Unlock ( )
g . deleteVertex_locked ( podVertexType , pod . Namespace , pod . Name )
podVertex := g . getOrCreateVertex_locked ( podVertexType , pod . Namespace , pod . Name )
nodeVertex := g . getOrCreateVertex_locked ( nodeVertexType , "" , pod . Spec . NodeName )
g . graph . SetEdge ( newDestinationEdge ( podVertex , nodeVertex , nodeVertex ) )
2018-06-06 11:34:14 -04:00
// Short-circuit adding edges to other resources for mirror pods.
// A node must never be able to create a pod that grants them permissions on other API objects.
// The NodeRestriction admission plugin prevents creation of such pods, but short-circuiting here gives us defense in depth.
2018-08-16 06:02:17 -04:00
if _ , isMirrorPod := pod . Annotations [ corev1 . MirrorPodAnnotationKey ] ; isMirrorPod {
2018-06-06 11:34:14 -04:00
return
}
2017-11-02 13:26:04 -04:00
// TODO(mikedanese): If the pod doesn't mount the service account secrets,
// should the node still get access to the service account?
//
// ref https://github.com/kubernetes/kubernetes/issues/58790
if len ( pod . Spec . ServiceAccountName ) > 0 {
2018-04-19 16:59:09 -04:00
serviceAccountVertex := g . getOrCreateVertex_locked ( serviceAccountVertexType , pod . Namespace , pod . Spec . ServiceAccountName )
2020-01-30 08:23:25 -05:00
e := newDestinationEdge ( serviceAccountVertex , podVertex , nodeVertex )
g . graph . SetEdge ( e )
g . addEdgeToDestinationIndex_locked ( e )
2017-11-02 13:26:04 -04:00
}
2017-05-30 15:15:38 -04:00
podutil . VisitPodSecretNames ( pod , func ( secret string ) bool {
2018-04-19 16:59:09 -04:00
secretVertex := g . getOrCreateVertex_locked ( secretVertexType , pod . Namespace , secret )
2020-01-30 08:23:25 -05:00
e := newDestinationEdge ( secretVertex , podVertex , nodeVertex )
g . graph . SetEdge ( e )
g . addEdgeToDestinationIndex_locked ( e )
2017-05-30 15:15:38 -04:00
return true
} )
podutil . VisitPodConfigmapNames ( pod , func ( configmap string ) bool {
2018-04-19 16:59:09 -04:00
configmapVertex := g . getOrCreateVertex_locked ( configMapVertexType , pod . Namespace , configmap )
2020-01-30 08:23:25 -05:00
e := newDestinationEdge ( configmapVertex , podVertex , nodeVertex )
g . graph . SetEdge ( e )
g . addEdgeToDestinationIndex_locked ( e )
2017-05-30 15:15:38 -04:00
return true
} )
for _ , v := range pod . Spec . Volumes {
2020-06-08 04:31:38 -04:00
claimName := ""
2017-05-30 15:15:38 -04:00
if v . PersistentVolumeClaim != nil {
2020-06-08 04:31:38 -04:00
claimName = v . PersistentVolumeClaim . ClaimName
2021-10-11 09:57:25 -04:00
} else if v . Ephemeral != nil {
2021-08-31 02:39:55 -04:00
claimName = ephemeral . VolumeClaimName ( pod , & v )
2020-06-08 04:31:38 -04:00
}
if claimName != "" {
pvcVertex := g . getOrCreateVertex_locked ( pvcVertexType , pod . Namespace , claimName )
2020-01-30 08:23:25 -05:00
e := newDestinationEdge ( pvcVertex , podVertex , nodeVertex )
g . graph . SetEdge ( e )
g . addEdgeToDestinationIndex_locked ( e )
2017-05-30 15:15:38 -04:00
}
}
}
func ( g * Graph ) DeletePod ( name , namespace string ) {
2020-06-17 11:48:42 -04:00
start := time . Now ( )
defer func ( ) {
graphActionsDuration . WithLabelValues ( "DeletePod" ) . Observe ( time . Since ( start ) . Seconds ( ) )
} ( )
2017-05-30 15:15:38 -04:00
g . lock . Lock ( )
defer g . lock . Unlock ( )
g . deleteVertex_locked ( podVertexType , namespace , name )
}
// AddPV sets up edges for the following relationships:
//
// secret -> pv
//
// pv -> pvc
2018-08-09 09:27:23 -04:00
func ( g * Graph ) AddPV ( pv * corev1 . PersistentVolume ) {
2020-06-17 11:48:42 -04:00
start := time . Now ( )
defer func ( ) {
graphActionsDuration . WithLabelValues ( "AddPV" ) . Observe ( time . Since ( start ) . Seconds ( ) )
} ( )
2017-05-30 15:15:38 -04:00
g . lock . Lock ( )
defer g . lock . Unlock ( )
// clear existing edges
g . deleteVertex_locked ( pvVertexType , "" , pv . Name )
// if we have a pvc, establish new edges
if pv . Spec . ClaimRef != nil {
pvVertex := g . getOrCreateVertex_locked ( pvVertexType , "" , pv . Name )
// since we don't know the other end of the pvc -> pod -> node chain (or it may not even exist yet), we can't decorate these edges with kubernetes node info
g . graph . SetEdge ( simple . Edge { F : pvVertex , T : g . getOrCreateVertex_locked ( pvcVertexType , pv . Spec . ClaimRef . Namespace , pv . Spec . ClaimRef . Name ) } )
2018-02-23 16:50:43 -05:00
pvutil . VisitPVSecretNames ( pv , func ( namespace , secret string , kubeletVisible bool ) bool {
2017-05-30 15:15:38 -04:00
// This grants access to the named secret in the same namespace as the bound PVC
2018-02-23 16:50:43 -05:00
if kubeletVisible {
g . graph . SetEdge ( simple . Edge { F : g . getOrCreateVertex_locked ( secretVertexType , namespace , secret ) , T : pvVertex } )
}
2017-05-30 15:15:38 -04:00
return true
} )
}
}
func ( g * Graph ) DeletePV ( name string ) {
2020-06-17 11:48:42 -04:00
start := time . Now ( )
defer func ( ) {
graphActionsDuration . WithLabelValues ( "DeletePV" ) . Observe ( time . Since ( start ) . Seconds ( ) )
} ( )
2017-05-30 15:15:38 -04:00
g . lock . Lock ( )
defer g . lock . Unlock ( )
g . deleteVertex_locked ( pvVertexType , "" , name )
}
2018-01-16 23:39:11 -05:00
// AddVolumeAttachment sets up edges for the following relationships:
//
// volume attachment -> node
func ( g * Graph ) AddVolumeAttachment ( attachmentName , nodeName string ) {
2020-06-17 11:48:42 -04:00
start := time . Now ( )
defer func ( ) {
graphActionsDuration . WithLabelValues ( "AddVolumeAttachment" ) . Observe ( time . Since ( start ) . Seconds ( ) )
} ( )
2018-01-16 23:39:11 -05:00
g . lock . Lock ( )
defer g . lock . Unlock ( )
// clear existing edges
g . deleteVertex_locked ( vaVertexType , "" , attachmentName )
// if we have a node, establish new edges
if len ( nodeName ) > 0 {
vaVertex := g . getOrCreateVertex_locked ( vaVertexType , "" , attachmentName )
nodeVertex := g . getOrCreateVertex_locked ( nodeVertexType , "" , nodeName )
g . graph . SetEdge ( newDestinationEdge ( vaVertex , nodeVertex , nodeVertex ) )
}
}
func ( g * Graph ) DeleteVolumeAttachment ( name string ) {
2020-06-17 11:48:42 -04:00
start := time . Now ( )
defer func ( ) {
graphActionsDuration . WithLabelValues ( "DeleteVolumeAttachment" ) . Observe ( time . Since ( start ) . Seconds ( ) )
} ( )
2018-01-16 23:39:11 -05:00
g . lock . Lock ( )
defer g . lock . Unlock ( )
g . deleteVertex_locked ( vaVertexType , "" , name )
}
2018-02-20 14:28:28 -05:00
2018-01-29 12:32:48 -05:00
// SetNodeConfigMap sets up edges for the Node.Spec.ConfigSource.ConfigMap relationship:
2018-02-20 14:28:28 -05:00
//
// configmap -> node
func ( g * Graph ) SetNodeConfigMap ( nodeName , configMapName , configMapNamespace string ) {
2020-06-17 11:48:42 -04:00
start := time . Now ( )
defer func ( ) {
graphActionsDuration . WithLabelValues ( "SetNodeConfigMap" ) . Observe ( time . Since ( start ) . Seconds ( ) )
} ( )
2018-02-20 14:28:28 -05:00
g . lock . Lock ( )
defer g . lock . Unlock ( )
// TODO(mtaufen): ensure len(nodeName) > 0 in all cases (would sure be nice to have a dependently-typed language here...)
// clear edges configmaps -> node where the destination is the current node *only*
// at present, a node can only have one *direct* configmap reference at a time
g . deleteEdges_locked ( configMapVertexType , nodeVertexType , "" , nodeName )
// establish new edges if we have a real ConfigMap to reference
if len ( configMapName ) > 0 && len ( configMapNamespace ) > 0 {
configmapVertex := g . getOrCreateVertex_locked ( configMapVertexType , configMapNamespace , configMapName )
nodeVertex := g . getOrCreateVertex_locked ( nodeVertexType , "" , nodeName )
2020-02-10 13:23:50 -05:00
e := newDestinationEdge ( configmapVertex , nodeVertex , nodeVertex )
g . graph . SetEdge ( e )
g . addEdgeToDestinationIndex_locked ( e )
2018-02-20 14:28:28 -05:00
}
}