DRA upgrade/downgrade: split out individual test steps
Collecting results from callbacks in a main ginkgo.It and reporting them as failures in separate ginkgo.It callbacks is probably the best that can be done with Ginkgo. A better solution would be Go unit tests with sub-tests.
Parent: 47b613eded
Commit: 65ef31973c
3 changed files with 430 additions and 249 deletions
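For orientation, here is a minimal, self-contained sketch of the step-chaining pattern this commit introduces: each phase returns the next phase as a closure, so per-sub-test state travels from step 1 through step 3 without globals. Plain context.Context stands in for ktesting.TContext; all names here are illustrative, not part of the commit:

package main

import (
	"context"
	"fmt"
)

// Each phase returns the next phase as a closure, so per-sub-test state
// (claims, pods) is carried implicitly from step to step.
type step3Func func(ctx context.Context)
type step2Func func(ctx context.Context) step3Func
type step1Func func(ctx context.Context) step2Func

func exampleSubTest(ctx context.Context) step2Func {
	state := "object created before the upgrade"
	fmt.Println("step 1 (old release):", state)
	return func(ctx context.Context) step3Func {
		fmt.Println("step 2 (after upgrade) still sees:", state)
		return func(ctx context.Context) {
			fmt.Println("step 3 (after downgrade) cleans up:", state)
		}
	}
}

func main() {
	ctx := context.Background()
	step2 := exampleSubTest(ctx)
	step3 := step2(ctx)
	step3(ctx)
}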
test/e2e_dra/coredra_test.go (new file, 74 lines)

@ -0,0 +1,74 @@
/*
Copyright The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2edra

import (
	"github.com/onsi/gomega"

	v1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	drautils "k8s.io/kubernetes/test/e2e/dra/utils"
	"k8s.io/kubernetes/test/e2e/framework"
	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
	"k8s.io/kubernetes/test/utils/ktesting"
)

func coreDRA(tCtx ktesting.TContext, f *framework.Framework, b *drautils.Builder) step2Func {
	namespace := f.Namespace.Name
	claim := b.ExternalClaim()
	pod := b.PodExternal()
	b.Create(tCtx, claim, pod)
	b.TestPod(tCtx, f, pod)

	return func(tCtx ktesting.TContext) step3Func {
		// Remove pod prepared in step 1.
		framework.ExpectNoError(f.ClientSet.ResourceV1beta1().ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{}))
		framework.ExpectNoError(f.ClientSet.CoreV1().Pods(namespace).Delete(tCtx, pod.Name, metav1.DeleteOptions{}))
		framework.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(tCtx, f.ClientSet, pod.Name, namespace, f.Timeouts.PodDelete))

		// Create another claim and pod, this time using the latest Kubernetes.
		claim = b.ExternalClaim()
		pod = b.PodExternal()
		pod.Spec.ResourceClaims[0].ResourceClaimName = &claim.Name
		b.Create(tCtx, claim, pod)
		b.TestPod(tCtx, f, pod)

		return func(tCtx ktesting.TContext) {
			// We need to clean up explicitly because the normal
			// cleanup doesn't work (driver shuts down first).
			//
			// The retry loops are necessary because of a stale connection
			// to the restarted apiserver. Sometimes, attempts fail with "EOF" as error
			// or (even weirder) with
			// getting *v1.Pod: pods "tester-2" is forbidden: User "kubernetes-admin" cannot get resource "pods" in API group "" in the namespace "dra-9021"
			ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error {
				return f.ClientSet.ResourceV1beta1().ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{})
			}).Should(gomega.Succeed(), "delete claim after downgrade")
			ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error {
				return f.ClientSet.CoreV1().Pods(namespace).Delete(tCtx, pod.Name, metav1.DeleteOptions{})
			}).Should(gomega.Succeed(), "delete pod after downgrade")
			ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) *v1.Pod {
				pod, err := f.ClientSet.CoreV1().Pods(namespace).Get(tCtx, pod.Name, metav1.GetOptions{})
				if apierrors.IsNotFound(err) {
					return nil
				}
				tCtx.ExpectNoError(err, "get pod")
				return pod
			}).Should(gomega.BeNil(), "no pod after deletion after downgrade")
		}
	}
}
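The retry loops in the final step ride out transient failures from the restarted apiserver via ktesting.Eventually. For comparison, roughly the same behavior can be expressed with apimachinery's generic wait helpers; a sketch under the assumption that every non-NotFound error is transient (real code should classify errors more carefully, and deletePodWithRetry is a hypothetical helper, not part of this commit):

import (
	"context"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

// deletePodWithRetry keeps deleting until the apiserver accepts the request
// or the pod is already gone.
func deletePodWithRetry(ctx context.Context, client kubernetes.Interface, namespace, name string) error {
	return wait.PollUntilContextTimeout(ctx, time.Second, 2*time.Minute, true,
		func(ctx context.Context) (bool, error) {
			err := client.CoreV1().Pods(namespace).Delete(ctx, name, metav1.DeleteOptions{})
			if err == nil || apierrors.IsNotFound(err) {
				return true, nil // deleted or already gone
			}
			// Treat everything else ("EOF", stale RBAC caches, ...) as transient and retry.
			return false, nil
		})
}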

test/e2e_dra/resourceclaimstatus_test.go (new file, 235 lines)

@ -0,0 +1,235 @@
/*
Copyright The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2edra

import (
	"bytes"
	"encoding/json"

	"github.com/stretchr/testify/require"

	resourceapi "k8s.io/api/resource/v1"
	resourceapiv1beta2 "k8s.io/api/resource/v1beta2"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	apiruntime "k8s.io/apimachinery/pkg/runtime"
	resourceapiac "k8s.io/client-go/applyconfigurations/resource/v1"
	resourceapiacv1beta2 "k8s.io/client-go/applyconfigurations/resource/v1beta2"
	draapiv1beta2 "k8s.io/dynamic-resource-allocation/api/v1beta2"
	drautils "k8s.io/kubernetes/test/e2e/dra/utils"
	"k8s.io/kubernetes/test/e2e/framework"
	"k8s.io/kubernetes/test/utils/ktesting"
)

// resourceClaimDeviceStatus corresponds to testResourceClaimDeviceStatus in test/integration/dra
// and was copied from there, therefore the unit-test style with tCtx and require.
// This is the preferred style for new tests.
func resourceClaimDeviceStatus(tCtx ktesting.TContext, f *framework.Framework, b *drautils.Builder) step2Func {
	namespace := f.Namespace.Name
	claimName := "claim-with-device-status"
	claim := &resourceapiv1beta2.ResourceClaim{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: namespace,
			Name:      claimName,
		},
		Spec: resourceapiv1beta2.ResourceClaimSpec{
			Devices: resourceapiv1beta2.DeviceClaim{
				Requests: []resourceapiv1beta2.DeviceRequest{
					{
						Name: "foo",
						Exactly: &resourceapiv1beta2.ExactDeviceRequest{
							DeviceClassName: "foo",
						},
					},
				},
			},
		},
	}

	claim, err := tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).Create(tCtx, claim, metav1.CreateOptions{})
	tCtx.ExpectNoError(err, "create ResourceClaim")

	// Add an allocation result.
	// A finalizer is required for that.
	finalizer := "test.example.com/my-test-finalizer"
	claim.Finalizers = append(claim.Finalizers, finalizer)
	claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).Update(tCtx, claim, metav1.UpdateOptions{})
	tCtx.ExpectNoError(err, "add finalizer")
	claim.Status.Allocation = &resourceapiv1beta2.AllocationResult{
		Devices: resourceapiv1beta2.DeviceAllocationResult{
			Results: []resourceapiv1beta2.DeviceRequestAllocationResult{
				{
					Request: "foo",
					Driver:  "one",
					Pool:    "global",
					Device:  "my-device",
				},
				{
					Request: "foo",
					Driver:  "two",
					Pool:    "global",
					Device:  "another-device",
				},
				{
					Request: "foo",
					Driver:  "three",
					Pool:    "global",
					Device:  "my-device",
				},
			},
		},
	}
	removeClaim := func(tCtx ktesting.TContext) {
		client := tCtx.Client().ResourceV1beta2()
		claim, err := client.ResourceClaims(namespace).Get(tCtx, claimName, metav1.GetOptions{})
		if apierrors.IsNotFound(err) {
			return
		}
		tCtx.ExpectNoError(err, "get claim to remove finalizer")
		if claim.Status.Allocation != nil {
			claim.Status.Allocation = nil
			claim, err = client.ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{})
			tCtx.ExpectNoError(err, "remove allocation")
		}
		claim.Finalizers = nil
		claim, err = client.ResourceClaims(namespace).Update(tCtx, claim, metav1.UpdateOptions{})
		tCtx.ExpectNoError(err, "remove finalizer")
		err = client.ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{})
		tCtx.ExpectNoError(err, "delete claim")
	}
	tCtx.CleanupCtx(removeClaim)
	claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{})
	tCtx.ExpectNoError(err, "add allocation result")

	// Now adding the device status should work.
	deviceStatus := []resourceapiv1beta2.AllocatedDeviceStatus{{
		Driver: "one",
		Pool:   "global",
		Device: "my-device",
		Data: &apiruntime.RawExtension{
			Raw: []byte(`{"kind": "foo", "apiVersion": "dra.example.com/v1"}`),
		},
		NetworkData: &resourceapiv1beta2.NetworkDeviceData{
			InterfaceName: "net-1",
			IPs: []string{
				"10.9.8.0/24",
				"2001:db8::/64",
			},
			HardwareAddress: "ea:9f:cb:40:b1:7b",
		},
	}}
	claim.Status.Devices = deviceStatus
	tCtx.ExpectNoError(err, "add device status")
	require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after adding device status")

	// Strip the RawExtension. SSA re-encodes it, which causes negligible differences that nonetheless break assert.Equal.
	claim.Status.Devices[0].Data = nil
	deviceStatus[0].Data = nil
	claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{})
	tCtx.ExpectNoError(err, "add device status")
	require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after stripping RawExtension")

	// Exercise SSA.
	deviceStatusAC := resourceapiacv1beta2.AllocatedDeviceStatus().
		WithDriver("two").
		WithPool("global").
		WithDevice("another-device").
		WithNetworkData(resourceapiacv1beta2.NetworkDeviceData().WithInterfaceName("net-2"))
	deviceStatus = append(deviceStatus, resourceapiv1beta2.AllocatedDeviceStatus{
		Driver: "two",
		Pool:   "global",
		Device: "another-device",
		NetworkData: &resourceapiv1beta2.NetworkDeviceData{
			InterfaceName: "net-2",
		},
	})
	claimAC := resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace).
		WithStatus(resourceapiacv1beta2.ResourceClaimStatus().WithDevices(deviceStatusAC))
	claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{
		Force:        true,
		FieldManager: "manager-1",
	})
	tCtx.ExpectNoError(err, "apply device status two")
	require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after applying device status two")

	deviceStatusAC = resourceapiacv1beta2.AllocatedDeviceStatus().
		WithDriver("three").
		WithPool("global").
		WithDevice("my-device").
		WithNetworkData(resourceapiacv1beta2.NetworkDeviceData().WithInterfaceName("net-3"))
	deviceStatus = append(deviceStatus, resourceapiv1beta2.AllocatedDeviceStatus{
		Driver: "three",
		Pool:   "global",
		Device: "my-device",
		NetworkData: &resourceapiv1beta2.NetworkDeviceData{
			InterfaceName: "net-3",
		},
	})
	claimAC = resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace).
		WithStatus(resourceapiacv1beta2.ResourceClaimStatus().WithDevices(deviceStatusAC))
	claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{
		Force:        true,
		FieldManager: "manager-2",
	})
	tCtx.ExpectNoError(err, "apply device status three")
	require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after applying device status three")
	var buffer bytes.Buffer
	encoder := json.NewEncoder(&buffer)
	encoder.SetIndent(" ", " ")
	tCtx.ExpectNoError(encoder.Encode(claim))
	tCtx.Logf("Final ResourceClaim:\n%s", buffer.String())

	return func(tCtx ktesting.TContext) step3Func {
		// Update one entry, remove the other.
		deviceStatusAC := resourceapiac.AllocatedDeviceStatus().
			WithDriver("two").
			WithPool("global").
			WithDevice("another-device").
			WithNetworkData(resourceapiac.NetworkDeviceData().WithInterfaceName("yet-another-net"))
		deviceStatus[1].NetworkData.InterfaceName = "yet-another-net"
		claimAC := resourceapiac.ResourceClaim(claim.Name, claim.Namespace).
			WithStatus(resourceapiac.ResourceClaimStatus().WithDevices(deviceStatusAC))
		claim, err := tCtx.Client().ResourceV1().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{
			Force:        true,
			FieldManager: "manager-1",
		})
		tCtx.ExpectNoError(err, "update device status two")

		var deviceStatusV1 []resourceapi.AllocatedDeviceStatus
		for _, status := range deviceStatus {
			var statusV1 resourceapi.AllocatedDeviceStatus
			tCtx.ExpectNoError(draapiv1beta2.Convert_v1beta2_AllocatedDeviceStatus_To_v1_AllocatedDeviceStatus(&status, &statusV1, nil))
			deviceStatusV1 = append(deviceStatusV1, statusV1)
		}
		require.Equal(tCtx, deviceStatusV1, claim.Status.Devices, "after updating device status two")

		return func(tCtx ktesting.TContext) {
			claimAC := resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace)
			deviceStatus = deviceStatus[0:2]
			claim, err := tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{
				Force:        true,
				FieldManager: "manager-2",
			})
			tCtx.ExpectNoError(err, "remove device status three")
			require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after removing device status three")

			// The cleanup order is such that we have to run this explicitly now.
			// The tCtx.CleanupCtx is more for the sake of completeness.
			removeClaim(tCtx)
		}
	}
}
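Why the final removal in step 3 works: server-side apply tracks field ownership per field manager, so when "manager-2" later applies a configuration that lists no devices, exactly the device status entry it owns ("three") is removed, while entries owned by other managers remain. That ownership can be inspected on the object's managedFields; a small sketch (assumes a fetched claim and an imported fmt):

// Each ApplyStatus above leaves a managedFields entry recording which
// manager owns which fields; the removal step relies on this bookkeeping.
for _, mf := range claim.ManagedFields {
	fmt.Printf("manager=%s operation=%s apiVersion=%s\n", mf.Manager, mf.Operation, mf.APIVersion)
}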

test/e2e_dra/upgradedowngrade_test.go (modified)
@ -18,11 +18,9 @@ package e2edra

 import (
 	"archive/tar"
-	"bytes"
 	"compress/gzip"
 	"context"
 	_ "embed"
-	"encoding/json"
 	"flag"
 	"fmt"
 	"io"
@ -38,24 +36,13 @@ import (

 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
-	"github.com/stretchr/testify/require"

-	v1 "k8s.io/api/core/v1"
-	resourceapi "k8s.io/api/resource/v1"
-	resourceapiv1beta2 "k8s.io/api/resource/v1beta2"
-	apierrors "k8s.io/apimachinery/pkg/api/errors"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	apiruntime "k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/util/version"
-	resourceapiac "k8s.io/client-go/applyconfigurations/resource/v1"
-	resourceapiacv1beta2 "k8s.io/client-go/applyconfigurations/resource/v1beta2"
 	restclient "k8s.io/client-go/rest"
-	draapiv1beta2 "k8s.io/dynamic-resource-allocation/api/v1beta2"
 	"k8s.io/kubernetes/cmd/kubeadm/app/util/errors"
 	drautils "k8s.io/kubernetes/test/e2e/dra/utils"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2enode "k8s.io/kubernetes/test/e2e/framework/node"
-	e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
 	e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
 	"k8s.io/kubernetes/test/utils/ktesting"
 	"k8s.io/kubernetes/test/utils/localupcluster"
@ -69,6 +56,45 @@ func init() {
 	ktesting.SetDefaultVerbosity(2)
 }

+// The overall flow of upgrade/downgrade testing is always the same:
+//
+// - Bring up a cluster with the previous release.
+// - "Install" the test DRA driver with 8 devices for the one node in the cluster.
+//   There is a DeviceClass for it.
+// - Step 1: run some test code.
+// - Upgrade the cluster to the current code.
+// - Step 2: run some more test code.
+// - Downgrade to the previous release again.
+// - Step 3: run some final test code.
+//
+// The "test code" gets registered here with a single function for each
+// sub-test. That function then returns the next piece of code, which then
+// returns the final code.
+//
+// For performance reasons there is only a single `It("works")` which
+// runs everything. Failures in sub-tests are reported separately *if they
+// are reported via the TContext*. Failures reported via ginkgo.Fail
+// currently abort the entire test. This will be addressed by converting
+// everything to ktesting-based unit tests.
+//
+// Each sub-test must be self-contained. They intentionally run in a random
+// order. However, they share the same cluster and the 8 devices which are
+// available there.
+var subTests = map[string]step1Func{
+	"core DRA":                    coreDRA,
+	"ResourceClaim device status": resourceClaimDeviceStatus,
+}
+
+// step1Func is passed everything that is needed to run the first step of a sub-test.
+type step1Func func(tCtx ktesting.TContext, f *framework.Framework, builder *drautils.Builder) step2Func
+
+type step2Func func(tCtx ktesting.TContext) step3Func
+
+type step3Func func(tCtx ktesting.TContext)
+
+// steps has the names for the actual ginkgo.It where sub-test results are provided.
+var steps = []string{"before upgrade", "after upgrade", "after downgrade"}
+
 var repoRoot = repoRootDefault()

 func currentBinDir() (envName, content string) {
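Given this registration mechanism, adding a sub-test only requires one step1Func and one map entry; a hypothetical skeleton (the name and body are illustrative, not part of the commit):

// In subTests: "my feature": myFeature,
func myFeature(tCtx ktesting.TContext, f *framework.Framework, b *drautils.Builder) step2Func {
	// Step 1: create objects against the previous release.
	return func(tCtx ktesting.TContext) step3Func {
		// Step 2: verify and extend them after the upgrade.
		return func(tCtx ktesting.TContext) {
			// Step 3: verify and clean up explicitly after the downgrade
			// (normal cleanup does not work, the driver shuts down first).
		}
	}
}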
@ -93,6 +119,7 @@ func repoRootDefault() string {

 func TestUpgradeDowngrade(t *testing.T) {
 	suiteConfig, reporterConfig := framework.CreateGinkgoConfig()
+	suiteConfig.RandomizeAllSpecs = false
 	ginkgo.RunSpecs(t, "DRA", suiteConfig, reporterConfig)
 }

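Since the suite is plain ginkgo.RunSpecs inside a Go test, it should be runnable like any other Go test, along the lines of the command below (a sketch: the local-up cluster prerequisites described in this file, such as the previous release's server binaries, must be available):

go test -v ./test/e2e_dra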
@ -106,6 +133,14 @@ var _ = ginkgo.Describe("DRA upgrade/downgrade", func() {
 	e2etestfiles.AddFileSource(e2etestfiles.RootFileSource{Root: repoRoot})
 	gomega.RegisterFailHandler(ginkgo.Fail)

+	// sub-test -> step -> failure
+	//
+	// Initially each failure string is empty.
+	results := make(map[string]map[string]string, len(subTests))
+	for subTest := range subTests {
+		results[subTest] = make(map[string]string, len(steps))
+	}
+
 	ginkgo.It("works", func(ctx context.Context) {
 		// TODO: replace with helper code from https://github.com/kubernetes/kubernetes/pull/122481 should that get merged.
 		tCtx := ktesting.Init(GinkgoContextTB())
@ -232,30 +267,40 @@ var _ = ginkgo.Describe("DRA upgrade/downgrade", func() {

 		tCtx.ExpectNoError(e2enode.WaitForAllNodesSchedulable(tCtx, tCtx.Client(), f.Timeouts.NodeSchedulable), "wait for all nodes to be schedulable")
 		nodes := drautils.NewNodesNow(tCtx, f, 1, 1)
-		testResourceClaimDeviceStatusAfterUpgrade, testResourceClaimDeviceStatusAfterDowngrade := testResourceClaimDeviceStatus(tCtx, namespace.Name)

 		// Opening sockets locally avoids intermittent errors and delays caused by proxying through the restarted apiserver.
 		// We could speed up testing by shortening the sync delay in the ResourceSlice controller, but let's better
 		// test the defaults.
 		driver := drautils.NewDriverInstance(f)
 		driver.IsLocal = true
-		driver.Run(nodes, drautils.DriverResourcesNow(nodes, 1))
+		driver.Run(nodes, drautils.DriverResourcesNow(nodes, 8))
 		b := drautils.NewBuilderNow(ctx, f, driver)

-		claim := b.ExternalClaim()
-		pod := b.PodExternal()
-		b.Create(ctx, claim, pod)
-		b.TestPod(ctx, f, pod)
-
 		tCtx = ktesting.End(tCtx)

+		steps2 := make(map[string]step2Func, len(subTests))
+		for subTest, step1 := range subTests {
+			tCtx = ktesting.Begin(tCtx, subTest)
+			var result error
+			func() {
+				tCtx, finalize := ktesting.WithError(tCtx, &result)
+				defer finalize()
+
+				// This only gets set in case of success.
+				steps2[subTest] = step1(tCtx, f, b)
+			}()
+			if result != nil {
+				results[subTest][steps[0]] = result.Error()
+			}
+			tCtx = ktesting.End(tCtx)
+		}
+
 		tCtx = ktesting.Begin(tCtx, fmt.Sprintf("update to %s", gitVersion))
 		// We could split this up into first updating the apiserver, then control plane components, then restarting kubelet.
 		// For the purpose of this test we primarily care about full before/after comparisons, so that hasn't been done yet.
 		// TODO
 		restoreOptions := cluster.Modify(tCtx, localupcluster.ModifyOptions{Upgrade: true, BinDir: dir})
 		tCtx = ktesting.End(tCtx)
-		testResourceClaimDeviceStatusAfterUpgrade()

 		// The kubelet wipes all ResourceSlices on a restart because it doesn't know which drivers were running.
 		// Wait for the ResourceSlice controller in the driver to notice and recreate the ResourceSlices.
@ -263,17 +308,26 @@ var _ = ginkgo.Describe("DRA upgrade/downgrade", func() {
 		gomega.Eventually(ctx, driver.NewGetSlices()).WithTimeout(5 * time.Minute).Should(gomega.HaveField("Items", gomega.HaveLen(len(nodes.NodeNames))))
 		tCtx = ktesting.End(tCtx)

-		// Remove pod prepared by previous Kubernetes.
-		framework.ExpectNoError(f.ClientSet.ResourceV1beta1().ResourceClaims(namespace.Name).Delete(ctx, claim.Name, metav1.DeleteOptions{}))
-		framework.ExpectNoError(f.ClientSet.CoreV1().Pods(namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{}))
-		framework.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, namespace.Name, f.Timeouts.PodDelete))
-
-		// Create another claim and pod, this time using the latest Kubernetes.
-		claim = b.ExternalClaim()
-		pod = b.PodExternal()
-		pod.Spec.ResourceClaims[0].ResourceClaimName = &claim.Name
-		b.Create(ctx, claim, pod)
-		b.TestPod(ctx, f, pod)
+		steps3 := make(map[string]step3Func, len(subTests))
+		for subTest := range subTests {
+			step2 := steps2[subTest]
+			if step2 == nil {
+				continue
+			}
+			tCtx = ktesting.Begin(tCtx, subTest)
+			var result error
+			func() {
+				tCtx, finalize := ktesting.WithError(tCtx, &result)
+				defer finalize()
+
+				// This only gets set in case of success.
+				steps3[subTest] = step2(tCtx)
+			}()
+			if result != nil {
+				results[subTest][steps[1]] = result.Error()
+			}
+			tCtx = ktesting.End(tCtx)
+		}

 		// Roll back.
 		tCtx = ktesting.Begin(tCtx, "downgrade")
@ -289,30 +343,45 @@ var _ = ginkgo.Describe("DRA upgrade/downgrade", func() {
 			return output
 		}).Should(gomega.ContainSubstring(`"Caches are synced" controller="resource_claim"`))
 		tCtx = ktesting.End(tCtx)
-		testResourceClaimDeviceStatusAfterDowngrade()

-		// We need to clean up explicitly because the normal
-		// cleanup doesn't work (driver shuts down first).
-		//
-		// The retry loops are necessary because of a stale connection
-		// to the restarted apiserver. Sometimes, attempts fail with "EOF" as error
-		// or (even weirder) with
-		// getting *v1.Pod: pods "tester-2" is forbidden: User "kubernetes-admin" cannot get resource "pods" in API group "" in the namespace "dra-9021"
-		ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error {
-			return f.ClientSet.ResourceV1beta1().ResourceClaims(namespace.Name).Delete(tCtx, claim.Name, metav1.DeleteOptions{})
-		}).Should(gomega.Succeed(), "delete claim after downgrade")
-		ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error {
-			return f.ClientSet.CoreV1().Pods(namespace.Name).Delete(tCtx, pod.Name, metav1.DeleteOptions{})
-		}).Should(gomega.Succeed(), "delete pod after downgrade")
-		ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) *v1.Pod {
-			pod, err := f.ClientSet.CoreV1().Pods(namespace.Name).Get(tCtx, pod.Name, metav1.GetOptions{})
-			if apierrors.IsNotFound(err) {
-				return nil
-			}
-			tCtx.ExpectNoError(err, "get pod")
-			return pod
-		}).Should(gomega.BeNil(), "no pod after deletion after downgrade")
+		for subTest := range subTests {
+			step3 := steps3[subTest]
+			if step3 == nil {
+				continue
+			}
+			tCtx = ktesting.Begin(tCtx, subTest)
+			var result error
+			func() {
+				tCtx, finalize := ktesting.WithError(tCtx, &result)
+				defer finalize()
+
+				step3(tCtx)
+			}()
+			if result != nil {
+				results[subTest][steps[2]] = result.Error()
+			}
+			tCtx = ktesting.End(tCtx)
+		}
 	})
+
+	// This runs last because by default Ginkgo does not randomize within
+	// a top-level container.
+	for subTest := range subTests {
+		ginkgo.Context(subTest, func() {
+			for _, step := range steps {
+				ginkgo.It(step, func() {
+					failure := results[subTest][step]
+					if failure != "" {
+						// Source code location will be useless here. We can't have both:
+						// separate test results *and* correct source code location.
+						// This will become better with testing.T-based unit tests.
+						_, _ = ginkgo.GinkgoWriter.Write([]byte("For log output see 'DRA upgrade/downgrade works'\n"))
+						ginkgo.Fail(failure)
+					}
+				})
+			}
+		})
+	}
 })
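The capture-and-replay idea behind results can be reduced to a few lines of plain Go (stand-ins only, not the real ktesting API): run a step, convert its failure into an error, store it under (sub-test, step), and report it later as a separate test case:

package main

import (
	"errors"
	"fmt"
)

// runStep runs one step and converts a panic (a failed assertion would
// typically surface as one) into an ordinary error.
func runStep(fn func() error) (err error) {
	defer func() {
		if r := recover(); r != nil {
			err = fmt.Errorf("step panicked: %v", r)
		}
	}()
	return fn()
}

func main() {
	// results mirrors the map above: sub-test -> step -> failure message.
	results := map[string]map[string]string{"core DRA": {}}
	if err := runStep(func() error { return errors.New("claim not allocated") }); err != nil {
		results["core DRA"]["before upgrade"] = err.Error()
	}
	// Later, each (sub-test, step) pair becomes its own test case.
	for subTest, steps := range results {
		for step, failure := range steps {
			fmt.Printf("FAIL %s/%s: %s\n", subTest, step, failure)
		}
	}
}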

// sourceVersion identifies the Kubernetes git version based on hack/print-workspace-status.sh.

@ -432,200 +501,3 @@ func serverDownloadURL(tCtx ktesting.TContext, prefix string, major, minor uint)
 	}
 	return fmt.Sprintf("https://dl.k8s.io/release/%s/kubernetes-server-%s-%s.tar.gz", string(version), runtime.GOOS, runtime.GOARCH), string(version), nil
 }
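For a concrete sense of what serverDownloadURL produces: with a hypothetical resolved version of v1.33.0 on linux/amd64, the format string above yields

https://dl.k8s.io/release/v1.33.0/kubernetes-server-linux-amd64.tar.gz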
-
-// testResourceClaimDeviceStatus corresponds to testResourceClaimDeviceStatus in test/integration/dra
-// and was copied from there, therefore the unit-test style with tCtx and require.
-func testResourceClaimDeviceStatus(tCtx ktesting.TContext, namespace string) (afterUpgrade, afterDowngrade func()) {
-	claimName := "claim-with-device-status"
-	claim := &resourceapiv1beta2.ResourceClaim{
-		ObjectMeta: metav1.ObjectMeta{
-			Namespace: namespace,
-			Name:      claimName,
-		},
-		Spec: resourceapiv1beta2.ResourceClaimSpec{
-			Devices: resourceapiv1beta2.DeviceClaim{
-				Requests: []resourceapiv1beta2.DeviceRequest{
-					{
-						Name: "foo",
-						Exactly: &resourceapiv1beta2.ExactDeviceRequest{
-							DeviceClassName: "foo",
-						},
-					},
-				},
-			},
-		},
-	}
-
-	claim, err := tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).Create(tCtx, claim, metav1.CreateOptions{})
-	tCtx.ExpectNoError(err, "create ResourceClaim")
-
-	// Add an allocation result.
-	// A finalizer is required for that.
-	finalizer := "test.example.com/my-test-finalizer"
-	claim.Finalizers = append(claim.Finalizers, finalizer)
-	claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).Update(tCtx, claim, metav1.UpdateOptions{})
-	claim.Status.Allocation = &resourceapiv1beta2.AllocationResult{
-		Devices: resourceapiv1beta2.DeviceAllocationResult{
-			Results: []resourceapiv1beta2.DeviceRequestAllocationResult{
-				{
-					Request: "foo",
-					Driver:  "one",
-					Pool:    "global",
-					Device:  "my-device",
-				},
-				{
-					Request: "foo",
-					Driver:  "two",
-					Pool:    "global",
-					Device:  "another-device",
-				},
-				{
-					Request: "foo",
-					Driver:  "three",
-					Pool:    "global",
-					Device:  "my-device",
-				},
-			},
-		},
-	}
-	tCtx.ExpectNoError(err, "add finalizer")
-	removeClaim := func(tCtx ktesting.TContext) {
-		client := tCtx.Client().ResourceV1beta2()
-		claim, err := client.ResourceClaims(namespace).Get(tCtx, claimName, metav1.GetOptions{})
-		if apierrors.IsNotFound(err) {
-			return
-		}
-		tCtx.ExpectNoError(err, "get claim to remove finalizer")
-		if claim.Status.Allocation != nil {
-			claim.Status.Allocation = nil
-			claim, err = client.ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{})
-			tCtx.ExpectNoError(err, "remove allocation")
-		}
-		claim.Finalizers = nil
-		claim, err = client.ResourceClaims(namespace).Update(tCtx, claim, metav1.UpdateOptions{})
-		tCtx.ExpectNoError(err, "remove finalizer")
-		err = client.ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{})
-		tCtx.ExpectNoError(err, "delete claim")
-	}
-	tCtx.CleanupCtx(removeClaim)
-	claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{})
-	tCtx.ExpectNoError(err, "add allocation result")
-
-	// Now adding the device status should work.
-	deviceStatus := []resourceapiv1beta2.AllocatedDeviceStatus{{
-		Driver: "one",
-		Pool:   "global",
-		Device: "my-device",
-		Data: &apiruntime.RawExtension{
-			Raw: []byte(`{"kind": "foo", "apiVersion": "dra.example.com/v1"}`),
-		},
-		NetworkData: &resourceapiv1beta2.NetworkDeviceData{
-			InterfaceName: "net-1",
-			IPs: []string{
-				"10.9.8.0/24",
-				"2001:db8::/64",
-			},
-			HardwareAddress: "ea:9f:cb:40:b1:7b",
-		},
-	}}
-	claim.Status.Devices = deviceStatus
-	tCtx.ExpectNoError(err, "add device status")
-	require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after adding device status")
-
-	// Strip the RawExtension. SSA re-encodes it, which causes negligible differences that nonetheless break assert.Equal.
-	claim.Status.Devices[0].Data = nil
-	deviceStatus[0].Data = nil
-	claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{})
-	tCtx.ExpectNoError(err, "add device status")
-	require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after stripping RawExtension")
-
-	// Exercise SSA.
-	deviceStatusAC := resourceapiacv1beta2.AllocatedDeviceStatus().
-		WithDriver("two").
-		WithPool("global").
-		WithDevice("another-device").
-		WithNetworkData(resourceapiacv1beta2.NetworkDeviceData().WithInterfaceName("net-2"))
-	deviceStatus = append(deviceStatus, resourceapiv1beta2.AllocatedDeviceStatus{
-		Driver: "two",
-		Pool:   "global",
-		Device: "another-device",
-		NetworkData: &resourceapiv1beta2.NetworkDeviceData{
-			InterfaceName: "net-2",
-		},
-	})
-	claimAC := resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace).
-		WithStatus(resourceapiacv1beta2.ResourceClaimStatus().WithDevices(deviceStatusAC))
-	claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{
-		Force:        true,
-		FieldManager: "manager-1",
-	})
-	tCtx.ExpectNoError(err, "apply device status two")
-	require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after applying device status two")
-
-	deviceStatusAC = resourceapiacv1beta2.AllocatedDeviceStatus().
-		WithDriver("three").
-		WithPool("global").
-		WithDevice("my-device").
-		WithNetworkData(resourceapiacv1beta2.NetworkDeviceData().WithInterfaceName("net-3"))
-	deviceStatus = append(deviceStatus, resourceapiv1beta2.AllocatedDeviceStatus{
-		Driver: "three",
-		Pool:   "global",
-		Device: "my-device",
-		NetworkData: &resourceapiv1beta2.NetworkDeviceData{
-			InterfaceName: "net-3",
-		},
-	})
-	claimAC = resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace).
-		WithStatus(resourceapiacv1beta2.ResourceClaimStatus().WithDevices(deviceStatusAC))
-	claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{
-		Force:        true,
-		FieldManager: "manager-2",
-	})
-	tCtx.ExpectNoError(err, "apply device status three")
-	require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after applying device status three")
-	var buffer bytes.Buffer
-	encoder := json.NewEncoder(&buffer)
-	encoder.SetIndent(" ", " ")
-	tCtx.ExpectNoError(encoder.Encode(claim))
-	tCtx.Logf("Final ResourceClaim:\n%s", buffer.String())
-
-	afterUpgrade = func() {
-		// Update one entry, remove the other.
-		deviceStatusAC := resourceapiac.AllocatedDeviceStatus().
-			WithDriver("two").
-			WithPool("global").
-			WithDevice("another-device").
-			WithNetworkData(resourceapiac.NetworkDeviceData().WithInterfaceName("yet-another-net"))
-		deviceStatus[1].NetworkData.InterfaceName = "yet-another-net"
-		claimAC := resourceapiac.ResourceClaim(claim.Name, claim.Namespace).
-			WithStatus(resourceapiac.ResourceClaimStatus().WithDevices(deviceStatusAC))
-		claim, err := tCtx.Client().ResourceV1().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{
-			Force:        true,
-			FieldManager: "manager-1",
-		})
-		tCtx.ExpectNoError(err, "update device status two")
-
-		var deviceStatusV1 []resourceapi.AllocatedDeviceStatus
-		for _, status := range deviceStatus {
-			var statusV1 resourceapi.AllocatedDeviceStatus
-			tCtx.ExpectNoError(draapiv1beta2.Convert_v1beta2_AllocatedDeviceStatus_To_v1_AllocatedDeviceStatus(&status, &statusV1, nil))
-			deviceStatusV1 = append(deviceStatusV1, statusV1)
-		}
-		require.Equal(tCtx, deviceStatusV1, claim.Status.Devices, "after updating device status two")
-	}
-	afterDowngrade = func() {
-		claimAC := resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace)
-		deviceStatus = deviceStatus[0:2]
-		claim, err := tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{
-			Force:        true,
-			FieldManager: "manager-2",
-		})
-		tCtx.ExpectNoError(err, "remove device status three")
-		require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after removing device status three")
-
-		// The cleanup order is such that we have to run this explicitly now.
-		// The tCtx.CleanupCtx is more for the sake of completeness.
-		removeClaim(tCtx)
-	}
-	return
-}