DRA upgrade/downgrade: rewrite as Go unit test
tCtx.Run and sub-tests make it much simpler to separate the different steps
than with Ginkgo: unless a test calls tCtx.Parallel (which this one doesn't),
everything runs sequentially in a deterministic order.
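
For reference, this is the overall shape of the resulting test (a simplified
sketch, not the actual test code; the real sub-test bodies live in the files
changed below):

    package e2edra

    import (
        "testing"

        "k8s.io/kubernetes/test/utils/ktesting"
    )

    func TestUpgradeDowngradeSketch(t *testing.T) {
        tCtx := ktesting.Init(t)

        // Without tCtx.Parallel, the phases and their nested sub-tests run
        // in exactly this order and show up individually in the output.
        tCtx.Run("after-cluster-creation", func(tCtx ktesting.TContext) {
            tCtx.Run("core DRA", func(tCtx ktesting.TContext) { /* phase 1 */ })
        })
        tCtx.Run("after-cluster-upgrade", func(tCtx ktesting.TContext) { /* phase 2 */ })
        tCtx.Run("after-cluster-downgrade", func(tCtx ktesting.TContext) { /* phase 3 */ })
    }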
Right now we get:
...
localupcluster.go:285: I1210 12:24:22.067524] bring up v1.34: stopping kubelet
localupcluster.go:285: I1210 12:24:22.067548] bring up v1.34: stopping kube-scheduler
localupcluster.go:285: I1210 12:24:22.067570] bring up v1.34: stopping kube-controller-manager
localupcluster.go:285: I1210 12:24:22.067589] bring up v1.34: stopping kube-apiserver
--- PASS: TestUpgradeDowngrade (94.78s)
--- PASS: TestUpgradeDowngrade/after-cluster-creation (2.07s)
--- PASS: TestUpgradeDowngrade/after-cluster-creation/core_DRA (2.05s)
--- PASS: TestUpgradeDowngrade/after-cluster-creation/ResourceClaim_device_status (0.02s)
--- PASS: TestUpgradeDowngrade/after-cluster-upgrade (4.10s)
--- PASS: TestUpgradeDowngrade/after-cluster-upgrade/core_DRA (4.09s)
--- PASS: TestUpgradeDowngrade/after-cluster-upgrade/ResourceClaim_device_status (0.01s)
--- PASS: TestUpgradeDowngrade/after-cluster-downgrade (1.24s)
--- PASS: TestUpgradeDowngrade/after-cluster-downgrade/core_DRA (1.21s)
--- PASS: TestUpgradeDowngrade/after-cluster-downgrade/ResourceClaim_device_status (0.02s)
PASS
It's even possible to use `-failfast` together with
e.g. `-run=TestUpgradeDowngrade/after-cluster-creation/core_DRA`: `go test` then
runs everything up to that sub-test (or up to the first failing sub-test), stops,
and cleans up.
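
For example, combining those flags with the invocation from the README (an
illustration only, adjust flags and paths as needed):

    go test -v -count=1 -timeout=1h -failfast -run=TestUpgradeDowngrade/after-cluster-creation/core_DRA ./test/e2e_dra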
parent 7c7b1e1018
commit de47714879

4 changed files with 197 additions and 299 deletions
@@ -1,12 +1,13 @@
This directory contains a testsuite with automatic upgrade/downgrade tests for
DRA. Conceptually this is like an integration test, in the sense that it
starts/stops cluster components and runs tests against them.
starts/stops cluster components and runs tests against them. It has its own
directory because it needs to be started differently than other integration
tests or unit tests, which makes it more like an E2E suite.

The difference is that it starts Kubernetes components by running the actual
binaries, relying on local-up-cluster.sh for the logic and configuration
steps. Because local-up-cluster.sh needs additional permissions and
preparations on the host, the test cannot run in "make test-integration" and
just skips itself there.
steps. local-up-cluster.sh needs additional permissions and
preparations on the host.

To run it:
- Make sure that hack/local-up-cluster.sh works:

@@ -23,21 +24,16 @@ To run it:
Otherwise a test tmp directory is used.
- Invoke as a Go test (no need for the ginkgo CLI), for example:

go test -v -count=1 -timeout=1h ./test/e2e_dra -args -ginkgo.v
dlv test ./test/e2e_dra -- -ginkgo.v
make test KUBE_TIMEOUT=-timeout=1h WHAT=test/e2e_dra FULL_LOG=true KUBE_TEST_ARGS="-count=1 -args -ginkgo.v"
go test -v -count=1 -timeout=1h ./test/e2e_dra
dlv test ./test/e2e_dra -- -test.v
make test KUBE_TIMEOUT=-timeout=1h WHAT=test/e2e_dra FULL_LOG=true KUBE_TEST_ARGS="-count=1"

`make test` instead of `make test-integration` is intentional: `local-up-cluster.sh`
itself wants to start etcd. `-count=1` ensures that the test runs each time it is invoked.
`-v` and `-ginkgo.v` make the test output visible while the test runs.
`-v`/`-test.v`/`FULL_LOG=true` make the test output visible while the test runs.

To simplify starting from scratch, `./test/e2e_dra/run.sh` cleans
up, sets permissions, and then invokes whatever command is specified on the
command line:

./test/e2e_dra/run.sh go test ./test/e2e_dra

The test is implemented as a Ginkgo suite because that allows reusing the same
helper code as in E2E tests. Long-term the goal is to port that helper code to
ktesting, support ktesting in test/e2e, and turn this test into a normal Go
test.
@@ -17,28 +17,29 @@ limitations under the License.
|
|||
package e2edra
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/onsi/gomega"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
drautils "k8s.io/kubernetes/test/e2e/dra/utils"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
|
||||
"k8s.io/kubernetes/test/utils/ktesting"
|
||||
)
|
||||
|
||||
func coreDRA(tCtx ktesting.TContext, f *framework.Framework, b *drautils.Builder) step2Func {
|
||||
namespace := f.Namespace.Name
|
||||
func coreDRA(tCtx ktesting.TContext, b *drautils.Builder) upgradedTestFunc {
|
||||
namespace := tCtx.Namespace()
|
||||
claim := b.ExternalClaim()
|
||||
pod := b.PodExternal()
|
||||
b.Create(tCtx, claim, pod)
|
||||
b.TestPod(tCtx, pod)
|
||||
|
||||
return func(tCtx ktesting.TContext) step3Func {
|
||||
return func(tCtx ktesting.TContext) downgradedTestFunc {
|
||||
// Remove pod prepared in step 1.
|
||||
framework.ExpectNoError(f.ClientSet.ResourceV1beta1().ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{}))
|
||||
framework.ExpectNoError(f.ClientSet.CoreV1().Pods(namespace).Delete(tCtx, pod.Name, metav1.DeleteOptions{}))
|
||||
framework.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(tCtx, f.ClientSet, pod.Name, namespace, f.Timeouts.PodDelete))
|
||||
tCtx.ExpectNoError(tCtx.Client().ResourceV1beta1().ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{}))
|
||||
tCtx.ExpectNoError(tCtx.Client().CoreV1().Pods(namespace).Delete(tCtx, pod.Name, metav1.DeleteOptions{}))
|
||||
tCtx.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(tCtx, tCtx.Client(), pod.Name, namespace, 3*time.Minute))
|
||||
|
||||
// Create another claim and pod, this time using the latest Kubernetes.
|
||||
claim = b.ExternalClaim()
|
||||
|
|
@@ -56,13 +57,13 @@ func coreDRA(tCtx ktesting.TContext, f *framework.Framework, b *drautils.Builder
|
|||
// or (even weirder) with
|
||||
// getting *v1.Pod: pods "tester-2" is forbidden: User "kubernetes-admin" cannot get resource "pods" in API group "" in the namespace "dra-9021"
|
||||
ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error {
|
||||
return f.ClientSet.ResourceV1beta1().ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{})
|
||||
return tCtx.Client().ResourceV1beta1().ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{})
|
||||
}).Should(gomega.Succeed(), "delete claim after downgrade")
|
||||
ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error {
|
||||
return f.ClientSet.CoreV1().Pods(namespace).Delete(tCtx, pod.Name, metav1.DeleteOptions{})
|
||||
return tCtx.Client().CoreV1().Pods(namespace).Delete(tCtx, pod.Name, metav1.DeleteOptions{})
|
||||
}).Should(gomega.Succeed(), "delete pod after downgrade")
|
||||
ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) *v1.Pod {
|
||||
pod, err := f.ClientSet.CoreV1().Pods(namespace).Get(tCtx, pod.Name, metav1.GetOptions{})
|
||||
pod, err := tCtx.Client().CoreV1().Pods(namespace).Get(tCtx, pod.Name, metav1.GetOptions{})
|
||||
if apierrors.IsNotFound(err) {
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -31,15 +31,12 @@ import (
|
|||
resourceapiacv1beta2 "k8s.io/client-go/applyconfigurations/resource/v1beta2"
|
||||
draapiv1beta2 "k8s.io/dynamic-resource-allocation/api/v1beta2"
|
||||
drautils "k8s.io/kubernetes/test/e2e/dra/utils"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
"k8s.io/kubernetes/test/utils/ktesting"
|
||||
)
|
||||
|
||||
// resourceClaimDeviceStatus corresponds to testResourceClaimDeviceStatus in test/integration/dra
|
||||
// and was copied from there, therefore the unit-test style with tCtx and require.
|
||||
// This is the preferred style for new tests.
|
||||
func resourceClaimDeviceStatus(tCtx ktesting.TContext, f *framework.Framework, b *drautils.Builder) step2Func {
|
||||
namespace := f.Namespace.Name
|
||||
// resourceClaimDeviceStatus corresponds to testResourceClaimDeviceStatus in test/integration/dra.
|
||||
func resourceClaimDeviceStatus(tCtx ktesting.TContext, b *drautils.Builder) upgradedTestFunc {
|
||||
namespace := tCtx.Namespace()
|
||||
claimName := "claim-with-device-status"
|
||||
claim := &resourceapiv1beta2.ResourceClaim{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
|
|
@@ -111,7 +108,6 @@ func resourceClaimDeviceStatus(tCtx ktesting.TContext, f *framework.Framework, b
|
|||
err = client.ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{})
|
||||
tCtx.ExpectNoError(err, "delete claim")
|
||||
}
|
||||
tCtx.CleanupCtx(removeClaim)
|
||||
claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{})
|
||||
tCtx.ExpectNoError(err, "add allocation result")
|
||||
|
||||
|
|
@@ -193,7 +189,7 @@ func resourceClaimDeviceStatus(tCtx ktesting.TContext, f *framework.Framework, b
|
|||
tCtx.ExpectNoError(encoder.Encode(claim))
|
||||
tCtx.Logf("Final ResourceClaim:\n%s", buffer.String())
|
||||
|
||||
return func(tCtx ktesting.TContext) step3Func {
|
||||
return func(tCtx ktesting.TContext) downgradedTestFunc {
|
||||
// Update one entry, remove the other.
|
||||
deviceStatusAC := resourceapiac.AllocatedDeviceStatus().
|
||||
WithDriver("two").
|
||||
|
|
@@ -227,8 +223,9 @@ func resourceClaimDeviceStatus(tCtx ktesting.TContext, f *framework.Framework, b
|
|||
tCtx.ExpectNoError(err, "remove device status three")
|
||||
require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after removing device status three")
|
||||
|
||||
// The cleanup order is so that we have to run this explicitly now.
|
||||
// The tCtx.CleanupCtx is more for the sake of completeness.
|
||||
// This was created in a prior sub-test, so we have to
|
||||
// clean up manually for a proper termination of the
|
||||
// overall test.
|
||||
removeClaim(tCtx)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -19,9 +19,7 @@ package e2edra
|
|||
import (
|
||||
"archive/tar"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
_ "embed"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
|
|
@@ -34,19 +32,16 @@ import (
|
|||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/onsi/ginkgo/v2"
|
||||
"github.com/onsi/gomega"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/version"
|
||||
restclient "k8s.io/client-go/rest"
|
||||
"k8s.io/kubernetes/cmd/kubeadm/app/util/errors"
|
||||
drautils "k8s.io/kubernetes/test/e2e/dra/utils"
|
||||
"k8s.io/kubernetes/test/e2e/framework"
|
||||
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
|
||||
e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
|
||||
"k8s.io/kubernetes/test/utils/ktesting"
|
||||
"k8s.io/kubernetes/test/utils/localupcluster"
|
||||
admissionapi "k8s.io/pod-security-admission/api"
|
||||
)
|
||||
|
||||
var errHTTP404 = errors.New("resource not found (404)")
|
||||
|
|
@@ -69,31 +64,23 @@ func init() {
|
|||
//
|
||||
// The "test code" gets registered here with a single function for each
|
||||
// sub-test. That function then returns the next piece of code, which then
|
||||
// returns the final code.
|
||||
//
|
||||
// For performance reasons there is only a single `It("works")` which
|
||||
// runs everything. Failures in sub-tests are reported separately *if they
|
||||
// are reported via the TContext*. Failures reported via ginkgo.Fail
|
||||
// currently abort the entire test. This will be addressed by converting
|
||||
// everything to ktesting-based unit tests.
|
||||
// returns the final code. Each callback function is executed as a sub-test.
|
||||
// The builder is configured to not delete objects when that sub-test ends,
|
||||
// so objects persist until the entire test is done.
|
||||
//
|
||||
// Each sub-test must be self-contained. They intentionally run in a random
|
||||
// order. However, they share the same cluster and the 8 devices which are
|
||||
// available there.
|
||||
var subTests = map[string]step1Func{
|
||||
var subTests = map[string]initialTestFunc{
|
||||
"core DRA": coreDRA,
|
||||
"ResourceClaim device status": resourceClaimDeviceStatus,
|
||||
}
|
||||
|
||||
// step1Func is passed everything that is needed to run
|
||||
type step1Func func(tCtx ktesting.TContext, f *framework.Framework, builder *drautils.Builder) step2Func
|
||||
type initialTestFunc func(tCtx ktesting.TContext, builder *drautils.Builder) upgradedTestFunc
|
||||
|
||||
type step2Func func(tCtx ktesting.TContext) step3Func
|
||||
type upgradedTestFunc func(tCtx ktesting.TContext) downgradedTestFunc
|
||||
|
||||
type step3Func func(tCtx ktesting.TContext)
|
||||
|
||||
// steps has the names for the actual ginkgo.It where sub-test results are provided.
|
||||
var steps = []string{"before downgrade", "after downgrade", "after upgrade"}
|
||||
type downgradedTestFunc func(tCtx ktesting.TContext)
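For orientation, a hypothetical new sub-test would chain these three phases via
closures roughly like this (illustration only, not part of this commit; imports
and helpers as in the files above):

    // exampleSubTest shows the three-phase shape: the function body runs on the
    // initial cluster, the returned closures run after the upgrade and after the
    // downgrade, respectively.
    func exampleSubTest(tCtx ktesting.TContext, b *drautils.Builder) upgradedTestFunc {
        claim := b.ExternalClaim()
        pod := b.PodExternal()
        b.Create(tCtx, claim, pod)
        b.TestPod(tCtx, pod)

        return func(tCtx ktesting.TContext) downgradedTestFunc {
            // After the upgrade: the objects created above must still work.
            b.TestPod(tCtx, pod)

            return func(tCtx ktesting.TContext) {
                // After the downgrade: clean up explicitly because the builder
                // does not delete objects when a sub-test ends.
                tCtx.ExpectNoError(tCtx.Client().CoreV1().Pods(tCtx.Namespace()).Delete(tCtx, pod.Name, metav1.DeleteOptions{}))
                tCtx.ExpectNoError(tCtx.Client().ResourceV1beta1().ResourceClaims(tCtx.Namespace()).Delete(tCtx, claim.Name, metav1.DeleteOptions{}))
            }
        }
    }

It would be registered by adding it to the subTests map above.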
|
||||
|
||||
var repoRoot = repoRootDefault()
|
||||
|
||||
|
|
@@ -117,272 +104,189 @@ func repoRootDefault() string {
|
|||
return "../../"
|
||||
}
|
||||
|
||||
func TestUpgradeDowngrade(t *testing.T) {
|
||||
suiteConfig, reporterConfig := framework.CreateGinkgoConfig()
|
||||
suiteConfig.RandomizeAllSpecs = false
|
||||
ginkgo.RunSpecs(t, "DRA", suiteConfig, reporterConfig)
|
||||
}
|
||||
|
||||
var _ = ginkgo.Describe("DRA upgrade/downgrade", func() {
|
||||
// Initialize the default values by registering flags. We don't actually expose those flags.
|
||||
var fs flag.FlagSet
|
||||
framework.RegisterCommonFlags(&fs)
|
||||
framework.RegisterClusterFlags(&fs)
|
||||
|
||||
func TestUpgradeDowngrade(t *testing.T) { testUpgradeDowngrade(ktesting.Init(t)) }
|
||||
func testUpgradeDowngrade(tCtx ktesting.TContext) {
|
||||
// Some other things normally done by test/e2e.
|
||||
e2etestfiles.AddFileSource(e2etestfiles.RootFileSource{Root: repoRoot})
|
||||
gomega.RegisterFailHandler(ginkgo.Fail)
|
||||
|
||||
// sub-test -> step -> failure
|
||||
//
|
||||
// Initially each failure string is empty.
|
||||
results := make(map[string]map[string]string, len(subTests))
|
||||
for subTest := range subTests {
|
||||
results[subTest] = make(map[string]string, len(steps))
|
||||
// Ideally we shouldn't have any code which directly calls gomega.Expect,
|
||||
// but we are not there yet (e.g. e2epod.MakePod). So for now we install
|
||||
// one fail handler which records failures in the main test context.
|
||||
gomega.RegisterFailHandler(func(message string, callerSkip ...int) {
|
||||
tCtx.Helper()
|
||||
tCtx.Fatal(message)
|
||||
})
|
||||
|
||||
envName, dir := currentBinDir()
|
||||
if dir == "" {
|
||||
tCtx.Fatalf("%s must be set to test DRA upgrade/downgrade scenarios.", envName)
|
||||
}
|
||||
|
||||
ginkgo.It("works", func(ctx context.Context) {
|
||||
// TODO: replace with helper code from https://github.com/kubernetes/kubernetes/pull/122481 should that get merged.
|
||||
tCtx := ktesting.Init(GinkgoContextTB())
|
||||
tCtx = ktesting.WithContext(tCtx, ctx)
|
||||
// Determine what we need to downgrade to.
|
||||
tCtx = ktesting.Begin(tCtx, "get source code version")
|
||||
gitVersion, _, err := sourceVersion(tCtx, repoRoot)
|
||||
tCtx.ExpectNoError(err, "determine source code version for repo root %q", repoRoot)
|
||||
version, err := version.ParseGeneric(gitVersion)
|
||||
tCtx.ExpectNoError(err, "parse version %s of repo root %q", gitVersion, repoRoot)
|
||||
major, previousMinor := version.Major(), version.Minor()-1
|
||||
if strings.Contains(gitVersion, "-alpha.0") {
|
||||
// All versions up to and including x.y.z-alpha.0 are treated as if we were
|
||||
// still the previous minor version x.(y-1). There are two reasons for this:
|
||||
//
|
||||
// - During code freeze around (at?) -rc.0, the master branch already
|
||||
// identifies itself as the next release with -alpha.0. Without this
|
||||
// special case, we would change the version skew testing from what
|
||||
// has been tested and been known to work to something else, which
|
||||
// can and at least once did break.
|
||||
//
|
||||
// - Early in the next cycle the differences compared to the previous
|
||||
// release are small, so it's more interesting to go back further.
|
||||
previousMinor--
|
||||
}
|
||||
tCtx.Logf("got version: major: %d, minor: %d, previous minor: %d", major, version.Minor(), previousMinor)
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
envName, dir := currentBinDir()
|
||||
if dir == "" {
|
||||
tCtx.Fatalf("%s must be set to test DRA upgrade/downgrade scenarios.", envName)
|
||||
// KUBERNETES_SERVER_CACHE_DIR can be set to keep downloaded files across test restarts.
|
||||
binDir, cacheBinaries := os.LookupEnv("KUBERNETES_SERVER_CACHE_DIR")
|
||||
if !cacheBinaries {
|
||||
binDir = tCtx.TempDir()
|
||||
}
|
||||
haveBinaries := false
|
||||
|
||||
// Get the previous release.
|
||||
tCtx = ktesting.Begin(tCtx, "get previous release info")
|
||||
tCtx.Logf("stable release %d.%d", major, previousMinor)
|
||||
previousURL, previousVersion, err := serverDownloadURL(tCtx, "stable", major, previousMinor)
|
||||
if errors.Is(err, errHTTP404) {
|
||||
tCtx.Logf("stable doesn't exist, get latest release %d.%d", major, previousMinor)
|
||||
previousURL, previousVersion, err = serverDownloadURL(tCtx, "latest", major, previousMinor)
|
||||
}
|
||||
tCtx.ExpectNoError(err)
|
||||
tCtx.Logf("got previous release version: %s, URL: %s", previousVersion, previousURL)
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
if cacheBinaries {
|
||||
binDir = path.Join(binDir, previousVersion)
|
||||
_, err := os.Stat(path.Join(binDir, string(localupcluster.KubeClusterComponents[0])))
|
||||
if err == nil {
|
||||
haveBinaries = true
|
||||
}
|
||||
|
||||
// Determine what we need to downgrade to.
|
||||
tCtx = ktesting.Begin(tCtx, "get source code version")
|
||||
gitVersion, _, err := sourceVersion(tCtx, repoRoot)
|
||||
tCtx.ExpectNoError(err, "determine source code version for repo root %q", repoRoot)
|
||||
version, err := version.ParseGeneric(gitVersion)
|
||||
tCtx.ExpectNoError(err, "parse version %s of repo root %q", gitVersion, repoRoot)
|
||||
major, previousMinor := version.Major(), version.Minor()-1
|
||||
if strings.Contains(gitVersion, "-alpha.0") {
|
||||
// All versions up to and including x.y.z-alpha.0 are treated as if we were
|
||||
// still the previous minor version x.(y-1). There are two reasons for this:
|
||||
//
|
||||
// - During code freeze around (at?) -rc.0, the master branch already
|
||||
// identifies itself as the next release with -alpha.0. Without this
|
||||
// special case, we would change the version skew testing from what
|
||||
// has been tested and been known to work to something else, which
|
||||
// can and at least once did break.
|
||||
//
|
||||
// - Early in the next cycle the differences compared to the previous
|
||||
// release are small, so it's more interesting to go back further.
|
||||
previousMinor--
|
||||
}
|
||||
tCtx.Logf("got version: major: %d, minor: %d, previous minor: %d", major, version.Minor(), previousMinor)
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
// KUBERNETES_SERVER_CACHE_DIR can be set to keep downloaded files across test restarts.
|
||||
binDir, cacheBinaries := os.LookupEnv("KUBERNETES_SERVER_CACHE_DIR")
|
||||
if !cacheBinaries {
|
||||
binDir = tCtx.TempDir()
|
||||
}
|
||||
haveBinaries := false
|
||||
|
||||
// Get the previous release.
|
||||
tCtx = ktesting.Begin(tCtx, "get previous release info")
|
||||
tCtx.Logf("stable release %d.%d", major, previousMinor)
|
||||
previousURL, previousVersion, err := serverDownloadURL(tCtx, "stable", major, previousMinor)
|
||||
if errors.Is(err, errHTTP404) {
|
||||
tCtx.Logf("stable doesn't exist, get latest release %d.%d", major, previousMinor)
|
||||
previousURL, previousVersion, err = serverDownloadURL(tCtx, "latest", major, previousMinor)
|
||||
}
|
||||
tCtx.ExpectNoError(err)
|
||||
tCtx.Logf("got previous release version: %s, URL: %s", previousVersion, previousURL)
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
if cacheBinaries {
|
||||
binDir = path.Join(binDir, previousVersion)
|
||||
_, err := os.Stat(path.Join(binDir, string(localupcluster.KubeClusterComponents[0])))
|
||||
if err == nil {
|
||||
haveBinaries = true
|
||||
}
|
||||
if !haveBinaries {
|
||||
tCtx = ktesting.Begin(tCtx, fmt.Sprintf("download and unpack %s", previousURL))
|
||||
req, err := http.NewRequestWithContext(tCtx, http.MethodGet, previousURL, nil)
|
||||
tCtx.ExpectNoError(err, "construct request")
|
||||
response, err := http.DefaultClient.Do(req)
|
||||
tCtx.ExpectNoError(err, "download")
|
||||
defer func() {
|
||||
_ = response.Body.Close()
|
||||
}()
|
||||
decompress, err := gzip.NewReader(response.Body)
|
||||
tCtx.ExpectNoError(err, "construct gzip reader")
|
||||
unpack := tar.NewReader(decompress)
|
||||
for {
|
||||
header, err := unpack.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
base := path.Base(header.Name)
|
||||
if slices.Contains(localupcluster.KubeClusterComponents, localupcluster.KubeComponentName(base)) {
|
||||
data, err := io.ReadAll(unpack)
|
||||
tCtx.ExpectNoError(err, fmt.Sprintf("read content of %s", header.Name))
|
||||
tCtx.ExpectNoError(os.MkdirAll(binDir, 0755), "create directory for binaries")
|
||||
tCtx.ExpectNoError(os.WriteFile(path.Join(binDir, base), data, 0555), fmt.Sprintf("write content of %s", header.Name))
|
||||
}
|
||||
}
|
||||
if !haveBinaries {
|
||||
tCtx = ktesting.Begin(tCtx, fmt.Sprintf("download and unpack %s", previousURL))
|
||||
req, err := http.NewRequestWithContext(tCtx, http.MethodGet, previousURL, nil)
|
||||
tCtx.ExpectNoError(err, "construct request")
|
||||
response, err := http.DefaultClient.Do(req)
|
||||
tCtx.ExpectNoError(err, "download")
|
||||
defer func() {
|
||||
_ = response.Body.Close()
|
||||
}()
|
||||
decompress, err := gzip.NewReader(response.Body)
|
||||
tCtx.ExpectNoError(err, "construct gzip reader")
|
||||
unpack := tar.NewReader(decompress)
|
||||
for {
|
||||
header, err := unpack.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
base := path.Base(header.Name)
|
||||
if slices.Contains(localupcluster.KubeClusterComponents, localupcluster.KubeComponentName(base)) {
|
||||
data, err := io.ReadAll(unpack)
|
||||
tCtx.ExpectNoError(err, fmt.Sprintf("read content of %s", header.Name))
|
||||
tCtx.ExpectNoError(os.MkdirAll(binDir, 0755), "create directory for binaries")
|
||||
tCtx.ExpectNoError(os.WriteFile(path.Join(binDir, base), data, 0555), fmt.Sprintf("write content of %s", header.Name))
|
||||
}
|
||||
}
|
||||
tCtx = ktesting.End(tCtx)
|
||||
}
|
||||
|
||||
tCtx = ktesting.Begin(tCtx, fmt.Sprintf("bring up v%d.%d", major, previousMinor))
|
||||
cluster := localupcluster.New(tCtx)
|
||||
localUpClusterEnv := map[string]string{
|
||||
"RUNTIME_CONFIG": "resource.k8s.io/v1beta1,resource.k8s.io/v1beta2",
|
||||
"FEATURE_GATES": "DynamicResourceAllocation=true",
|
||||
// *not* needed because driver will run in "local filesystem" mode (= driver.IsLocal): "ALLOW_PRIVILEGED": "1",
|
||||
}
|
||||
cluster.Start(tCtx, binDir, localUpClusterEnv)
|
||||
tCtx = ktesting.End(tCtx)
|
||||
}
|
||||
|
||||
restConfig := cluster.LoadConfig(tCtx)
|
||||
restConfig.UserAgent = fmt.Sprintf("%s -- dra", restclient.DefaultKubernetesUserAgent())
|
||||
tCtx = ktesting.WithRESTConfig(tCtx, restConfig)
|
||||
// TODO: rewrite all DRA test code to use ktesting.TContext once https://github.com/kubernetes/kubernetes/pull/122481 is
|
||||
// merged, then we don't need to fake a Framework instance.
|
||||
f := &framework.Framework{
|
||||
BaseName: "dra",
|
||||
Timeouts: framework.NewTimeoutContext(),
|
||||
ClientSet: tCtx.Client(),
|
||||
DynamicClient: tCtx.Dynamic(),
|
||||
tCtx = ktesting.Begin(tCtx, fmt.Sprintf("bring up v%d.%d", major, previousMinor))
|
||||
cluster := localupcluster.New(tCtx)
|
||||
localUpClusterEnv := map[string]string{
|
||||
"RUNTIME_CONFIG": "resource.k8s.io/v1beta1,resource.k8s.io/v1beta2",
|
||||
"FEATURE_GATES": "DynamicResourceAllocation=true",
|
||||
// *not* needed because driver will run in "local filesystem" mode (= driver.IsLocal): "ALLOW_PRIVILEGED": "1",
|
||||
}
|
||||
cluster.Start(tCtx, binDir, localUpClusterEnv)
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
// The driver containers have to run with sufficient privileges to
|
||||
// modify /var/lib/kubelet/plugins.
|
||||
NamespacePodSecurityLevel: admissionapi.LevelPrivileged,
|
||||
}
|
||||
f.SetClientConfig(restConfig)
|
||||
restConfig := cluster.LoadConfig(tCtx)
|
||||
restConfig.UserAgent = fmt.Sprintf("%s -- dra", restclient.DefaultKubernetesUserAgent())
|
||||
tCtx = tCtx.WithRESTConfig(restConfig).WithNamespace("default")
|
||||
|
||||
namespace, err := f.CreateNamespace(tCtx, f.BaseName, map[string]string{
|
||||
"e2e-framework": f.BaseName,
|
||||
})
|
||||
tCtx.ExpectNoError(err, "create namespace")
|
||||
f.Namespace = namespace
|
||||
f.UniqueName = namespace.Name
|
||||
tCtx = ktesting.Begin(tCtx, fmt.Sprintf("v%d.%d", major, previousMinor))
|
||||
|
||||
tCtx = ktesting.Begin(tCtx, fmt.Sprintf("v%d.%d", major, previousMinor))
|
||||
tCtx.ExpectNoError(e2enode.WaitForAllNodesSchedulable(tCtx, tCtx.Client(), 5*time.Minute), "wait for all nodes to be schedulable")
|
||||
nodes := drautils.NewNodesNow(tCtx, 1, 1)
|
||||
|
||||
tCtx.ExpectNoError(e2enode.WaitForAllNodesSchedulable(tCtx, tCtx.Client(), f.Timeouts.NodeSchedulable), "wait for all nodes to be schedulable")
|
||||
nodes := drautils.NewNodesNow(tCtx, 1, 1)
|
||||
// Opening sockets locally avoids intermittent errors and delays caused by proxying through the restarted apiserver.
|
||||
// We could speed up testing by shortening the sync delay in the ResourceSlice controller, but let's better
|
||||
// test the defaults.
|
||||
driver := drautils.NewDriverInstance(tCtx)
|
||||
driver.IsLocal = true
|
||||
driver.Run(tCtx, "/var/lib/kubelet", nodes, drautils.DriverResourcesNow(nodes, 8))
|
||||
b := drautils.NewBuilderNow(tCtx, driver)
|
||||
b.SkipCleanup = true
|
||||
|
||||
// Opening sockets locally avoids intermittent errors and delays caused by proxying through the restarted apiserver.
|
||||
// We could speed up testing by shortening the sync delay in the ResourceSlice controller, but let's better
|
||||
// test the defaults.
|
||||
driver := drautils.NewDriverInstance(tCtx)
|
||||
driver.IsLocal = true
|
||||
driver.Run(tCtx, nodes, drautils.DriverResourcesNow(nodes, 8))
|
||||
b := drautils.NewBuilderNow(tCtx, driver)
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
steps2 := make(map[string]step2Func, len(subTests))
|
||||
for subTest, step1 := range subTests {
|
||||
tCtx = ktesting.Begin(tCtx, subTest)
|
||||
var result error
|
||||
func() {
|
||||
tCtx, finalize := ktesting.WithError(tCtx, &result)
|
||||
defer finalize()
|
||||
|
||||
// This only gets set in case of success.
|
||||
steps2[subTest] = step1(tCtx, f, b)
|
||||
}()
|
||||
if result != nil {
|
||||
results[subTest][steps[0]] = result.Error()
|
||||
}
|
||||
tCtx = ktesting.End(tCtx)
|
||||
}
|
||||
|
||||
tCtx = ktesting.Begin(tCtx, fmt.Sprintf("update to %s", gitVersion))
|
||||
// We could split this up into first updating the apiserver, then control plane components, then restarting kubelet.
|
||||
// For the purpose of this test we primarily care about full before/after comparisons, so this is not done yet.
|
||||
// TODO
|
||||
restoreOptions := cluster.Modify(tCtx, localupcluster.ModifyOptions{Upgrade: true, BinDir: dir})
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
// The kubelet wipes all ResourceSlices on a restart because it doesn't know which drivers were running.
|
||||
// Wait for the ResourceSlice controller in the driver to notice and recreate the ResourceSlices.
|
||||
tCtx = ktesting.Begin(tCtx, "wait for ResourceSlices")
|
||||
ktesting.Eventually(tCtx, driver.NewGetSlices()).WithTimeout(5 * time.Minute).Should(gomega.HaveField("Items", gomega.HaveLen(len(nodes.NodeNames))))
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
steps3 := make(map[string]step3Func, len(subTests))
|
||||
for subTest := range subTests {
|
||||
step2 := steps2[subTest]
|
||||
if step2 == nil {
|
||||
continue
|
||||
}
|
||||
tCtx = ktesting.Begin(tCtx, subTest)
|
||||
var result error
|
||||
func() {
|
||||
tCtx, finalize := ktesting.WithError(tCtx, &result)
|
||||
defer finalize()
|
||||
|
||||
// This only gets set in case of success.
|
||||
steps3[subTest] = step2(tCtx)
|
||||
}()
|
||||
if result != nil {
|
||||
results[subTest][steps[0]] = result.Error()
|
||||
}
|
||||
tCtx = ktesting.End(tCtx)
|
||||
}
|
||||
|
||||
// Roll back.
|
||||
tCtx = ktesting.Begin(tCtx, "downgrade")
|
||||
cluster.Modify(tCtx, restoreOptions)
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
// TODO: ensure that kube-controller-manager is up-and-running.
|
||||
// This works around https://github.com/kubernetes/kubernetes/issues/132334 and can be removed
|
||||
// once a fix for that is backported.
|
||||
tCtx = ktesting.Begin(tCtx, "wait for kube-controller-manager")
|
||||
ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) string {
|
||||
output, _ := cluster.GetSystemLogs(tCtx, localupcluster.KubeControllerManager)
|
||||
return output
|
||||
}).Should(gomega.ContainSubstring(`"Caches are synced" controller="resource_claim"`))
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
for subTest := range subTests {
|
||||
step3 := steps3[subTest]
|
||||
if step3 == nil {
|
||||
continue
|
||||
}
|
||||
tCtx = ktesting.Begin(tCtx, subTest)
|
||||
var result error
|
||||
func() {
|
||||
tCtx, finalize := ktesting.WithError(tCtx, &result)
|
||||
defer finalize()
|
||||
|
||||
step3(tCtx)
|
||||
}()
|
||||
if result != nil {
|
||||
results[subTest][steps[0]] = result.Error()
|
||||
}
|
||||
tCtx = ktesting.End(tCtx)
|
||||
upgradedTestFuncs := make(map[string]upgradedTestFunc, len(subTests))
|
||||
tCtx.Run("after-cluster-creation", func(tCtx ktesting.TContext) {
|
||||
for subTest, f := range subTests {
|
||||
tCtx.Run(subTest, func(tCtx ktesting.TContext) {
|
||||
// This only gets set if f doesn't panic because of a fatal error,
|
||||
// so below we won't continue if step 1 already failed.
|
||||
// Other sub-tests are not affected.
|
||||
upgradedTestFuncs[subTest] = f(tCtx, b)
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
// This runs last because by default Ginkgo does not randomize within
|
||||
// a top-level container.
|
||||
for subTest := range subTests {
|
||||
ginkgo.Context(subTest, func() {
|
||||
for _, step := range steps {
|
||||
ginkgo.It(step, func() {
|
||||
failure := results[subTest][step]
|
||||
if failure != "" {
|
||||
// Source code location will be useless here. We can't have both:
|
||||
// separate test results *and* correct source code location.
|
||||
// This will become better with testing.T-based unit tests.
|
||||
_, _ = ginkgo.GinkgoWriter.Write([]byte("For log output see 'DRA upgrade/downgrade works'\n"))
|
||||
ginkgo.Fail(failure)
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
})
|
||||
tCtx = ktesting.Begin(tCtx, fmt.Sprintf("update to %s", gitVersion))
|
||||
// We could split this up into first updating the apiserver, then control plane components, then restarting kubelet.
|
||||
// For the purpose of this test we primarily care about full before/after comparisons, so this is not done yet.
|
||||
// TODO
|
||||
restoreOptions := cluster.Modify(tCtx, localupcluster.ModifyOptions{Upgrade: true, BinDir: dir})
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
// The kubelet wipes all ResourceSlices on a restart because it doesn't know which drivers were running.
|
||||
// Wait for the ResourceSlice controller in the driver to notice and recreate the ResourceSlices.
|
||||
tCtx = ktesting.Begin(tCtx, "wait for ResourceSlices")
|
||||
ktesting.Eventually(tCtx, driver.NewGetSlices()).WithTimeout(5 * time.Minute).Should(gomega.HaveField("Items", gomega.HaveLen(len(nodes.NodeNames))))
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
downgradedTestFuncs := make(map[string]downgradedTestFunc, len(subTests))
|
||||
tCtx.Run("after-cluster-upgrade", func(tCtx ktesting.TContext) {
|
||||
for subTest, f := range upgradedTestFuncs {
|
||||
tCtx.Run(subTest, func(tCtx ktesting.TContext) {
|
||||
downgradedTestFuncs[subTest] = f(tCtx)
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
// Roll back.
|
||||
tCtx = ktesting.Begin(tCtx, "downgrade")
|
||||
cluster.Modify(tCtx, restoreOptions)
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
// TODO: ensure that kube-controller-manager is up-and-running.
|
||||
// This works around https://github.com/kubernetes/kubernetes/issues/132334 and can be removed
|
||||
// once a fix for that is backported.
|
||||
tCtx = ktesting.Begin(tCtx, "wait for kube-controller-manager")
|
||||
ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) string {
|
||||
output, _ := cluster.GetSystemLogs(tCtx, localupcluster.KubeControllerManager)
|
||||
return output
|
||||
}).Should(gomega.ContainSubstring(`"Caches are synced" controller="resource_claim"`))
|
||||
tCtx = ktesting.End(tCtx)
|
||||
|
||||
tCtx.Run("after-cluster-downgrade", func(tCtx ktesting.TContext) {
|
||||
for subTest, f := range downgradedTestFuncs {
|
||||
tCtx.Run(subTest, func(tCtx ktesting.TContext) {
|
||||
f(tCtx)
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// sourceVersion identifies the Kubernetes git version based on hack/print-workspace-status.sh.
|
||||
//
|
||||
|
|
|
|||