// Copyright IBM Corp. 2014, 2026 // SPDX-License-Identifier: BUSL-1.1 package cloud import ( "bufio" "context" "encoding/json" "errors" "fmt" "io" "net/http" "net/url" "path/filepath" "strings" "time" "github.com/hashicorp/go-tfe" tfaddr "github.com/hashicorp/terraform-registry-address" svchost "github.com/hashicorp/terraform-svchost" "github.com/hashicorp/terraform-svchost/disco" "github.com/zclconf/go-cty/cty" "github.com/hashicorp/terraform/internal/command/arguments" "github.com/hashicorp/terraform/internal/command/format" "github.com/hashicorp/terraform/internal/command/jsonformat" "github.com/hashicorp/terraform/internal/command/views" "github.com/hashicorp/terraform/internal/configs" "github.com/hashicorp/terraform/internal/logging" "github.com/hashicorp/terraform/internal/moduletest" "github.com/hashicorp/terraform/internal/plans" "github.com/hashicorp/terraform/internal/terminal" "github.com/hashicorp/terraform/internal/tfdiags" tfversion "github.com/hashicorp/terraform/version" ) // TestSuiteRunner executes any tests found in the relevant directories in TFC. // // It uploads the configuration and uses go-tfe to execute a . // // We keep this separate from Cloud, as the tests don't execute with a // particular workspace in mind but instead with a specific module from a // private registry. Many things within Cloud assume the existence of a // workspace when initialising so it isn't pratical to share this for tests. type TestSuiteRunner struct { // ConfigDirectory and TestingDirectory are the paths to the directory // that contains our configuration and our testing files. ConfigDirectory string TestingDirectory string // Config is the actual loaded config. Config *configs.Config Services *disco.Disco // Source is the private registry module we should be sending the tests // to when they execute. Source string // GlobalVariables are the variables provided by the TF_VAR_* environment // variables and -var and -var-file flags. GlobalVariables map[string]arguments.UnparsedVariableValue // Stopped and Cancelled track whether the user requested the testing // process to be interrupted. Stopped is a nice graceful exit, we'll still // tidy up any state that was created and mark the tests with relevant // `skipped` status updates. Cancelled is a hard stop right now exit, we // won't attempt to clean up any state left hanging, and tests will just // be left showing `pending` as the status. We will still print out the // destroy summary diagnostics that tell the user what state has been left // behind and needs manual clean up. Stopped bool Cancelled bool // StoppedCtx and CancelledCtx allow in progress Terraform operations to // respond to external calls from the test command. StoppedCtx context.Context CancelledCtx context.Context // Verbose tells the runner to print out plan files during each test run. Verbose bool // OperationParallelism is the limit Terraform places on total parallel operations // during the plan or apply command within a single test run. OperationParallelism int // Filters restricts which test files will be executed. Filters []string // Renderer knows how to convert JSON logs retrieved from TFE back into // human-readable. // // If this is nil, the runner will print the raw logs directly to Streams. Renderer *jsonformat.Renderer // View and Streams provide alternate ways to output raw data to the // user. View views.Test Streams *terminal.Streams // appName is the name of the instance this test suite runner is configured // against. Can be "HCP Terraform" or "Terraform Enterprise" appName string // clientOverride allows tests to specify the client instead of letting the // system initialise one itself. clientOverride *tfe.Client } func (runner *TestSuiteRunner) Stop() { runner.Stopped = true } func (runner *TestSuiteRunner) IsStopped() bool { return runner.Stopped } func (runner *TestSuiteRunner) Cancel() { runner.Cancelled = true } func (runner *TestSuiteRunner) Test(_ bool) (moduletest.Status, tfdiags.Diagnostics) { var diags tfdiags.Diagnostics configDirectory, err := filepath.Abs(runner.ConfigDirectory) if err != nil { diags = diags.Append(fmt.Errorf("Failed to get absolute path of the configuration directory: %v", err)) return moduletest.Error, diags } variables, variableDiags := ParseCloudRunTestVariables(runner.GlobalVariables) diags = diags.Append(variableDiags) if variableDiags.HasErrors() { // Stop early if we couldn't parse the global variables. return moduletest.Error, diags } addr, err := tfaddr.ParseModuleSource(runner.Source) if err != nil { if parserError, ok := err.(*tfaddr.ParserError); ok { diags = diags.Append(tfdiags.AttributeValue( tfdiags.Error, parserError.Summary, parserError.Detail, cty.Path{cty.GetAttrStep{Name: "source"}})) } else { diags = diags.Append(err) } return moduletest.Error, diags } if addr.Package.Host == tfaddr.DefaultModuleRegistryHost { // Then they've reference something from the public registry. We can't // run tests against that in this way yet. diags = diags.Append(tfdiags.AttributeValue( tfdiags.Error, "Module source points to the public registry", "HCP Terraform and Terraform Enterprise can only execute tests for modules held within private registries.", cty.Path{cty.GetAttrStep{Name: "source"}})) return moduletest.Error, diags } id := tfe.RegistryModuleID{ Organization: addr.Package.Namespace, Name: addr.Package.Name, Provider: addr.Package.TargetSystem, Namespace: addr.Package.Namespace, RegistryName: tfe.PrivateRegistry, } client, module, clientDiags := runner.client(addr, id) diags = diags.Append(clientDiags) if clientDiags.HasErrors() { return moduletest.Error, diags } configurationVersion, err := client.ConfigurationVersions.CreateForRegistryModule(runner.StoppedCtx, id) if err != nil { diags = diags.Append(runner.generalError("Failed to create configuration version", err)) return moduletest.Error, diags } if runner.Stopped || runner.Cancelled { return moduletest.Error, diags } if err := client.ConfigurationVersions.Upload(runner.StoppedCtx, configurationVersion.UploadURL, configDirectory); err != nil { diags = diags.Append(runner.generalError("Failed to upload configuration version", err)) return moduletest.Error, diags } if runner.Stopped || runner.Cancelled { return moduletest.Error, diags } // From here, we'll pass any cancellation signals into the test run instead // of cancelling things locally. The reason for this is we want to make sure // the test run tidies up any state properly. This means, we'll send the // cancellation signals and then still wait for and process the logs. // // This also means that all calls to HCP Terraform will use context.Background() // instead of the stopped or cancelled context as we want them to finish and // the run to be cancelled by HCP Terraform properly. opts := tfe.TestRunCreateOptions{ Filters: runner.Filters, TestDirectory: tfe.String(runner.TestingDirectory), Verbose: tfe.Bool(runner.Verbose), Parallelism: tfe.Int(runner.OperationParallelism), Variables: func() []*tfe.RunVariable { runVariables := make([]*tfe.RunVariable, 0, len(variables)) for name, value := range variables { runVariables = append(runVariables, &tfe.RunVariable{ Key: name, Value: value, }) } return runVariables }(), ConfigurationVersion: configurationVersion, RegistryModule: module, } run, err := client.TestRuns.Create(context.Background(), opts) if err != nil { diags = diags.Append(runner.generalError("Failed to create test run", err)) return moduletest.Error, diags } runningCtx, done := context.WithCancel(context.Background()) go func() { defer logging.PanicHandler() defer done() // Let's wait for the test run to start separately, so we can provide // some nice updates while we wait. completed := false started := time.Now() updated := started for i := 0; !completed; i++ { run, err := client.TestRuns.Read(context.Background(), id, run.ID) if err != nil { diags = diags.Append(runner.generalError("Failed to retrieve test run", err)) return // exit early } if run.Status != tfe.TestRunQueued { // We block as long as the test run is still queued. completed = true continue // We can render the logs now. } current := time.Now() if i == 0 || current.Sub(updated).Seconds() > 30 { updated = current // TODO: Provide better updates based on queue status etc. // We could look through the queue to find out exactly where the // test run is and give a count down. Other stuff like that. // For now, we'll just print a simple status updated. runner.View.TFCStatusUpdate(run.Status, current.Sub(started)) } } // The test run has actually started now, so let's render the logs. logDiags := runner.renderLogs(client, run, id) diags = diags.Append(logDiags) }() // We're doing a couple of things in the wait function. Firstly, waiting // for the test run to actually finish. Secondly, listening for interrupt // signals and forwarding them onto TFC. waitDiags := runner.wait(runningCtx, client, run, id) diags = diags.Append(waitDiags) if diags.HasErrors() { return moduletest.Error, diags } // Refresh the run now we know it is finished. run, err = client.TestRuns.Read(context.Background(), id, run.ID) if err != nil { diags = diags.Append(runner.generalError("Failed to retrieve completed test run", err)) return moduletest.Error, diags } if run.Status != tfe.TestRunFinished { // The only reason we'd get here without the run being finished properly // is because the run errored outside the scope of the tests, or because // the run was cancelled. Either way, we can just mark it has having // errored for the purpose of our return code. return moduletest.Error, diags } // Otherwise the run has finished successfully, and we can look at the // actual status of the test instead of the run to figure out what status we // should return. switch run.TestStatus { case tfe.TestError: return moduletest.Error, diags case tfe.TestFail: return moduletest.Fail, diags case tfe.TestPass: return moduletest.Pass, diags case tfe.TestPending: return moduletest.Pending, diags case tfe.TestSkip: return moduletest.Skip, diags default: panic("found unrecognized test status: " + run.TestStatus) } } // discover the TFC/E API service URL func discoverTfeURL(hostname svchost.Hostname, services *disco.Disco) (*url.URL, error) { host, err := services.Discover(hostname) if err != nil { var serviceDiscoErr *disco.ErrServiceDiscoveryNetworkRequest switch { case errors.As(err, &serviceDiscoErr): err = fmt.Errorf("a network issue prevented cloud configuration; %w", err) return nil, err default: return nil, err } } return host.ServiceURL(tfeServiceID) } func (runner *TestSuiteRunner) client(addr tfaddr.Module, id tfe.RegistryModuleID) (*tfe.Client, *tfe.RegistryModule, tfdiags.Diagnostics) { var diags tfdiags.Diagnostics var client *tfe.Client if runner.clientOverride != nil { client = runner.clientOverride } else { service, err := discoverTfeURL(addr.Package.Host, runner.Services) if err != nil { diags = diags.Append(tfdiags.AttributeValue( tfdiags.Error, strings.ToUpper(err.Error()[:1])+err.Error()[1:], "", // no description is needed here, the error is clear cty.Path{cty.GetAttrStep{Name: "hostname"}}, )) return nil, nil, diags } token, err := CliConfigToken(addr.Package.Host, runner.Services) if err != nil { diags = diags.Append(tfdiags.AttributeValue( tfdiags.Error, strings.ToUpper(err.Error()[:1])+err.Error()[1:], "", // no description is needed here, the error is clear cty.Path{cty.GetAttrStep{Name: "hostname"}}, )) return nil, nil, diags } if token == "" { hostname := addr.Package.Host.ForDisplay() loginCommand := "terraform login" if hostname != defaultHostname { loginCommand = loginCommand + " " + hostname } diags = diags.Append(tfdiags.Sourceless( tfdiags.Error, "Required token could not be found", fmt.Sprintf( "Run the following command to generate a token for %s:\n %s", hostname, loginCommand, ), )) return nil, nil, diags } cfg := &tfe.Config{ Address: service.String(), BasePath: service.Path, Token: token, Headers: make(http.Header), RetryLogHook: runner.View.TFCRetryHook, } // Set the version header to the current version. cfg.Headers.Set(tfversion.Header, tfversion.Version) cfg.Headers.Set(headerSourceKey, headerSourceValue) if client, err = tfe.NewClient(cfg); err != nil { diags = diags.Append(tfdiags.Sourceless( tfdiags.Error, "Failed to create the HCP Terraform or Terraform Enterprise client", fmt.Sprintf( `Encountered an unexpected error while creating the `+ `HCP Terraform or Terraform Enterprise client: %s.`, err, ), )) return nil, nil, diags } } module, err := client.RegistryModules.Read(runner.StoppedCtx, id) if err != nil { // Then the module doesn't exist, and we can't run tests against it. if err == tfe.ErrResourceNotFound { err = fmt.Errorf("module %q was not found.\n\nPlease ensure that the organization and hostname are correct and that your API token for %s is valid.", addr.ForDisplay(), addr.Package.Host.ForDisplay()) } diags = diags.Append(tfdiags.AttributeValue( tfdiags.Error, fmt.Sprintf("Failed to read module %q", addr.ForDisplay()), fmt.Sprintf("Encountered an unexpected error while the module: %s", err), cty.Path{cty.GetAttrStep{Name: "source"}})) return client, nil, diags } // Enable retries for server errors. client.RetryServerErrors(true) runner.appName = client.AppName() if isValidAppName(runner.appName) { runner.appName = "HCP Terraform" } // Aaaaand I'm done. return client, module, diags } func (runner *TestSuiteRunner) wait(ctx context.Context, client *tfe.Client, run *tfe.TestRun, moduleId tfe.RegistryModuleID) tfdiags.Diagnostics { var diags tfdiags.Diagnostics handleCancelled := func() { if err := client.TestRuns.Cancel(context.Background(), moduleId, run.ID); err != nil { diags = diags.Append(tfdiags.Sourceless( tfdiags.Error, "Could not cancel the test run", fmt.Sprintf("Terraform could not cancel the test run, you will have to navigate to the %s console and cancel the test run manually.\n\nThe error message received when cancelling the test run was %s", client.AppName(), err))) return } // At this point we've requested a force cancel, and we know that // Terraform locally is just going to quit after some amount of time so // we'll just wait for that to happen or for HCP Terraform to finish, whichever // happens first. <-ctx.Done() } handleStopped := func() { if err := client.TestRuns.Cancel(context.Background(), moduleId, run.ID); err != nil { diags = diags.Append(tfdiags.Sourceless( tfdiags.Error, "Could not stop the test run", fmt.Sprintf("Terraform could not stop the test run, you will have to navigate to the %s console and cancel the test run manually.\n\nThe error message received when stopping the test run was %s", client.AppName(), err))) return } // We've request a cancel, we're happy to just wait for HCP Terraform to cancel // the run appropriately. select { case <-runner.CancelledCtx.Done(): // We got more pushy, let's force cancel. handleCancelled() case <-ctx.Done(): // It finished normally after we request the cancel. Do nothing. } } select { case <-runner.StoppedCtx.Done(): // The StoppedCtx is passed in from the command package, which is // listening for interrupts from the user. After the first interrupt the // StoppedCtx is triggered. handleStopped() case <-ctx.Done(): // The remote run finished normally! Do nothing. } return diags } func (runner *TestSuiteRunner) renderLogs(client *tfe.Client, run *tfe.TestRun, moduleId tfe.RegistryModuleID) tfdiags.Diagnostics { var diags tfdiags.Diagnostics logs, err := client.TestRuns.Logs(context.Background(), moduleId, run.ID) if err != nil { diags = diags.Append(runner.generalError("Failed to retrieve logs", err)) return diags } reader := bufio.NewReaderSize(logs, 64*1024) for next := true; next; { var l, line []byte var err error for isPrefix := true; isPrefix; { l, isPrefix, err = reader.ReadLine() if err != nil { if err != io.EOF { diags = diags.Append(runner.generalError("Failed to read logs", err)) return diags } next = false } line = append(line, l...) } if next || len(line) > 0 { if runner.Renderer != nil { log := jsonformat.JSONLog{} if err := json.Unmarshal(line, &log); err != nil { runner.Streams.Println(string(line)) // Just print the raw line so the user can still try and interpret the information. continue } // Most of the log types can be rendered with just the // information they contain. We just pass these straight into // the renderer. Others, however, need additional context that // isn't available within the renderer so we process them first. switch log.Type { case jsonformat.LogTestInterrupt: interrupt := log.TestFatalInterrupt runner.Streams.Eprintln(format.WordWrap(log.Message, runner.Streams.Stderr.Columns())) if len(interrupt.State) > 0 { runner.Streams.Eprint(format.WordWrap("\nTerraform has already created the following resources from the module under test:\n", runner.Streams.Stderr.Columns())) for _, resource := range interrupt.State { if len(resource.DeposedKey) > 0 { runner.Streams.Eprintf(" - %s (%s)\n", resource.Instance, resource.DeposedKey) } else { runner.Streams.Eprintf(" - %s\n", resource.Instance) } } } if len(interrupt.States) > 0 { for run, resources := range interrupt.States { runner.Streams.Eprint(format.WordWrap(fmt.Sprintf("\nTerraform has already created the following resources for %q:\n", run), runner.Streams.Stderr.Columns())) for _, resource := range resources { if len(resource.DeposedKey) > 0 { runner.Streams.Eprintf(" - %s (%s)\n", resource.Instance, resource.DeposedKey) } else { runner.Streams.Eprintf(" - %s\n", resource.Instance) } } } } if len(interrupt.Planned) > 0 { module := "the module under test" for _, run := range runner.Config.Module.Tests[log.TestFile].Runs { if run.Name == log.TestRun && run.ConfigUnderTest != nil { module = fmt.Sprintf("%q", run.Module.Source.String()) } } runner.Streams.Eprint(format.WordWrap(fmt.Sprintf("\nTerraform was in the process of creating the following resources for %q from %s, and they may not have been destroyed:\n", log.TestRun, module), runner.Streams.Stderr.Columns())) for _, resource := range interrupt.Planned { runner.Streams.Eprintf(" - %s\n", resource) } } case jsonformat.LogTestPlan: var uimode plans.Mode for _, run := range runner.Config.Module.Tests[log.TestFile].Runs { if run.Name == log.TestRun { switch run.Options.Mode { case configs.RefreshOnlyTestMode: uimode = plans.RefreshOnlyMode case configs.NormalTestMode: uimode = plans.NormalMode } // Don't keep searching the runs. break } } runner.Renderer.RenderHumanPlan(*log.TestPlan, uimode) case jsonformat.LogTestState: runner.Renderer.RenderHumanState(*log.TestState) default: // For all the rest we can just hand over to the renderer // to handle directly. if err := runner.Renderer.RenderLog(&log); err != nil { runner.Streams.Println(string(line)) // Just print the raw line so the can still try and interpret the information. continue } } } else { runner.Streams.Println(string(line)) // If the renderer is null, it means the user just wants to see the raw JSON outputs anyway. } } } return diags } func (runner *TestSuiteRunner) generalError(msg string, err error) error { var diags tfdiags.Diagnostics if urlErr, ok := err.(*url.Error); ok { err = urlErr.Err } switch err { case context.Canceled: return err case tfe.ErrResourceNotFound: diags = diags.Append(tfdiags.Sourceless( tfdiags.Error, fmt.Sprintf("%s: %v", msg, err), fmt.Sprintf("For security, %s return '404 Not Found' responses for resources\n", runner.appName)+ "for resources that a user doesn't have access to, in addition to resources that\n"+ "do not exist. If the resource does exist, please check the permissions of the provided token.", )) return diags.Err() default: diags = diags.Append(tfdiags.Sourceless( tfdiags.Error, fmt.Sprintf("%s: %v", msg, err), fmt.Sprintf(`%s returned an unexpected error. Sometimes `, runner.appName)+ `this is caused by network connection problems, in which case you could retry `+ `the command. If the issue persists please open a support ticket to get help `+ `resolving the problem.`, )) return diags.Err() } }