mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2026-03-25 10:53:02 -04:00
Probably fixes (or improves, at least) https://code.forgejo.org/forgejo/runner/issues/1391, paired with the runner implementation https://code.forgejo.org/forgejo/runner/pulls/1393. When the FetchTask() API is invoked to create a task, unpreventable environmental errors may occur; for example, network disconnects and timeouts. It's possible that these errors occur after the server-side has assigned a task to the runner during the API call, in which case the error would cause that task to be lost between the two systems -- the server will think it's assigned to the runner, and the runner never received it. This can cause jobs to appear stuck at "Set up job". The solution implemented here is idempotency in the FetchTask() API call, which means that the "same" FetchTask() API call is expected to return the same values. Specifically, the runner creates a unique identifier which is transmitted to the server as a header `x-runner-request-key` with each FetchTask() invocation which defines the sameness of the call, and the runner retains the value until the API call receives a successful response. The server implementation returns the same tasks back if a second (or Nth) call is received with the same `x-runner-request-key` header. In order to accomplish this, it records the `x-runner-request-key` value that is used with each request that assigns tasks. As a complication, the Forgejo server is unable to return the same `${{ secrets.forgejo_token }}` for the task because the server stores that value as a one-way hash in the database. To resolve this, the server regenerates the token when retrieving tasks for a second time. ## Checklist The [contributor guide](https://forgejo.org/docs/next/contributor/) contains information that will be helpful to first-time contributors. There also are a few [conditions for merging Pull Requests in Forgejo repositories](https://codeberg.org/forgejo/governance/src/branch/main/PullRequestsAgreement.md). 
You are also welcome to join the [Forgejo development chatroom](https://matrix.to/#/#forgejo-development:matrix.org). ### Tests for Go changes (can be removed for JavaScript changes) - I added test coverage for Go changes... - [x] in their respective `*_test.go` for unit tests. - [x] in the `tests/integration` directory if it involves interactions with a live Forgejo server. - I ran... - [x] `make pr-go` before pushing ### Documentation - [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change. - [x] I did not document these changes and I do not expect someone else to do it. ### Release notes - [x] This change will be noticed by a Forgejo user or admin (feature, bug fix, performance, etc.). I suggest to include a release note for this change. - [ ] This change is not visible to a Forgejo user or admin (refactor, dependency upgrade, etc.). I think there is no need to add a release note for this change. *The decision if the pull request will be shown in the release notes is up to the mergers / release team.* The content of the `release-notes/<pull request number>.md` file will serve as the basis for the release notes. If the file does not exist, the title of the pull request will be used instead. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/11401 Reviewed-by: Andreas Ahlenstorf <aahlenst@noreply.codeberg.org> Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net> Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
193 lines
6.3 KiB
Go
193 lines
6.3 KiB
Go
// Copyright 2025 The Forgejo Authors. All rights reserved.
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
package integration
|
|
|
|
import (
|
|
"net/url"
|
|
"strings"
|
|
"testing"
|
|
|
|
actions_model "forgejo.org/models/actions"
|
|
unit_model "forgejo.org/models/unit"
|
|
"forgejo.org/models/unittest"
|
|
user_model "forgejo.org/models/user"
|
|
"forgejo.org/modules/setting"
|
|
"forgejo.org/modules/util"
|
|
files_service "forgejo.org/services/repository/files"
|
|
"forgejo.org/tests"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestActionFetchTask_TaskCapacity(t *testing.T) {
|
|
if !setting.Database.Type.IsSQLite3() {
|
|
// mock repo runner only supported on SQLite testing
|
|
t.Skip()
|
|
}
|
|
|
|
onApplicationRun(t, func(t *testing.T, u *url.URL) {
|
|
user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
|
|
|
|
// create the repo
|
|
repo, _, f := tests.CreateDeclarativeRepo(t, user2, "repo-many-tasks",
|
|
[]unit_model.Type{unit_model.TypeActions}, nil,
|
|
[]*files_service.ChangeRepoFile{
|
|
{
|
|
Operation: "create",
|
|
TreePath: ".forgejo/workflows/matrix.yml",
|
|
ContentReader: strings.NewReader(`
|
|
on:
|
|
push:
|
|
jobs:
|
|
job1:
|
|
strategy:
|
|
# matrix creates 125 different jobs from one push...
|
|
matrix:
|
|
d1: [a, b, c, d, e]
|
|
d2: [a, b, c, d, e]
|
|
d3: [a, b, c, d, e]
|
|
runs-on: ubuntu-latest
|
|
steps:
|
|
- run: echo ${{ matrix.d1 }} ${{ matrix.d2 }} ${{ matrix.d3 }}
|
|
- run: sleep 2
|
|
`),
|
|
},
|
|
},
|
|
)
|
|
defer f()
|
|
|
|
runner := newMockRunner()
|
|
runner.registerAsRepoRunner(t, user2.Name, repo.Name, "mock-runner", []string{"ubuntu-latest"})
|
|
|
|
// Fetch with TaskCapacity undefined, set to nil, should return a single pending task
|
|
task := runner.fetchTask(t)
|
|
require.NotNil(t, task)
|
|
assert.Contains(t, string(task.GetWorkflowPayload()), "name: job1 (a, a, a)")
|
|
|
|
// After successfully fetching a task, the runner sets their next requested version to 0. This allows it to
|
|
// fetch back-to-back tasks without requiring that a server-side state change occurs. That behaviour is
|
|
// replicated here:
|
|
runner.lastTasksVersion = 0
|
|
|
|
// Fetch with TaskCapacity set to 1; additional should be nil
|
|
capacity := int64(1)
|
|
task, addt := runner.fetchMultipleTasks(t, &capacity)
|
|
require.NotNil(t, task, "task")
|
|
assert.Nil(t, addt, "addt")
|
|
assert.Contains(t, string(task.GetWorkflowPayload()), "name: job1 (a, a, b)")
|
|
|
|
runner.lastTasksVersion = 0
|
|
|
|
capacity = 10
|
|
task, addt = runner.fetchMultipleTasks(t, &capacity)
|
|
require.NotNil(t, task, "task")
|
|
require.NotNil(t, addt, "addt")
|
|
assert.Contains(t, string(task.GetWorkflowPayload()), "name: job1 (a, a, c)")
|
|
require.Len(t, addt, 9)
|
|
assert.Contains(t, string(addt[0].GetWorkflowPayload()), "name: job1 (a, a, d)")
|
|
})
|
|
}
|
|
|
|
func TestActionFetchTask_Idempotent(t *testing.T) {
|
|
if !setting.Database.Type.IsSQLite3() {
|
|
// mock repo runner only supported on SQLite testing
|
|
t.Skip()
|
|
}
|
|
|
|
onApplicationRun(t, func(t *testing.T, u *url.URL) {
|
|
user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
|
|
|
|
// create the repo
|
|
repo, _, f := tests.CreateDeclarativeRepo(t, user2, "repo-many-tasks",
|
|
[]unit_model.Type{unit_model.TypeActions}, nil,
|
|
[]*files_service.ChangeRepoFile{
|
|
{
|
|
Operation: "create",
|
|
TreePath: ".forgejo/workflows/matrix.yml",
|
|
ContentReader: strings.NewReader(`
|
|
on:
|
|
push:
|
|
jobs:
|
|
job1:
|
|
strategy:
|
|
matrix:
|
|
d1: [a, b]
|
|
runs-on: ubuntu-latest
|
|
steps:
|
|
- run: sleep 2
|
|
`),
|
|
},
|
|
},
|
|
)
|
|
defer f()
|
|
|
|
runner := newMockRunner()
|
|
runner.registerAsRepoRunner(t, user2.Name, repo.Name, "mock-runner", []string{"ubuntu-latest"})
|
|
|
|
runner.setRequestKey("4b518ff2-00c6-4c22-ba05-77d5b597c2b4")
|
|
|
|
// First request that fetches a task:
|
|
task1 := runner.fetchTask(t)
|
|
require.NotNil(t, task1)
|
|
assert.Contains(t, string(task1.GetWorkflowPayload()), "name: job1")
|
|
{
|
|
// Base assumption, the FORGEJO_TOKEN secret can be identified... this is typical but we'll verify that it
|
|
// doesn't work after the idempotent fetch.
|
|
taskTokenTest, err := actions_model.GetRunningTaskByToken(t.Context(), task1.Secrets["FORGEJO_TOKEN"])
|
|
require.NoError(t, err)
|
|
assert.Equal(t, task1.Id, taskTokenTest.ID)
|
|
}
|
|
|
|
// Having retrieved a task... if we sent a fetchTask call with the same requestKey then we expect to get the
|
|
// same task again:
|
|
task1fetchedAgain := runner.fetchTask(t)
|
|
require.NotNil(t, task1fetchedAgain)
|
|
assert.Contains(t, string(task1fetchedAgain.GetWorkflowPayload()), "name: job1")
|
|
|
|
assert.Equal(t, task1.Id, task1fetchedAgain.Id)
|
|
assert.Equal(t, task1.WorkflowPayload, task1fetchedAgain.WorkflowPayload)
|
|
m1 := task1.Context.AsMap()
|
|
m1fetchedAgain := task1fetchedAgain.Context.AsMap()
|
|
for k, v1 := range m1 {
|
|
v2 := m1fetchedAgain[k]
|
|
// "token" isn't expected to be the same as it is regenerated on recovery from idempotent fetch. But it is
|
|
// expected to be present, so we test for equal length. "gitea_runtime_token" is a signed JWT which can
|
|
// change between invocations based upon precise timestamps used, and so similarly should be validated to be
|
|
// present not necessarily identical.
|
|
if k == "token" || k == "gitea_runtime_token" {
|
|
assert.Len(t, v1.(string), len(v2.(string)))
|
|
} else {
|
|
assert.EqualValues(t, v1, v2, "context[%q]", k)
|
|
}
|
|
}
|
|
for k, v1 := range task1.Secrets {
|
|
v2 := task1fetchedAgain.Secrets[k]
|
|
if k == "FORGEJO_TOKEN" || k == "GITEA_TOKEN" || k == "GITHUB_TOKEN" {
|
|
// token isn't expected to be the same... but should be present.
|
|
assert.Len(t, v1, len(v2))
|
|
} else {
|
|
assert.Equal(t, v1, v2, "secret[%q]", k)
|
|
}
|
|
}
|
|
assert.Equal(t, task1.Needs, task1fetchedAgain.Needs)
|
|
assert.Equal(t, task1.Vars, task1fetchedAgain.Vars)
|
|
|
|
{
|
|
// Original FORGEJO_TOKEN should not be usable anymore.
|
|
_, err := actions_model.GetRunningTaskByToken(t.Context(), task1.Secrets["FORGEJO_TOKEN"])
|
|
require.ErrorIs(t, err, util.ErrNotExist)
|
|
// New FORGEJO_TOKEN should be usable.
|
|
taskTokenTest, err := actions_model.GetRunningTaskByToken(t.Context(), task1fetchedAgain.Secrets["FORGEJO_TOKEN"])
|
|
require.NoError(t, err)
|
|
assert.Equal(t, task1fetchedAgain.Id, taskTokenTest.ID)
|
|
}
|
|
|
|
// But now if we change the request key, we don't expect to get the same task anymore:
|
|
runner.setRequestKey("6d47d5f3-eaa2-449f-9040-8b20287401b3")
|
|
task2 := runner.fetchTask(t)
|
|
require.NotNil(t, task2)
|
|
assert.NotEqual(t, task1.Id, task2.Id)
|
|
})
|
|
}
|