forgejo/services/actions/cleanup.go
Manuel Ganter 5b6bbabd74 feat: implement ephemeral runners (#9962)
As described in [this comment](https://gitea.com/gitea/act_runner/issues/19#issuecomment-739221) one-job runners are not secure when running in host mode. We implemented a routine preventing runner tokens from receiving a second job in order to render a potentially compromised token useless. Also we implemented a routine that removes finished runners as soon as possible.

Big thanks to [ChristopherHX](https://github.com/ChristopherHX) who did all the work for gitea!

Rel: #9407

## Checklist

The [contributor guide](https://forgejo.org/docs/next/contributor/) contains information that will be helpful to first time contributors. There also are a few [conditions for merging Pull Requests in Forgejo repositories](https://codeberg.org/forgejo/governance/src/branch/main/PullRequestsAgreement.md). You are also welcome to join the [Forgejo development chatroom](https://matrix.to/#/#forgejo-development:matrix.org).

### Tests

- I added test coverage for Go changes...
  - [ ] in their respective `*_test.go` for unit tests.
  - [x] in the `tests/integration` directory if it involves interactions with a live Forgejo server.
- I added test coverage for JavaScript changes...
  - [ ] in `web_src/js/*.test.js` if it can be unit tested.
  - [ ] in `tests/e2e/*.test.e2e.js` if it requires interactions with a live Forgejo server (see also the [developer guide for JavaScript testing](https://codeberg.org/forgejo/forgejo/src/branch/forgejo/tests/e2e/README.md#end-to-end-tests)).

### Documentation

- [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change.
- [ ] I did not document these changes and I do not expect someone else to do it.

### Release notes

- [ ] I do not want this change to show in the release notes.
- [ ] I want the title to show in the release notes with a link to this pull request.
- [ ] I want the content of the `release-notes/<pull request number>.md` to be be used for the release notes instead of the title.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/9962
Reviewed-by: Andreas Ahlenstorf <aahlenst@noreply.codeberg.org>
Reviewed-by: Mathieu Fenniak <mfenniak@noreply.codeberg.org>
Co-authored-by: Manuel Ganter <manuel.ganter@think-ahead.tech>
Co-committed-by: Manuel Ganter <manuel.ganter@think-ahead.tech>
2026-02-16 18:56:56 +01:00

186 lines
6 KiB
Go

// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package actions
import (
"context"
"errors"
"fmt"
"os"
"time"
actions_model "forgejo.org/models/actions"
"forgejo.org/models/db"
actions_module "forgejo.org/modules/actions"
"forgejo.org/modules/log"
"forgejo.org/modules/setting"
"forgejo.org/modules/storage"
"forgejo.org/modules/timeutil"
"xorm.io/builder"
)
// Cleanup removes expired actions logs, data, artifacts and used ephemeral runners
func Cleanup(ctx context.Context) error {
// clean up expired artifacts
if err := CleanupArtifacts(ctx); err != nil {
return fmt.Errorf("failed to clean up artifacts: %w", err)
}
// clean up old logs
if err := CleanupLogs(ctx); err != nil {
return fmt.Errorf("failed to clean up logs: %w", err)
}
// clean up old ephemeral runners
if err := CleanupEphemeralRunners(ctx); err != nil {
return fmt.Errorf("failed to clean up old ephemeral runners: %w", err)
}
return nil
}
// CleanupArtifacts removes expired add need-deleted artifacts and set records expired status
func CleanupArtifacts(taskCtx context.Context) error {
if err := cleanExpiredArtifacts(taskCtx); err != nil {
return err
}
return cleanNeedDeleteArtifacts(taskCtx)
}
func cleanExpiredArtifacts(taskCtx context.Context) error {
artifacts, err := actions_model.ListNeedExpiredArtifacts(taskCtx)
if err != nil {
return err
}
log.Info("Found %d expired artifacts", len(artifacts))
for _, artifact := range artifacts {
if err := actions_model.SetArtifactExpired(taskCtx, artifact.ID); err != nil {
log.Error("Cannot set artifact %d expired: %v", artifact.ID, err)
continue
}
if err := storage.ActionsArtifacts.Delete(artifact.StoragePath); err != nil {
log.Error("Cannot delete artifact %d: %v", artifact.ID, err)
continue
}
log.Info("Artifact %d set expired", artifact.ID)
}
return nil
}
// deleteArtifactBatchSize is the batch size of deleting artifacts
const deleteArtifactBatchSize = 100
func cleanNeedDeleteArtifacts(taskCtx context.Context) error {
for {
artifacts, err := actions_model.ListPendingDeleteArtifacts(taskCtx, deleteArtifactBatchSize)
if err != nil {
return err
}
log.Info("Found %d artifacts pending deletion", len(artifacts))
for _, artifact := range artifacts {
if err := actions_model.SetArtifactDeleted(taskCtx, artifact.ID); err != nil {
log.Error("Cannot set artifact %d deleted: %v", artifact.ID, err)
continue
}
if err := storage.ActionsArtifacts.Delete(artifact.StoragePath); err != nil {
log.Error("Cannot delete artifact %d: %v", artifact.ID, err)
continue
}
log.Info("Artifact %d set deleted", artifact.ID)
}
if len(artifacts) < deleteArtifactBatchSize {
log.Debug("No more artifacts pending deletion")
break
}
}
return nil
}
const deleteLogBatchSize = 100
// CleanupLogs removes logs which are older than the configured retention time
func CleanupLogs(ctx context.Context) error {
olderThan := timeutil.TimeStampNow().AddDuration(-time.Duration(setting.Actions.LogRetentionDays) * 24 * time.Hour)
count := 0
for {
tasks, err := actions_model.FindOldTasksToExpire(ctx, olderThan, deleteLogBatchSize)
if err != nil {
return fmt.Errorf("find old tasks: %w", err)
}
for _, task := range tasks {
if err := actions_module.RemoveLogs(ctx, task.LogInStorage, task.LogFilename); err != nil && !errors.Is(err, os.ErrNotExist) {
log.Error("Failed to remove log %s (in storage %v) of task %v: %v", task.LogFilename, task.LogInStorage, task.ID, err)
// do not return error here, continue to next task
continue
}
task.LogIndexes = nil // clear log indexes since it's a heavy field
task.LogExpired = true
if err := actions_model.UpdateTask(ctx, task, "log_indexes", "log_expired"); err != nil {
log.Error("Failed to update task %v: %v", task.ID, err)
// do not return error here, continue to next task
continue
}
count++
log.Trace("Removed log %s of task %v", task.LogFilename, task.ID)
}
if len(tasks) < deleteLogBatchSize {
break
}
}
log.Info("Removed %d logs", count)
return nil
}
// CleanupEphemeralRunners removes used ephemeral runners which are no longer able to process jobs
func CleanupEphemeralRunners(ctx context.Context) error {
var ids []int
err := db.GetEngine(ctx).
Table("`action_runner`").
Select("DISTINCT `action_runner`.id").
Join("INNER", "`action_task`", "`action_task`.`runner_id` = `action_runner`.`id`").
Where(builder.Eq{"`action_runner`.`ephemeral`": true}).
And(builder.In("`action_task`.`status`", actions_model.DoneStatuses())).
Find(&ids)
if err != nil {
return fmt.Errorf("failed to find ephemeral runners: %w", err)
}
res, err := db.GetEngine(ctx).
In("id", ids).
Delete(&actions_model.ActionRunner{})
if err != nil {
return fmt.Errorf("failed to delete ephemeral runners: %w", err)
}
log.Info("Removed %d ephemeral runners", res)
return nil
}
// CleanupEphemeralRunnersByPickedTaskOfRepo removes all ephemeral runners that have active/finished tasks on the given repository
func CleanupEphemeralRunnersByPickedTaskOfRepo(ctx context.Context, repoID int64) error {
subQuery := builder.Select("`action_runner`.id").
From(builder.Select("*").From("`action_runner`"), "`action_runner`"). // mysql needs this redundant subquery
Join("INNER", "`action_task`", "`action_task`.`runner_id` = `action_runner`.`id`").
Where(builder.And(builder.Eq{"`action_runner`.`ephemeral`": true}, builder.Eq{"`action_task`.`repo_id`": repoID}))
b := builder.Delete(builder.In("id", subQuery)).From("`action_runner`")
res, err := db.GetEngine(ctx).Exec(b)
if err != nil {
return fmt.Errorf("find runners: %w", err)
}
affected, err := res.RowsAffected()
if err != nil {
return err
}
log.Info("Removed %d runners", affected)
return nil
}
// CleanupOfflineRunners removes offline runners
func CleanupOfflineRunners(ctx context.Context, duration time.Duration, globalOnly bool) error {
olderThan := timeutil.TimeStampNow().AddDuration(-duration)
return actions_model.DeleteOfflineRunners(ctx, olderThan, globalOnly)
}