nextcloud/lib/private/TaskProcessing/Db/TaskMapper.php
Yoan Bozhilov aa5f45cecd fix(taskprocessing): claim tasks atomically with SKIP LOCKED + composite index
Replace the worker retry/ignore-list claim-loop with a single atomic
SELECT ... FOR UPDATE SKIP LOCKED claim (SQLite bounded-retry fallback),
preserving the no-duplicate guarantee while removing the thundering-herd
contention that throttled backlog draining. Add a (status,type,last_updated)
index via the table-creating migration + db:add-missing-indices listener.

Signed-off-by: Yoan Bozhilov <bygadd@gmail.com>
Assisted-by: Claude Code:claude-opus-4-8
2026-06-17 19:00:53 +02:00

473 lines
16 KiB
PHP

<?php
declare(strict_types=1);
/**
* SPDX-FileCopyrightText: 2023 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
namespace OC\TaskProcessing\Db;
use OCP\AppFramework\Db\DoesNotExistException;
use OCP\AppFramework\Db\Entity;
use OCP\AppFramework\Db\MultipleObjectsReturnedException;
use OCP\AppFramework\Db\QBMapper;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\DB\Exception;
use OCP\DB\QueryBuilder\ConflictResolutionMode;
use OCP\DB\QueryBuilder\IQueryBuilder;
use OCP\IDBConnection;
/**
* @extends QBMapper<Task>
*/
class TaskMapper extends QBMapper {
/**
 * Binds the mapper to the `taskprocessing_tasks` table and the Task entity class.
 *
 * @param IDBConnection $db Connection handed to the parent mapper
 * @param ITimeFactory $timeFactory Clock used for the timestamps this mapper writes
 */
public function __construct(
IDBConnection $db,
private ITimeFactory $timeFactory,
) {
parent::__construct($db, 'taskprocessing_tasks', Task::class);
}
/**
 * Load a single task by its primary key.
 *
 * @param int $id Value matched against the `id` column
 * @return Task
 * @throws Exception
 * @throws DoesNotExistException
 * @throws MultipleObjectsReturnedException
 */
public function find(int $id): Task {
	$query = $this->db->getQueryBuilder();
	$matchesId = $query->expr()->eq('id', $query->createPositionalParameter($id));

	$query->select(Task::$columns)
		->from($this->tableName)
		->where($matchesId);

	return $this->findEntity($query);
}
/**
 * Fetch the scheduled task with the smallest `last_updated` value.
 *
 * @param list<string> $taskTypes When non-empty, only tasks whose `type` equals one of these are considered
 * @param list<int> $taskIdsToIgnore When non-empty, tasks with these ids are excluded
 * @return Task
 * @throws DoesNotExistException
 * @throws Exception
 */
public function findOldestScheduledByType(array $taskTypes, array $taskIdsToIgnore): Task {
	$query = $this->db->getQueryBuilder();

	// Positional parameters are bound in creation order, so conditions are
	// created in the same order in which they appear in the final SQL.
	$isScheduled = $query->expr()->eq(
		'status',
		$query->createPositionalParameter(\OCP\TaskProcessing\Task::STATUS_SCHEDULED, IQueryBuilder::PARAM_INT)
	);
	$query->select(Task::$columns)
		->from($this->tableName)
		->where($isScheduled)
		->orderBy('last_updated', 'ASC')
		->setMaxResults(1);

	if ($taskTypes !== []) {
		$typeConditions = [];
		foreach ($taskTypes as $type) {
			$typeConditions[] = $query->expr()->eq('type', $query->createPositionalParameter($type));
		}
		$query->andWhere($query->expr()->orX(...$typeConditions));
	}

	if ($taskIdsToIgnore !== []) {
		$ignoredIds = $query->createNamedParameter($taskIdsToIgnore, IQueryBuilder::PARAM_INT_ARRAY);
		$query->andWhere($query->expr()->notIn('id', $ignoredIds));
	}

	return $this->findEntity($query);
}
/**
 * Atomically claim the oldest scheduled task of the given task types and mark it RUNNING.
 *
 * Instead of every worker racing for the single oldest task and growing a
 * per-worker `id NOT IN (...)` ignore list, each worker claims a distinct task.
 *
 * The strategy is chosen from an allow-list of database providers:
 *  - MySQL and PostgreSQL use {@see claimWithSkipLocked}, a single transaction built around
 *    SELECT ... ORDER BY last_updated ASC LIMIT 1 FOR UPDATE SKIP LOCKED.
 *    NOTE(review): MariaDB is expected to be reported as PLATFORM_MYSQL by the
 *    non-strict provider lookup used here - confirm against IDBConnection.
 *  - Every other provider uses {@see claimWithBoundedRetry}. SQLite has no SKIP LOCKED
 *    support (Doctrine throws "Operation 'SKIP LOCKED' is not supported by platform").
 *    Oracle is routed here too because it is expected to reject FOR UPDATE on a
 *    row-limited query (ORA-02014) - TODO confirm on a real Oracle instance.
 *
 * Using an allow-list rather than excluding only SQLite means an unknown or
 * newly added provider degrades to the portable claim instead of failing.
 *
 * @param list<string> $taskTypes When non-empty, only tasks of these task type IDs are considered.
 * @return Task|null The claimed task (status RUNNING), or null if nothing could be claimed.
 * @throws Exception
 */
public function claimOldestScheduledTask(array $taskTypes): ?Task {
	return match ($this->db->getDatabaseProvider()) {
		IDBConnection::PLATFORM_MYSQL,
		IDBConnection::PLATFORM_POSTGRES => $this->claimWithSkipLocked($taskTypes),
		// SQLite, Oracle and anything unrecognised: portable bounded retry.
		default => $this->claimWithBoundedRetry($taskTypes),
	};
}
/**
 * Atomic claim using FOR UPDATE SKIP LOCKED in a single transaction.
 *
 * Selects the scheduled row with the smallest `last_updated` (optionally restricted
 * to the given task types), locking it and skipping rows locked by other
 * transactions, then flips it to RUNNING and stamps `started_at` before committing.
 *
 * @param list<string> $taskTypes When non-empty, only tasks whose `type` equals one of these are considered
 * @return Task|null The claimed task with status RUNNING and started_at set, or null
 *                   when no row was selected or the guarded UPDATE changed nothing
 * @throws Exception
 */
private function claimWithSkipLocked(array $taskTypes): ?Task {
	$this->db->beginTransaction();
	try {
		$select = $this->db->getQueryBuilder();
		// Task::$columns is the column list every other query in this mapper selects.
		$select->select(Task::$columns)
			->from($this->tableName)
			->where($select->expr()->eq('status', $select->createPositionalParameter(\OCP\TaskProcessing\Task::STATUS_SCHEDULED, IQueryBuilder::PARAM_INT)))
			->orderBy('last_updated', 'ASC')
			->setMaxResults(1)
			->forUpdate(ConflictResolutionMode::SkipLocked);

		if ($taskTypes !== []) {
			$typeConditions = [];
			foreach ($taskTypes as $taskType) {
				$typeConditions[] = $select->expr()->eq('type', $select->createPositionalParameter($taskType));
			}
			$select->andWhere($select->expr()->orX(...$typeConditions));
		}

		$result = $select->executeQuery();
		$row = $result->fetch();
		$result->closeCursor();

		if ($row === false) {
			// Nothing schedulable (or every candidate is locked by another worker).
			$this->db->commit();
			return null;
		}

		/** @var Task $task */
		$task = $this->mapRowToEntity($row);

		// Record the start time at claim time: because the worker receives the task
		// already in status RUNNING, the later SCHEDULED -> RUNNING transition in
		// Manager::setTaskStatus is skipped and would otherwise never persist started_at.
		$startedAt = $this->timeFactory->now()->getTimestamp();

		// Guarded transition SCHEDULED -> RUNNING. The row is locked for this
		// transaction, so the guard is belt-and-braces rather than strictly required.
		// Positional parameters are created in the order they appear in the SQL.
		$update = $this->db->getQueryBuilder();
		$update->update($this->tableName)
			->set('status', $update->createPositionalParameter(\OCP\TaskProcessing\Task::STATUS_RUNNING, IQueryBuilder::PARAM_INT))
			->set('started_at', $update->createPositionalParameter($startedAt, IQueryBuilder::PARAM_INT))
			->where($update->expr()->eq('id', $update->createPositionalParameter($task->getId(), IQueryBuilder::PARAM_INT)))
			->andWhere($update->expr()->eq('status', $update->createPositionalParameter(\OCP\TaskProcessing\Task::STATUS_SCHEDULED, IQueryBuilder::PARAM_INT)));
		$affected = $update->executeStatement();

		$this->db->commit();

		if ($affected === 0) {
			// Lost the race (should not happen under SKIP LOCKED); leave the task SCHEDULED.
			return null;
		}

		// Mirror the persisted change on the in-memory entity handed to the worker.
		$task->setStatus(\OCP\TaskProcessing\Task::STATUS_RUNNING);
		$task->setStartedAt($startedAt);
		return $task;
	} catch (\Throwable $e) {
		// Release the row lock and undo any partial work before propagating.
		$this->db->rollBack();
		throw $e;
	}
}
/**
 * Fallback claim for databases without a usable SKIP LOCKED (e.g. SQLite).
 *
 * Repeatedly fetches the oldest scheduled task and attempts a single guarded
 * UPDATE ... SET status = RUNNING, started_at = now WHERE id = ? AND status = SCHEDULED.
 * Because the guard requires the row to still be SCHEDULED, a task that another
 * worker claimed - or that left SCHEDULED for any other state - between the
 * SELECT and the UPDATE is never flipped to RUNNING here. Status and started_at
 * are written by the same statement, so a claimed row always carries its start time.
 *
 * Tasks that could not be claimed are added to a short ignore list so the next
 * iteration moves on. The loop is bounded to avoid unbounded looping under contention.
 *
 * @param list<string> $taskTypes When non-empty, only tasks whose `type` equals one of these are considered
 * @return Task|null The claimed task with status RUNNING and started_at set, or null
 *                   when nothing is scheduled or every attempt was lost
 * @throws Exception
 */
private function claimWithBoundedRetry(array $taskTypes): ?Task {
	// A handful of attempts is plenty: at most a few rows can be claimed out
	// from under us before we either win or run dry.
	$maxAttempts = 10;
	$taskIdsToIgnore = [];

	for ($attempt = 0; $attempt < $maxAttempts; $attempt++) {
		try {
			$task = $this->findOldestScheduledByType($taskTypes, $taskIdsToIgnore);
		} catch (DoesNotExistException) {
			// No scheduled task left outside the ignore list.
			return null;
		}

		// Record the start time at claim time: the worker receives the task already
		// RUNNING, so Manager::setTaskStatus would otherwise never write it.
		$startedAt = $this->timeFactory->now()->getTimestamp();

		// Positional parameters are created in the order they appear in the SQL.
		$update = $this->db->getQueryBuilder();
		$update->update($this->tableName)
			->set('status', $update->createPositionalParameter(\OCP\TaskProcessing\Task::STATUS_RUNNING, IQueryBuilder::PARAM_INT))
			->set('started_at', $update->createPositionalParameter($startedAt, IQueryBuilder::PARAM_INT))
			->where($update->expr()->eq('id', $update->createPositionalParameter($task->getId(), IQueryBuilder::PARAM_INT)))
			->andWhere($update->expr()->eq('status', $update->createPositionalParameter(\OCP\TaskProcessing\Task::STATUS_SCHEDULED, IQueryBuilder::PARAM_INT)));

		try {
			$affected = $update->executeStatement();
		} catch (Exception) {
			// Best effort, matching lockTask: a failed claim statement is treated
			// as a lost attempt and the next candidate is tried.
			$affected = 0;
		}

		if ($affected !== 0) {
			// Mirror the persisted change on the in-memory entity handed to the worker.
			$task->setStatus(\OCP\TaskProcessing\Task::STATUS_RUNNING);
			$task->setStartedAt($startedAt);
			return $task;
		}

		// Could not claim it; skip this id and try the next oldest.
		$taskIdsToIgnore[] = $task->getId();
	}

	return null;
}
/**
 * Load a task by id, restricted to a given owner.
 *
 * A null $userId matches only rows whose `user_id` column IS NULL.
 *
 * @param int $id Value matched against the `id` column
 * @param string|null $userId Owner to match, or null for rows without an owner
 * @return Task
 * @throws DoesNotExistException
 * @throws Exception
 * @throws MultipleObjectsReturnedException
 */
public function findByIdAndUser(int $id, ?string $userId): Task {
	$query = $this->db->getQueryBuilder();

	// Created in SQL order: the id parameter first, then the (optional) user parameter.
	$matchesId = $query->expr()->eq('id', $query->createPositionalParameter($id));
	$matchesUser = $userId === null
		? $query->expr()->isNull('user_id')
		: $query->expr()->eq('user_id', $query->createPositionalParameter($userId));

	$query->select(Task::$columns)
		->from($this->tableName)
		->where($matchesId)
		->andWhere($matchesUser);

	return $this->findEntity($query);
}
/**
 * List tasks of a user, optionally narrowed by task type and custom id.
 *
 * NOTE(review): $userId is always bound as a parameter to `user_id = ?`; a null
 * value is not translated to IS NULL the way findByIdAndUser does - confirm intended.
 *
 * @param string|null $userId Value compared with the `user_id` column
 * @param string|null $taskType When not null, the `type` column must equal it
 * @param string|null $customId When not null, the `custom_id` column must equal it
 * @return list<Task>
 * @throws Exception
 */
public function findByUserAndTaskType(?string $userId, ?string $taskType = null, ?string $customId = null): array {
	$query = $this->db->getQueryBuilder();
	$query->select(Task::$columns)
		->from($this->tableName)
		->where($query->expr()->eq('user_id', $query->createPositionalParameter($userId)));

	// Optional equality filters, applied in this order when a value was given.
	$optionalFilters = [
		'type' => $taskType,
		'custom_id' => $customId,
	];
	foreach ($optionalFilters as $column => $value) {
		if ($value === null) {
			continue;
		}
		$query->andWhere($query->expr()->eq($column, $query->createPositionalParameter($value)));
	}

	return $this->findEntities($query);
}
/**
 * List tasks of a user that were created by a given app, optionally narrowed by custom id.
 *
 * @param string|null $userId Value compared with the `user_id` column
 * @param string $appId Value compared with the `app_id` column
 * @param string|null $customId When not null, the `custom_id` column must equal it
 * @return list<Task>
 * @throws Exception
 */
public function findUserTasksByApp(?string $userId, string $appId, ?string $customId = null): array {
	$query = $this->db->getQueryBuilder();

	// Created in SQL order: user, app, then the optional custom id.
	$matchesUser = $query->expr()->eq('user_id', $query->createPositionalParameter($userId));
	$matchesApp = $query->expr()->eq('app_id', $query->createPositionalParameter($appId));

	$query->select(Task::$columns)
		->from($this->tableName)
		->where($matchesUser)
		->andWhere($matchesApp);

	if ($customId !== null) {
		$matchesCustomId = $query->expr()->eq('custom_id', $query->createPositionalParameter($customId));
		$query->andWhere($matchesCustomId);
	}

	return $this->findEntities($query);
}
/**
 * Generic task search; every filter that is left null is not applied.
 *
 * The user filter is special: it is skipped only for the empty string. Any other
 * value, including null, is bound as a parameter to `user_id = ?`.
 *
 * @param string|null $userId Empty string disables the user filter
 * @param string|null $taskType When not null, the `type` column must equal it
 * @param string|null $appId When not null, the `app_id` column must equal it
 * @param string|null $customId When not null, the `custom_id` column must equal it
 * @param int|null $status When not null, the `status` column must equal it
 * @param int|null $scheduleAfter When not null, `scheduled_at` must be set and greater than it
 * @param int|null $endedBefore When not null, `ended_at` must be set and lower than it
 * @return list<Task>
 * @throws Exception
 */
public function findTasks(
	?string $userId, ?string $taskType = null, ?string $appId = null, ?string $customId = null,
	?int $status = null, ?int $scheduleAfter = null, ?int $endedBefore = null): array {
	$query = $this->db->getQueryBuilder();
	$query->select(Task::$columns)
		->from($this->tableName);

	// empty string: no userId filter
	if ($userId !== '') {
		$query->where($query->expr()->eq('user_id', $query->createPositionalParameter($userId)));
	}

	// Plain string equality filters, applied in this order when a value was given.
	$stringFilters = [
		'type' => $taskType,
		'app_id' => $appId,
		'custom_id' => $customId,
	];
	foreach ($stringFilters as $column => $value) {
		if ($value === null) {
			continue;
		}
		$query->andWhere($query->expr()->eq($column, $query->createPositionalParameter($value)));
	}

	if ($status !== null) {
		$statusParam = $query->createPositionalParameter($status, IQueryBuilder::PARAM_INT);
		$query->andWhere($query->expr()->eq('status', $statusParam));
	}

	if ($scheduleAfter !== null) {
		$query->andWhere($query->expr()->isNotNull('scheduled_at'));
		$lowerBound = $query->createPositionalParameter($scheduleAfter, IQueryBuilder::PARAM_INT);
		$query->andWhere($query->expr()->gt('scheduled_at', $lowerBound));
	}

	if ($endedBefore !== null) {
		$query->andWhere($query->expr()->isNotNull('ended_at'));
		$upperBound = $query->createPositionalParameter($endedBefore, IQueryBuilder::PARAM_INT);
		$query->andWhere($query->expr()->lt('ended_at', $upperBound));
	}

	return $this->findEntities($query);
}
/**
 * Delete tasks whose `last_updated` is more than $timeout before the current time.
 *
 * @param int $timeout Subtracted from the current timestamp to obtain the cut-off
 * @param bool $force If true, ignore the allow_cleanup flag
 * @return int the number of deleted tasks
 * @throws Exception
 */
public function deleteOlderThan(int $timeout, bool $force = false): int {
	$cutoff = $this->timeFactory->getDateTime()->getTimestamp() - $timeout;

	$delete = $this->db->getQueryBuilder();
	$delete->delete($this->tableName)
		->where($delete->expr()->lt('last_updated', $delete->createPositionalParameter($cutoff)));

	if ($force === false) {
		// Unless forced, only rows flagged allow_cleanup = 1 are removed.
		$cleanupAllowed = $delete->createPositionalParameter(1, IQueryBuilder::PARAM_INT);
		$delete->andWhere($delete->expr()->eq('allow_cleanup', $cleanupAllowed));
	}

	return $delete->executeStatement();
}
/**
 * Lazily yield the tasks whose `last_updated` is more than $timeout before the current time.
 *
 * Applies the same row selection as deleteOlderThan, but reads instead of deleting.
 *
 * @param int $timeout Subtracted from the current timestamp to obtain the cut-off
 * @param bool $force If true, ignore the allow_cleanup flag
 * @return \Generator<Task>
 * @throws Exception
 */
public function getTasksToCleanup(int $timeout, bool $force = false): \Generator {
	$cutoff = $this->timeFactory->getDateTime()->getTimestamp() - $timeout;

	$select = $this->db->getQueryBuilder();
	$select->select(Task::$columns)
		->from($this->tableName)
		->where($select->expr()->lt('last_updated', $select->createPositionalParameter($cutoff)));

	if ($force === false) {
		// Unless forced, only rows flagged allow_cleanup = 1 are returned.
		$cleanupAllowed = $select->createPositionalParameter(1, IQueryBuilder::PARAM_INT);
		$select->andWhere($select->expr()->eq('allow_cleanup', $cleanupAllowed));
	}

	foreach ($this->yieldEntities($select) as $task) {
		yield $task;
	}
}
/**
 * Persist an entity, stamping its last-updated time with the current timestamp first.
 *
 * @param Entity $entity Entity to persist; its last-updated value is overwritten
 * @return Entity Whatever the parent mapper's update returns
 */
#[\Override]
public function update(Entity $entity): Entity {
	$now = $this->timeFactory->now()->getTimestamp();
	$entity->setLastUpdated($now);

	return parent::update($entity);
}
/**
 * Set a task's status to RUNNING unless it is already RUNNING.
 *
 * Only the `status` column is written. The guard is "status is not RUNNING",
 * not "status is SCHEDULED": a row in any other state is flipped to RUNNING too.
 * Database errors are swallowed and reported as "nothing changed".
 *
 * @param Entity $entity Task to lock; only its id is read
 * @return int Number of rows changed; 0 when the row was already RUNNING, no row
 *             matched the id, or the statement failed
 */
public function lockTask(Entity $entity): int {
	$running = \OCP\TaskProcessing\Task::STATUS_RUNNING;

	// Positional parameters are created in the order they appear in the SQL.
	$qb = $this->db->getQueryBuilder();
	$qb->update($this->tableName)
		->set('status', $qb->createPositionalParameter($running, IQueryBuilder::PARAM_INT))
		->where($qb->expr()->eq('id', $qb->createPositionalParameter($entity->getId(), IQueryBuilder::PARAM_INT)))
		->andWhere($qb->expr()->neq('status', $qb->createPositionalParameter($running, IQueryBuilder::PARAM_INT)));

	try {
		return $qb->executeStatement();
	} catch (Exception) {
		// Deliberate best effort: callers treat 0 as "could not lock".
		return 0;
	}
}
/**
 * Fetch up to $numberOfTasks scheduled tasks, ordered by ascending `last_updated`.
 *
 * @param list<string> $taskTypes When non-empty, only tasks whose `type` equals one of these are considered
 * @param list<int> $taskIdsToIgnore When non-empty, tasks with these ids are excluded
 * @param int $numberOfTasks Maximum number of rows to return
 * @return list<Task>
 * @throws Exception
 */
public function findNOldestScheduledByType(array $taskTypes, array $taskIdsToIgnore, int $numberOfTasks) {
	$query = $this->db->getQueryBuilder();

	// Positional parameters are bound in creation order, so conditions are
	// created in the same order in which they appear in the final SQL.
	$isScheduled = $query->expr()->eq(
		'status',
		$query->createPositionalParameter(\OCP\TaskProcessing\Task::STATUS_SCHEDULED, IQueryBuilder::PARAM_INT)
	);
	$query->select(Task::$columns)
		->from($this->tableName)
		->where($isScheduled)
		->orderBy('last_updated', 'ASC')
		->setMaxResults($numberOfTasks);

	if ($taskTypes !== []) {
		// array_values() drops any keys so the spread below stays purely positional.
		$typeConditions = array_map(
			fn ($taskType) => $query->expr()->eq('type', $query->createPositionalParameter($taskType)),
			array_values($taskTypes),
		);
		$query->andWhere($query->expr()->orX(...$typeConditions));
	}

	if ($taskIdsToIgnore !== []) {
		$ignoredIds = $query->createNamedParameter($taskIdsToIgnore, IQueryBuilder::PARAM_INT_ARRAY);
		$query->andWhere($query->expr()->notIn('id', $ignoredIds));
	}

	return $this->findEntities($query);
}
/**
 * Count the tasks in a given status, optionally restricted to a set of task types.
 *
 * An empty type list counts tasks of every type. Otherwise the type list is
 * processed in chunks of 900 ids and the per-chunk counts are summed.
 * NOTE(review): the chunk size presumably keeps each IN (...) list below
 * database parameter limits - confirm.
 *
 * @param list<string> $taskTypeIds Task type ids to restrict to; empty for no restriction
 * @param int $status Value matched against the `status` column
 * @return int
 * @throws Exception
 */
public function countByStatus(array $taskTypeIds, int $status): int {
	if ($taskTypeIds === []) {
		return $this->countByStatusQuery($status);
	}

	$perChunkCounts = array_map(
		fn (array $chunk): int => $this->countByStatusQuery($status, $chunk),
		array_chunk($taskTypeIds, 900),
	);

	return array_sum($perChunkCounts);
}
/**
 * Run a single COUNT(id) query for one status and, optionally, one list of task types.
 *
 * @param int $status Value matched against the `status` column
 * @param array|null $taskTypeIds When not null, `type` must be one of these values
 * @return int The count reported by the database, cast to int
 * @throws Exception
 */
private function countByStatusQuery(int $status, ?array $taskTypeIds = null): int {
	$query = $this->db->getQueryBuilder();
	$hasStatus = $query->expr()->eq('status', $query->createNamedParameter($status, IQueryBuilder::PARAM_INT));

	$query->select($query->func()->count('id'))
		->from($this->tableName)
		->where($hasStatus);

	if ($taskTypeIds !== null) {
		$types = $query->createNamedParameter($taskTypeIds, IQueryBuilder::PARAM_STR_ARRAY);
		$query->andWhere($query->expr()->in('type', $types));
	}

	$cursor = $query->executeQuery();
	$total = (int)$cursor->fetchOne();
	$cursor->closeCursor();

	return $total;
}
/**
 * Tell whether at least one task of the given type currently has status RUNNING.
 *
 * Fetches at most one matching id, so the answer does not depend on how many
 * running tasks exist.
 *
 * @param string $getTaskTypeId Value matched against the `type` column
 * @return bool True when a matching row was fetched
 * @throws Exception
 */
public function hasRunningTasksForTaskType(string $getTaskTypeId): bool {
	$query = $this->db->getQueryBuilder();

	$ofType = $query->expr()->eq('type', $query->createNamedParameter($getTaskTypeId));
	$isRunning = $query->expr()->eq(
		'status',
		$query->createNamedParameter(\OCP\TaskProcessing\Task::STATUS_RUNNING, IQueryBuilder::PARAM_INT)
	);

	$query->select('id')
		->from($this->tableName)
		->where($ofType)
		->andWhere($isRunning)
		->setMaxResults(1);

	$cursor = $query->executeQuery();
	$found = $cursor->fetch() !== false;
	$cursor->closeCursor();

	return $found;
}
}