Revert "[stable33] perf(preview): bulk process preview regeneration"

This commit is contained in:
John Molakvoæ 2026-03-26 15:41:10 +01:00 committed by GitHub
parent 7bf9845205
commit cbebc8214f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 68 additions and 509 deletions

View file

@ -16,7 +16,6 @@ use OC\Files\SimpleFS\SimpleFile;
use OC\Preview\Db\Preview;
use OC\Preview\Db\PreviewMapper;
use OCP\DB\Exception;
use OCP\DB\QueryBuilder\IQueryBuilder;
use OCP\Files\IMimeTypeDetector;
use OCP\Files\IMimeTypeLoader;
use OCP\Files\IRootFolder;
@ -31,8 +30,6 @@ use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
class LocalPreviewStorage implements IPreviewStorage {
private const SCAN_BATCH_SIZE = 1000;
public function __construct(
private readonly IConfig $config,
private readonly PreviewMapper $previewMapper,
@ -120,178 +117,81 @@ class LocalPreviewStorage implements IPreviewStorage {
if (!file_exists($this->getPreviewRootFolder())) {
return 0;
}
$scanner = new RecursiveDirectoryIterator($this->getPreviewRootFolder());
$previewsFound = 0;
/**
* Use an associative array keyed by path for O(1) lookup instead of
* the O(n) in_array() the original code used.
*
* @var array<string, true> $skipPaths
*/
$skipPaths = [];
/**
* Pending previews grouped by fileId. A single original file can have
* many preview variants (different sizes/formats), so we group them to
* issue one filecache lookup per original file rather than one per
* preview variant.
*
* @var array<int, list<array{preview: Preview, filePath: string, realPath: string}>> $pendingByFileId
*/
$pendingByFileId = [];
/**
* path_hash => realPath for legacy filecache entries that need to be
* cleaned up. Only populated when $checkForFileCache is true.
*
* @var array<string, string> $pendingPathHashes
*/
$pendingPathHashes = [];
$pendingCount = 0;
$skipFiles = [];
foreach (new RecursiveIteratorIterator($scanner) as $file) {
if (!$file->isFile()) {
continue;
}
$filePath = $file->getPathname();
if (isset($skipPaths[$filePath])) {
continue;
}
$preview = Preview::fromPath($filePath, $this->mimeTypeDetector);
if ($preview === false) {
$this->logger->error('Unable to parse preview information for ' . $file->getRealPath());
continue;
}
$preview->setSize($file->getSize());
$preview->setMtime($file->getMtime());
$preview->setEncrypted(false);
$realPath = $file->getRealPath();
$pendingByFileId[$preview->getFileId()][] = [
'preview' => $preview,
'filePath' => $filePath,
'realPath' => $realPath,
];
$pendingCount++;
if ($checkForFileCache) {
$relativePath = str_replace($this->getRootFolder() . '/', '', $realPath);
$pendingPathHashes[md5($relativePath)] = $realPath;
}
if ($pendingCount >= self::SCAN_BATCH_SIZE) {
$this->connection->beginTransaction();
if ($file->isFile() && !in_array((string)$file, $skipFiles, true)) {
$preview = Preview::fromPath((string)$file, $this->mimeTypeDetector);
if ($preview === false) {
$this->logger->error('Unable to parse preview information for ' . $file->getRealPath());
continue;
}
try {
$previewsFound += $this->processScanBatch($pendingByFileId, $pendingPathHashes, $checkForFileCache, $skipPaths);
$this->connection->commit();
} catch (\Exception $e) {
$this->connection->rollBack();
$this->logger->error($e->getMessage(), ['exception' => $e]);
throw $e;
}
$pendingByFileId = [];
$pendingPathHashes = [];
$pendingCount = 0;
}
}
$preview->setSize($file->getSize());
$preview->setMtime($file->getMtime());
$preview->setEncrypted(false);
if ($pendingCount > 0) {
$this->connection->beginTransaction();
try {
$previewsFound += $this->processScanBatch($pendingByFileId, $pendingPathHashes, $checkForFileCache, $skipPaths);
$this->connection->commit();
} catch (\Exception $e) {
$this->connection->rollBack();
$this->logger->error($e->getMessage(), ['exception' => $e]);
throw $e;
}
}
$qb = $this->connection->getQueryBuilder();
$result = $qb->select('storage', 'etag', 'mimetype')
->from('filecache')
->where($qb->expr()->eq('fileid', $qb->createNamedParameter($preview->getFileId())))
->setMaxResults(1)
->runAcrossAllShards() // Unavoidable because we can't extract the storage_id from the preview name
->executeQuery()
->fetchAssociative();
return $previewsFound;
}
/**
* Process one batch of preview files collected during scan().
*
* @param array<int, list<array{preview: Preview, filePath: string, realPath: string}>> $pendingByFileId
* @param array<string, string> $pendingPathHashes path_hash => realPath
* @param array<string, true> $skipPaths Modified in place: newly-moved paths are added so the outer iterator skips them.
*/
private function processScanBatch(
array $pendingByFileId,
array $pendingPathHashes,
bool $checkForFileCache,
array &$skipPaths,
): int {
$filecacheByFileId = $this->fetchFilecacheByFileIds(array_keys($pendingByFileId));
$legacyByPathHash = [];
if ($checkForFileCache && $pendingPathHashes !== []) {
$legacyByPathHash = $this->fetchFilecacheByPathHashes(array_keys($pendingPathHashes));
}
$previewsFound = 0;
foreach ($pendingByFileId as $fileId => $items) {
if (!isset($filecacheByFileId[$fileId])) {
// Original file has been deleted clean up all its previews.
foreach ($items as $item) {
$this->logger->warning('Original file ' . $fileId . ' was not found. Deleting preview at ' . $item['realPath']);
@unlink($item['realPath']);
}
continue;
}
$filecacheRow = $filecacheByFileId[$fileId];
foreach ($items as $item) {
$preview = $item['preview'];
if ($checkForFileCache) {
$relativePath = str_replace($this->getRootFolder() . '/', '', $item['realPath']);
$pathHash = md5($relativePath);
if (isset($legacyByPathHash[$pathHash])) {
$legacyRow = $legacyByPathHash[$pathHash];
$qb = $this->connection->getQueryBuilder();
$qb->delete('filecache')
->where($qb->expr()->eq('fileid', $qb->createNamedParameter($legacyRow['fileid'])))
->andWhere($qb->expr()->eq('storage', $qb->createNamedParameter($legacyRow['storage'])))
->executeStatement();
$this->deleteParentsFromFileCache((int)$legacyRow['parent'], (int)$legacyRow['storage']);
if ($result === false) {
// original file is deleted
$this->logger->warning('Original file ' . $preview->getFileId() . ' was not found. Deleting preview at ' . $file->getRealPath());
@unlink($file->getRealPath());
continue;
}
}
$preview->setStorageId((int)$filecacheRow['storage']);
$preview->setEtag($filecacheRow['etag']);
$preview->setSourceMimetype($this->mimeTypeLoader->getMimetypeById((int)$filecacheRow['mimetype']));
$preview->generateId();
if ($checkForFileCache) {
$relativePath = str_replace($this->getRootFolder() . '/', '', $file->getRealPath());
$qb = $this->connection->getQueryBuilder();
$result2 = $qb->select('fileid', 'storage', 'etag', 'mimetype', 'parent')
->from('filecache')
->where($qb->expr()->eq('path_hash', $qb->createNamedParameter(md5($relativePath))))
->runAcrossAllShards()
->setMaxResults(1)
->executeQuery()
->fetchAssociative();
$this->connection->beginTransaction();
try {
if ($result2 !== false) {
$qb->delete('filecache')
->where($qb->expr()->eq('fileid', $qb->createNamedParameter($result2['fileid'])))
->andWhere($qb->expr()->eq('storage', $qb->createNamedParameter($result2['storage'])))
->executeStatement();
$this->deleteParentsFromFileCache((int)$result2['parent'], (int)$result2['storage']);
}
}
$preview->setStorageId((int)$result['storage']);
$preview->setEtag($result['etag']);
$preview->setSourceMimetype($this->mimeTypeLoader->getMimetypeById((int)$result['mimetype']));
$preview->generateId();
// try to insert, if that fails the preview is already in the DB
$this->previewMapper->insert($preview);
$this->connection->commit();
// Move old flat preview to new format
$dirName = str_replace($this->getPreviewRootFolder(), '', $file->getPath());
if (preg_match('/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9]+/', $dirName) !== 1) {
$previewPath = $this->constructPath($preview);
$this->createParentFiles($previewPath);
$ok = rename($file->getRealPath(), $previewPath);
if (!$ok) {
throw new LogicException('Failed to move ' . $file->getRealPath() . ' to ' . $previewPath);
}
$skipFiles[] = $previewPath;
}
} catch (Exception $e) {
$this->connection->rollBack();
if ($e->getReason() !== Exception::REASON_UNIQUE_CONSTRAINT_VIOLATION) {
throw $e;
}
}
// Move old flat preview to new nested directory format.
$dirName = str_replace($this->getPreviewRootFolder(), '', $item['filePath']);
if (preg_match('/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9a-e]\/[0-9]+/', $dirName) !== 1) {
$previewPath = $this->constructPath($preview);
$this->createParentFiles($previewPath);
$ok = rename($item['realPath'], $previewPath);
if (!$ok) {
throw new LogicException('Failed to move ' . $item['realPath'] . ' to ' . $previewPath);
}
// Mark the destination so the outer iterator skips it if it encounters the path later.
$skipPaths[$previewPath] = true;
}
$previewsFound++;
}
}
@ -299,62 +199,6 @@ class LocalPreviewStorage implements IPreviewStorage {
return $previewsFound;
}
/**
* Bulk-fetch filecache rows for a set of fileIds.
*
* @param int[] $fileIds
*/
private function fetchFilecacheByFileIds(array $fileIds): array {
if (empty($fileIds)) {
return [];
}
$result = [];
$qb = $this->connection->getQueryBuilder();
$qb->select('fileid', 'storage', 'etag', 'mimetype')
->from('filecache');
foreach (array_chunk($fileIds, 1000) as $chunk) {
$qb->andWhere(
$qb->expr()->in('fileid', $qb->createNamedParameter($chunk, IQueryBuilder::PARAM_INT_ARRAY))
);
}
$rows = $qb->runAcrossAllShards()
->executeQuery();
while ($row = $rows->fetchAssociative()) {
$result[(int)$row['fileid']] = $row;
}
$rows->closeCursor();
return $result;
}
/**
* Bulk-fetch filecache rows for a set of path_hashes (legacy migration).
*
* @param string[] $pathHashes
*/
private function fetchFilecacheByPathHashes(array $pathHashes): array {
if (empty($pathHashes)) {
return [];
}
$result = [];
$qb = $this->connection->getQueryBuilder();
$qb->select('fileid', 'storage', 'etag', 'mimetype', 'parent', 'path_hash')
->from('filecache');
foreach (array_chunk($pathHashes, 1000) as $chunk) {
$qb->andWhere(
$qb->expr()->in('path_hash', $qb->createNamedParameter($chunk, IQueryBuilder::PARAM_STR_ARRAY))
);
}
$rows = $qb->runAcrossAllShards()
->executeQuery();
while ($row = $rows->fetchAssociative()) {
$result[$row['path_hash']] = $row;
}
$rows->closeCursor();
return $result;
}
/**
* Recursive method that deletes the folder and its parent folders if it's not
* empty.
@ -366,11 +210,10 @@ class LocalPreviewStorage implements IPreviewStorage {
->where($qb->expr()->eq('parent', $qb->createNamedParameter($folderId)))
->setMaxResults(1)
->runAcrossAllShards()
->executeQuery();
$row = $result->fetchAssociative();
$result->closeCursor();
->executeQuery()
->fetchAssociative();
if ($row !== false) {
if ($result !== false) {
// there are other files in the directory, don't delete yet
return;
}
@ -382,11 +225,11 @@ class LocalPreviewStorage implements IPreviewStorage {
->where($qb->expr()->eq('fileid', $qb->createNamedParameter($folderId)))
->andWhere($qb->expr()->eq('storage', $qb->createNamedParameter($storageId)))
->setMaxResults(1)
->executeQuery();
$row = $result->fetchAssociative();
$result->closeCursor();
if ($row !== false) {
$parentFolderId = (int)$row['parent'];
->executeQuery()
->fetchAssociative();
if ($result !== false) {
$parentFolderId = (int)$result['parent'];
$qb = $this->connection->getQueryBuilder();
$qb->delete('filecache')

View file

@ -1,284 +0,0 @@
<?php
declare(strict_types=1);
/**
* SPDX-FileCopyrightText: 2026 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
namespace Test\Preview\Storage;
use OC\Preview\Db\PreviewMapper;
use OC\Preview\Storage\LocalPreviewStorage;
use OCP\DB\Exception as DBException;
use OCP\DB\IResult;
use OCP\DB\QueryBuilder\IExpressionBuilder;
use OCP\DB\QueryBuilder\IQueryBuilder;
use OCP\Files\IMimeTypeDetector;
use OCP\Files\IMimeTypeLoader;
use OCP\Files\IRootFolder;
use OCP\IAppConfig;
use OCP\IConfig;
use OCP\IDBConnection;
use PHPUnit\Framework\MockObject\MockObject;
use PHPUnit\Framework\TestCase;
use Psr\Log\LoggerInterface;
class LocalPreviewStorageTest extends TestCase {
private IConfig&MockObject $config;
private PreviewMapper&MockObject $previewMapper;
private IAppConfig&MockObject $appConfig;
private IDBConnection&MockObject $connection;
private IMimeTypeDetector&MockObject $mimeTypeDetector;
private LoggerInterface&MockObject $logger;
private IMimeTypeLoader&MockObject $mimeTypeLoader;
private IRootFolder&MockObject $rootFolder;
private string $tmpDir;
private LocalPreviewStorage $storage;
/** File ID used across the single-file tests. */
private const FILE_ID = 1;
protected function setUp(): void {
parent::setUp();
$this->tmpDir = sys_get_temp_dir() . '/nc_preview_test_' . uniqid();
mkdir($this->tmpDir, 0777, true);
$this->config = $this->createMock(IConfig::class);
$this->config->method('getSystemValueString')
->with('datadirectory', $this->anything())
->willReturn($this->tmpDir);
$this->rootFolder = $this->createMock(IRootFolder::class);
$this->rootFolder->method('getAppDataDirectoryName')->willReturn('appdata_test');
$this->previewMapper = $this->createMock(PreviewMapper::class);
$this->appConfig = $this->createMock(IAppConfig::class);
$this->connection = $this->createMock(IDBConnection::class);
$this->mimeTypeDetector = $this->createMock(IMimeTypeDetector::class);
$this->logger = $this->createMock(LoggerInterface::class);
$this->mimeTypeLoader = $this->createMock(IMimeTypeLoader::class);
$this->mimeTypeDetector->method('detectPath')->willReturn('image/jpeg');
$this->mimeTypeLoader->method('getMimetypeById')->willReturn('image/jpeg');
$this->storage = new LocalPreviewStorage(
$this->config,
$this->previewMapper,
$this->appConfig,
$this->connection,
$this->mimeTypeDetector,
$this->logger,
$this->mimeTypeLoader,
$this->rootFolder,
);
}
protected function tearDown(): void {
$this->removeDir($this->tmpDir);
parent::tearDown();
}
private function removeDir(string $path): void {
if (!is_dir($path)) {
return;
}
foreach (scandir($path) as $entry) {
if ($entry === '.' || $entry === '..') {
continue;
}
$full = $path . '/' . $entry;
is_dir($full) ? $this->removeDir($full) : unlink($full);
}
rmdir($path);
}
/**
* Create a preview file in the legacy flat directory format so the scan
* code will attempt to move it to the new nested path.
* Returns the absolute path to the created file.
*/
private function createFlatPreviewFile(int $fileId, string $previewName): string {
$dir = $this->tmpDir . '/appdata_test/preview/' . $fileId;
mkdir($dir, 0777, true);
$path = $dir . '/' . $previewName;
file_put_contents($path, 'fake preview data');
return $path;
}
/**
* Build a mock IQueryBuilder chain and configure it to return the given
* rows from executeQuery()->fetchAssociative().
*/
private function buildQueryBuilderMock(array $rows): IQueryBuilder&MockObject {
$exprMock = $this->createMock(IExpressionBuilder::class);
$exprMock->method('in')->willReturn('1=1');
$callIndex = 0;
$resultMock = $this->createMock(IResult::class);
$resultMock->method('fetchAssociative')
->willReturnCallback(static function () use ($rows, &$callIndex) {
return $rows[$callIndex++] ?? false;
});
$qbMock = $this->createMock(IQueryBuilder::class);
$qbMock->method('select')->willReturnSelf();
$qbMock->method('from')->willReturnSelf();
$qbMock->method('andWhere')->willReturnSelf();
$qbMock->method('runAcrossAllShards')->willReturnSelf();
$qbMock->method('executeQuery')->willReturn($resultMock);
$qbMock->method('expr')->willReturn($exprMock);
$qbMock->method('createNamedParameter')->willReturn(':param');
return $qbMock;
}
/**
* Configure appConfig so migration is considered done, meaning
* checkForFileCache = false (no legacy path-hash queries).
*/
private function setMigrationDone(): void {
$this->appConfig->method('getValueBool')
->with('core', 'previewMovedDone')
->willReturn(true);
}
/**
* When fewer previews than SCAN_BATCH_SIZE exist, scan() must still open
* and commit a transaction for the tail batch.
*
* Before the fix: commit() was never called for the tail batch, leaving the
* transaction open.
*/
public function testScanCommitsFinalBatch(): void {
$this->setMigrationDone();
$this->createFlatPreviewFile(self::FILE_ID, '1024-1024.jpg');
$filecacheRow = [
'fileid' => (string)self::FILE_ID,
'storage' => '42',
'etag' => 'abc',
'mimetype' => '6',
];
$this->connection->method('getQueryBuilder')
->willReturn($this->buildQueryBuilderMock([$filecacheRow]));
// Outer batch transaction + one inner savepoint for the insert.
$this->connection->expects($this->exactly(2))->method('beginTransaction');
$this->connection->expects($this->exactly(2))->method('commit');
$this->connection->expects($this->never())->method('rollBack');
$count = $this->storage->scan();
$this->assertSame(1, $count);
}
/**
* When previewMapper->insert() throws a unique-constraint violation, scan()
* must roll back only the inner savepoint and continue, leaving the outer
* transaction intact so its final commit() succeeds.
*
* Before the fix: the plain catch swallowed the PHP exception but left the
* PostgreSQL transaction in an aborted state, so all subsequent queries
* (including commit()) failed with "current transaction is aborted".
*/
public function testScanHandlesUniqueConstraintViolation(): void {
$this->setMigrationDone();
$this->createFlatPreviewFile(self::FILE_ID, '1024-1024.jpg');
$filecacheRow = [
'fileid' => (string)self::FILE_ID,
'storage' => '42',
'etag' => 'abc',
'mimetype' => '6',
];
$this->connection->method('getQueryBuilder')
->willReturn($this->buildQueryBuilderMock([$filecacheRow]));
$ucvException = new class('duplicate key') extends DBException {
public function getReason(): int {
return self::REASON_UNIQUE_CONSTRAINT_VIOLATION;
}
};
$this->previewMapper->method('insert')->willThrowException($ucvException);
// Inner savepoint is rolled back; outer batch transaction is committed.
$this->connection->expects($this->exactly(2))->method('beginTransaction');
$this->connection->expects($this->once())->method('commit');
$this->connection->expects($this->exactly(1))->method('rollBack');
$count = $this->storage->scan();
// Even when the DB row already exists the preview file still counts.
$this->assertSame(1, $count);
}
/**
* A non-UCE exception from previewMapper->insert() must be re-thrown after
* rolling back both the inner savepoint and the outer batch transaction.
*/
public function testScanRethrowsUnexpectedInsertException(): void {
$this->setMigrationDone();
$this->createFlatPreviewFile(self::FILE_ID, '1024-1024.jpg');
$filecacheRow = [
'fileid' => (string)self::FILE_ID,
'storage' => '42',
'etag' => 'abc',
'mimetype' => '6',
];
$this->connection->method('getQueryBuilder')
->willReturn($this->buildQueryBuilderMock([$filecacheRow]));
$driverException = new class('some driver error') extends DBException {
public function getReason(): int {
return self::REASON_DRIVER;
}
};
$this->previewMapper->method('insert')->willThrowException($driverException);
// Inner savepoint rolled back; outer batch also rolled back via rethrow.
$this->connection->expects($this->exactly(2))->method('beginTransaction');
$this->connection->expects($this->never())->method('commit');
$this->connection->expects($this->exactly(2))->method('rollBack');
$this->expectException(DBException::class);
$this->storage->scan();
}
/**
* fetchFilecacheByFileIds() must return a row for every file ID returned by
* the query, not just one. Before the fix, the foreach loop iterated over
* the key-value pairs of the first row, so previews for all but the first
* file ID were silently deleted (filecache row not found unlink).
*/
public function testScanFetchesAllFilecacheRows(): void {
$this->setMigrationDone();
$fileIds = [1, 2, 3];
foreach ($fileIds as $id) {
$this->createFlatPreviewFile($id, '1024-1024.jpg');
}
$filecacheRows = array_map(static fn (int $id) => [
'fileid' => (string)$id,
'storage' => '42',
'etag' => 'abc',
'mimetype' => '6',
], $fileIds);
$this->connection->method('getQueryBuilder')
->willReturn($this->buildQueryBuilderMock($filecacheRows));
// 1 outer batch transaction + 3 inner savepoints (one per preview insert).
$this->connection->expects($this->exactly(4))->method('beginTransaction');
$this->connection->expects($this->exactly(4))->method('commit');
$this->connection->expects($this->never())->method('rollBack');
$count = $this->storage->scan();
$this->assertSame(3, $count);
}
}