2012-09-16 10:52:32 -04:00
< ? php
2024-05-23 03:26:56 -04:00
2012-09-16 10:52:32 -04:00
/**
2024-05-23 03:26:56 -04:00
 * SPDX-FileCopyrightText: 2016-2024 Nextcloud GmbH and Nextcloud contributors
 * SPDX-FileCopyrightText: 2016 ownCloud, Inc.
 * SPDX-License-Identifier: AGPL-3.0-only
2012-09-16 10:52:32 -04:00
*/
namespace OC\Files\Cache ;
2021-01-03 09:28:31 -05:00
use Doctrine\DBAL\Exception ;
2023-02-24 11:38:25 -05:00
use OC\Files\Storage\Wrapper\Encryption ;
2023-11-23 04:22:34 -05:00
use OC\Files\Storage\Wrapper\Jail ;
use OC\Hooks\BasicEmitter ;
2015-12-02 09:05:29 -05:00
use OCP\Files\Cache\IScanner ;
2016-06-01 08:22:12 -04:00
use OCP\Files\ForbiddenException ;
2020-09-18 11:43:55 -04:00
use OCP\Files\NotFoundException ;
2024-09-15 11:14:37 -04:00
use OCP\Files\Storage\ILockingStorage ;
2022-03-25 10:15:02 -04:00
use OCP\Files\Storage\IReliableEtagStorage ;
2025-09-30 07:44:34 -04:00
use OCP\IConfig ;
2023-04-12 12:08:14 -04:00
use OCP\IDBConnection ;
2015-06-03 10:44:57 -04:00
use OCP\Lock\ILockingProvider ;
2025-09-30 07:44:34 -04:00
use OCP\Server ;
2022-03-30 04:55:41 -04:00
use Psr\Log\LoggerInterface ;
2013-06-14 09:30:41 -04:00
2013-07-19 10:32:43 -04:00
/**
* Class Scanner
*
* Hooks available in scope \OC\Files\Cache\Scanner :
* - scanFile ( string $path , string $storageId )
* - scanFolder ( string $path , string $storageId )
2013-11-07 10:22:29 -05:00
* - postScanFile ( string $path , string $storageId )
* - postScanFolder ( string $path , string $storageId )
2013-07-19 10:32:43 -04:00
*
* @ package OC\Files\Cache
*/
2015-12-02 09:05:29 -05:00
class Scanner extends BasicEmitter implements IScanner {
2012-10-03 05:24:49 -04:00
/**
 * Storage backend whose contents are scanned.
 *
 * @var \OC\Files\Storage\Storage $storage
 */
protected $storage;

/**
 * Identifier of the scanned storage, as reported by the storage itself.
 *
 * @var string $storageId
 */
protected $storageId;

/**
 * File cache belonging to the scanned storage.
 *
 * @var \OC\Files\Cache\Cache $cache
 */
protected $cache;

/**
 * @var boolean $cacheActive when true scan results are written to the cache, when false the cache is left untouched
 */
protected $cacheActive;

/**
 * @var bool $useTransactions whether the scanning of a folder is wrapped in a database transaction
 */
protected $useTransactions = true;

/**
 * Locking provider handed to the storage when acquiring and releasing locks.
 *
 * @var \OCP\Lock\ILockingProvider
 */
protected $lockingProvider;

/** Database connection used for the per-folder transactions. */
protected IDBConnection $connection;
2012-10-03 05:24:49 -04:00
/**
 * Create a scanner for the given storage.
 *
 * The storage id and the cache are taken from the storage. Whether the cache
 * is written to and whether transactions are used is read from the system
 * config values 'filesystem_cache_readonly' and 'filescanner_no_transactions'
 * (both default to false).
 *
 * @param \OC\Files\Storage\Storage $storage the storage to scan
 */
public function __construct(\OC\Files\Storage\Storage $storage) {
	$this->storage = $storage;
	$this->storageId = $storage->getId();
	$this->cache = $storage->getCache();

	$systemConfig = Server::get(IConfig::class);
	$cacheReadOnly = $systemConfig->getSystemValueBool('filesystem_cache_readonly', false);
	$this->cacheActive = !$cacheReadOnly;
	$transactionsDisabled = $systemConfig->getSystemValueBool('filescanner_no_transactions', false);
	$this->useTransactions = !$transactionsDisabled;

	$this->lockingProvider = Server::get(ILockingProvider::class);
	$this->connection = Server::get(IDBConnection::class);
}
2014-09-07 19:34:03 -04:00
/**
 * Enable or disable wrapping the scan of each folder in a database transaction.
 * Transactions are enabled unless switched off here or through the system config.
 *
 * @param bool $useTransactions true to use transactions, false to scan without them
 */
public function setUseTransactions($useTransactions): void {
	$this->useTransactions = $useTransactions;
}
2012-09-16 10:52:32 -04:00
/**
 * Fetch the metadata of a file or folder from the storage.
 *
 * A debug message is logged when the storage has no metadata for the path.
 *
 * @param string $path
 * @return array|null the metadata of the file, or null when the storage returned none
 */
protected function getData($path) {
	$metadata = $this->storage->getMetaData($path);
	if ($metadata !== null) {
		return $metadata;
	}

	\OC::$server->get(LoggerInterface::class)->debug(" !!! Path ' $path ' is not accessible or present !!! ", ['app' => 'core']);
	return null;
}
/**
 * Scan a single file or folder entry and store its metadata in the cache.
 *
 * The path is first validated by the storage; paths that fail validation and
 * partial files are skipped. When the parent of the entry is not in the cache
 * yet, the parent is scanned first. Entries for which no metadata is available
 * are removed from the cache.
 *
 * @param string $file path of the entry inside the storage, '' for the root
 * @param int $reuseExisting bitmask of self::REUSE_* flags, 0 to reuse nothing from the cache
 * @param int $parentId cache id of the parent folder, -1 to look it up in the cache
 * @param array|CacheEntry|null|false $cacheData existing data in the cache for the file to be scanned,
 *                                               null to load it from the cache, false if there is none
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @param array|null $data the metadata for the file, as returned by the storage; fetched from the storage when null
 * @return array|null an array of metadata of the scanned file, or null when the entry was skipped,
 *                    removed from the cache or is not accessible
 * @throws \OCP\Lock\LockedException
 */
public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true, $data = null) {
	if ($file !== '') {
		try {
			$this->storage->verifyPath(dirname($file), basename($file));
		} catch (\Exception $e) {
			return null;
		}
	}

	// only proceed if $file is not a partial file, blacklist is handled by the storage
	if (self::isPartialFile($file)) {
		return null;
	}

	// acquire a lock
	$useLock = $lock && $this->storage->instanceOfStorage(ILockingStorage::class);
	if ($useLock) {
		$this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
	}

	// Everything that runs while the lock is held lives inside this try block, so the
	// lock is released exactly once on every exit path, including exceptions thrown
	// while fetching the metadata.
	try {
		try {
			$data = $data ?? $this->getData($file);
		} catch (ForbiddenException $e) {
			return null;
		}

		if ($data === null) {
			$this->removeFromCache($file);
			return null;
		}

		// pre-emit only if it was a file. By that we avoid counting/treating folders as files
		if ($data['mimetype'] !== 'httpd/unix-directory') {
			$this->emit('\OC\Files\Cache\Scanner', 'scanFile', [$file, $this->storageId]);
			\OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', ['path' => $file, 'storage' => $this->storageId]);
		}

		$parent = dirname($file);
		if ($parent === '.' || $parent === '/') {
			$parent = '';
		}
		if ($parentId === -1) {
			$parentId = $this->cache->getParentId($file);
		}

		// scan the parent if it's not in the cache (id -1) and the current file is not the root folder
		// (compared against '' explicitly so that an entry named "0" is not mistaken for the root)
		if ($file !== '' && $parentId === -1) {
			$parentData = $this->scanFile($parent);
			if ($parentData === null) {
				return null;
			}

			$parentId = $parentData['fileid'];
		}
		if ($parent !== '') {
			$data['parent'] = $parentId;
		}

		$cacheData = $cacheData ?? $this->cache->get($file);
		if ($reuseExisting && $cacheData !== false && isset($cacheData['fileid'])) {
			// prevent empty etag
			$etag = empty($cacheData['etag']) ? $data['etag'] : $cacheData['etag'];
			$fileId = $cacheData['fileid'];
			$data['fileid'] = $fileId;
			// only reuse data if the file hasn't explicitly changed
			$mtimeUnchanged = isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime'];
			// if the folder is marked as unscanned, never reuse etags
			if ($mtimeUnchanged && $cacheData['size'] !== -1) {
				$data['mtime'] = $cacheData['mtime'];
				if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
					$data['size'] = $cacheData['size'];
				}
				if (($reuseExisting & self::REUSE_ETAG) && !$this->storage->instanceOfStorage(IReliableEtagStorage::class)) {
					$data['etag'] = $etag;
				}
			}

			// we only updated unencrypted_size if it's already set
			if (isset($cacheData['unencrypted_size']) && $cacheData['unencrypted_size'] === 0) {
				unset($data['unencrypted_size']);
			}

			// Only update metadata that has changed, i.e. get all the values in $data that
			// are not present in the cache already. Cache entries expose their values
			// through getData(); a plain array (allowed by the signature) is used as is.
			$cachedValues = is_array($cacheData) ? $cacheData : $cacheData->getData();
			$newData = $this->array_diff_assoc_multi($data, $cachedValues);

			// make it known to the caller that etag has been changed and needs propagation
			if (isset($newData['etag'])) {
				$data['etag_changed'] = true;
			}
		} else {
			unset($data['unencrypted_size']);
			$newData = $data;
			$fileId = -1;
		}

		if (!empty($newData)) {
			// Reset the checksum if the data has changed
			$newData['checksum'] = '';
			$newData['parent'] = $parentId;
			$data['fileid'] = $this->addToCache($file, $newData, $fileId);
		}

		if ($cacheData !== false) {
			$data['oldSize'] = $cacheData['size'] ?? 0;
			$data['encrypted'] = $cacheData['encrypted'] ?? false;
		}

		// post-emit only if it was a file. By that we avoid counting/treating folders as files
		if ($data['mimetype'] !== 'httpd/unix-directory') {
			$this->emit('\OC\Files\Cache\Scanner', 'postScanFile', [$file, $this->storageId]);
			\OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', ['path' => $file, 'storage' => $this->storageId]);
		}

		return $data;
	} finally {
		// release the acquired lock
		if ($useLock) {
			$this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
		}
	}
}
2014-06-02 08:52:21 -04:00
/**
 * Announce the removal of a path and, when the cache is active, remove it from the cache.
 *
 * @param string $path
 */
protected function removeFromCache($path) {
	\OC_Hook::emit('Scanner', 'removeFromCache', ['file' => $path]);
	$this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', [$path]);

	if (!$this->cacheActive) {
		return;
	}
	$this->cache->remove($path);
}
/**
 * Announce new metadata for a path and, when the cache is active, write it to the cache.
 *
 * An existing entry is updated when $fileId is given, otherwise a new entry is inserted.
 * A 'scan_permissions' value in $data replaces 'permissions' before anything is emitted or stored.
 *
 * @param string $path
 * @param array $data
 * @param int $fileId id of the existing cache entry, -1 when there is none
 * @return int the id of the added file, -1 when the cache is not active
 */
protected function addToCache($path, $data, $fileId = -1) {
	if (isset($data['scan_permissions'])) {
		$data['permissions'] = $data['scan_permissions'];
	}

	\OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
	$this->emit('\OC\Files\Cache\Scanner', 'addToCache', [$path, $this->storageId, $data, $fileId]);

	if (!$this->cacheActive) {
		return -1;
	}
	if ($fileId === -1) {
		return $this->cache->insert($path, $data);
	}

	$this->cache->update($fileId, $data);
	return $fileId;
}
/**
 * Announce updated metadata for a path and, when the cache is active, store it in the cache.
 *
 * An existing entry is updated by id when $fileId is given, otherwise the data is put by path.
 *
 * @param string $path
 * @param array $data
 * @param int $fileId id of the existing cache entry, -1 when there is none
 */
protected function updateCache($path, $data, $fileId = -1) {
	// note: the legacy hook is emitted under the name 'addToCache' here as well
	\OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
	$this->emit('\OC\Files\Cache\Scanner', 'updateCache', [$path, $this->storageId, $data]);

	if (!$this->cacheActive) {
		return;
	}
	if ($fileId === -1) {
		$this->cache->put($path, $data);
		return;
	}
	$this->cache->update($fileId, $data);
}
2012-09-16 10:52:32 -04:00
/**
 * Scan a file or a folder and all its children.
 *
 * While scanning, an exclusive 'scanner::<path>' lock and a shared lock on the
 * path itself are held (unless $lock is false or the storage does not support
 * locking). Each lock is released as soon as it is no longer needed, also when
 * acquiring the second lock or the scan itself throws.
 *
 * @param string $path
 * @param bool $recursive one of the self::SCAN_* constants
 * @param int $reuse bitmask of self::REUSE_* flags, -1 to pick the default for the scan mode
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @return array|null an array of the meta data of the scanned file or folder,
 *                    null when it could not be scanned or was not found
 */
public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) {
	if ($reuse === -1) {
		$reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
	}

	$useLocks = $lock && $this->storage->instanceOfStorage(ILockingStorage::class);
	$scannerLockPath = 'scanner::' . $path;

	if ($useLocks) {
		$this->storage->acquireLock($scannerLockPath, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
	}
	try {
		// acquired inside the outer try so that a failure here still releases the exclusive lock
		if ($useLocks) {
			$this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
		}
		try {
			$data = $this->scanFile($path, $reuse, -1, lock: $lock);
			if ($data !== null && $data['mimetype'] === 'httpd/unix-directory') {
				$size = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock, $data['size']);
				$data['size'] = $size;
			}
			return $data;
		} catch (NotFoundException $e) {
			$this->removeFromCache($path);
			return null;
		} finally {
			if ($useLocks) {
				$this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
			}
		}
	} finally {
		if ($useLocks) {
			$this->storage->releaseLock($scannerLockPath, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
		}
	}
}
2012-10-03 05:40:09 -04:00
2024-02-23 12:55:58 -05:00
/**
 * Compares $array1 against $array2 and returns all the values in $array1 that are not in $array2
 * Note this is a one-way check - i.e. we don't care about things that are in $array2 that aren't in $array1
 *
 * Supports multi-dimensional arrays
 * Also checks keys/indexes
 * Comparisons are strict just like array_diff_assoc
 * Order of keys/values does not matter
 *
 * A nested array in $array1 whose counterpart in $array2 is not an array is
 * reported as a whole.
 *
 * @param array $array1
 * @param array $array2
 * @return array with the differences between $array1 and $array2
 */
protected function array_diff_assoc_multi(array $array1, array $array2) {
	$result = [];

	foreach ($array1 as $key => $value) {
		// if $array2 doesn't have the same key, that's a result
		if (!array_key_exists($key, $array2)) {
			$result[$key] = $value;
			continue;
		}

		$otherValue = $array2[$key];

		if (!is_array($value)) {
			// if $array2's value for the same key is different, that's a result
			if ($otherValue !== $value) {
				$result[$key] = $value;
			}
			continue;
		}

		// an array never equals a non-array; report it instead of recursing,
		// which would fail on the array type of the second parameter
		if (!is_array($otherValue)) {
			$result[$key] = $value;
			continue;
		}

		$nestedDiff = $this->array_diff_assoc_multi($value, $otherValue);
		if (!empty($nestedDiff)) {
			$result[$key] = $nestedDiff;
		}
	}

	return $result;
}
2014-09-10 09:57:59 -04:00
/**
 * Load the children of a folder that are currently in the cache, indexed by their name.
 *
 * @param int $folderId
 * @return array<string, \OCP\Files\Cache\ICacheEntry>
 */
protected function getExistingChildren($folderId): array {
	$childrenByName = [];
	foreach ($this->cache->getFolderContentsById($folderId) as $entry) {
		$childrenByName[$entry['name']] = $entry;
	}
	return $childrenByName;
}
2013-06-14 09:30:41 -04:00
/**
 * Scan all the files and folders in a folder.
 *
 * The direct children are handled first; the sub folders queued by that step
 * are then scanned one after the other. Finally the size and, when needed,
 * the etag of the folder are written to the cache.
 *
 * @param string $path
 * @param bool|IScanner::SCAN_RECURSIVE_INCOMPLETE $recursive
 * @param int $reuse a combination of self::REUSE_*
 * @param int $folderId id for the folder to be scanned
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @param int|float $oldSize the size of the folder before (re)scanning the children
 * @param bool $etagChanged set to a truthy value when the etag of any entry below the folder changed
 * @return int|float the size of the scanned folder or -1 if the size is unknown at this stage
 */
protected function scanChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float $oldSize, &$etagChanged = false) {
	if ($reuse === -1) {
		$reuse = self::REUSE_ETAG;
		if ($recursive === self::SCAN_SHALLOW) {
			$reuse |= self::REUSE_SIZE;
		}
	}

	$this->emit('\OC\Files\Cache\Scanner', 'scanFolder', [$path, $this->storageId]);

	$size = 0;
	$pendingFolders = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size, $etagChanged);

	foreach ($pendingFolders as $childPath => [$childId, $knownChildSize]) {
		// "etag changed" propagates up, but not down, so we pass `false` to the children even if we already know that the etag of the current folder changed
		$childEtagChanged = false;
		$scannedChildSize = $this->scanChildren($childPath, $recursive, $reuse, $childId, $lock, $knownChildSize, $childEtagChanged);
		$etagChanged |= $childEtagChanged;

		if ($scannedChildSize === -1) {
			// one unknown child size makes the size of the whole folder unknown
			$size = -1;
		} elseif ($size !== -1) {
			$size += $scannedChildSize;
		}
	}

	if ($this->storage->instanceOfStorage(Encryption::class)) {
		// for encrypted storages, we trigger a regular folder size calculation instead of using the calculated size
		// to make sure we also updated the unencrypted-size where applicable
		$this->cache->calculateFolderSize($path);
	} elseif ($this->cacheActive) {
		$changes = [];
		if ($oldSize !== $size) {
			$changes['size'] = $size;
		}
		if ($etagChanged) {
			$changes['etag'] = uniqid();
		}
		if ($changes !== []) {
			$this->cache->update($folderId, $changes);
		}
	}

	$this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', [$path, $this->storageId]);
	return $size;
}
2023-09-19 07:43:01 -04:00
/**
 * Scan the direct children of a folder and synchronise them with the cache.
 *
 * Every entry reported by the storage is scanned, cached children that the
 * storage no longer reports are removed from the cache. When transactions are
 * enabled the whole step runs inside a database transaction.
 *
 * @param string $path path of the folder inside the storage, '' for the root
 * @param bool|IScanner::SCAN_RECURSIVE_INCOMPLETE $recursive
 * @param int $reuse a combination of self::REUSE_*
 * @param int $folderId id for the folder to be scanned
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @param int|float $size running size of the folder; the sizes of the children that are not queued
 *                        are added to it, it is set to -1 as soon as one of those sizes is unknown
 * @param bool $etagChanged set to true when the etag of one of the children changed
 * @return array sub folders that still have to be scanned, as child path => [file id, size]
 * @throws \OCP\Lock\LockedException
 */
private function handleChildren(string $path, $recursive, int $reuse, int $folderId, bool $lock, int|float &$size, bool &$etagChanged): array {
	// we put this in it's own function so it cleans up the memory before we start recursing
	$existingChildren = $this->getExistingChildren($folderId);
	$newChildren = iterator_to_array($this->storage->getDirectoryContent($path));

	if (count($existingChildren) === 0 && count($newChildren) === 0) {
		// no need to do a transaction
		return [];
	}

	// Only the root ('') has no prefix. The comparison is explicit because a folder
	// named "0" is falsy in PHP and would otherwise be treated like the root.
	$pathPrefix = ($path === '') ? '' : $path . '/';

	if ($this->useTransactions) {
		$this->connection->beginTransaction();
	}

	$exceptionOccurred = false;
	$childQueue = [];
	$newChildNames = [];
	foreach ($newChildren as $fileMeta) {
		$permissions = $fileMeta['scan_permissions'] ?? $fileMeta['permissions'];
		if ($permissions === 0) {
			continue;
		}
		$originalFile = $fileMeta['name'];
		$file = trim(\OC\Files\Filesystem::normalizePath($originalFile), '/');
		if (trim($originalFile, '/') !== $file) {
			// encoding mismatch, might require compatibility wrapper
			\OC::$server->get(LoggerInterface::class)->debug('Scanner: Skipping non-normalized file name "' . $originalFile . '" in path "' . $path . '".', ['app' => 'core']);
			$this->emit('\OC\Files\Cache\Scanner', 'normalizedNameMismatch', [$pathPrefix . $originalFile]);
			// skip this entry
			continue;
		}

		$newChildNames[] = $file;
		$child = $pathPrefix . $file;
		try {
			$existingData = $existingChildren[$file] ?? false;
			$data = $this->scanFile($child, $reuse, $folderId, $existingData, $lock, $fileMeta);
			if ($data) {
				if ($data['mimetype'] === 'httpd/unix-directory' && $recursive === self::SCAN_RECURSIVE) {
					$childQueue[$child] = [$data['fileid'], $data['size']];
				} elseif ($data['mimetype'] === 'httpd/unix-directory' && $recursive === self::SCAN_RECURSIVE_INCOMPLETE && $data['size'] === -1) {
					// only recurse into folders which aren't fully scanned
					$childQueue[$child] = [$data['fileid'], $data['size']];
				} elseif ($data['size'] === -1) {
					$size = -1;
				} elseif ($size !== -1) {
					$size += $data['size'];
				}

				if (isset($data['etag_changed']) && $data['etag_changed']) {
					$etagChanged = true;
				}
			}
		} catch (Exception $ex) {
			// might happen if inserting duplicate while a scanning
			// process is running in parallel
			// log and ignore
			if ($this->useTransactions) {
				$this->connection->rollback();
				$this->connection->beginTransaction();
			}
			\OC::$server->get(LoggerInterface::class)->debug('Exception while scanning file "' . $child . '"', [
				'app' => 'core',
				'exception' => $ex,
			]);
			$exceptionOccurred = true;
		} catch (\OCP\Lock\LockedException $e) {
			if ($this->useTransactions) {
				$this->connection->rollback();
			}
			throw $e;
		}
	}

	// everything that is in the cache but was not reported by the storage is gone
	$removedChildren = \array_diff(array_keys($existingChildren), $newChildNames);
	foreach ($removedChildren as $childName) {
		$this->removeFromCache($pathPrefix . $childName);
	}

	if ($this->useTransactions) {
		$this->connection->commit();
	}
	if ($exceptionOccurred) {
		// It might happen that the parallel scan process has already
		// inserted mimetypes but those weren't available yet inside the transaction
		// To make sure to have the updated mime types in such cases,
		// we reload them here
		\OC::$server->getMimeTypeLoader()->reset();
	}
	return $childQueue;
}
2013-02-10 12:15:23 -05:00
2013-02-10 08:16:45 -05:00
/**
 * Tell whether a path belongs to a partial file and should be ignored when scanning.
 *
 * A path counts as partial when it has a '.part' extension or contains '.part/'.
 * This prevents unfinished put requests from being scanned.
 *
 * @param string $file
 * @return boolean
 */
public static function isPartialFile($file) {
	$hasPartExtension = pathinfo($file, PATHINFO_EXTENSION) === 'part';

	return $hasPartExtension || str_contains($file, '.part/');
}
2012-11-21 17:18:58 -05:00
/**
 * Walk over any folders that are not fully scanned yet and scan them.
 */
public function backgroundScan() {
	if ($this->storage->instanceOfStorage(Jail::class)) {
		// for jail storage wrappers (shares, groupfolders) we run the background scan on the source storage
		// this is mainly done because the jail wrapper doesn't implement `getIncomplete` (because it would be inefficient).
		//
		// Running the scan on the source storage might scan more than "needed", but the unscanned files outside the jail will
		// have to be scanned at some point anyway.
		$this->storage->getUnjailedStorage()->getScanner()->backgroundScan();
		return;
	}

	if (!$this->cache->inCache('')) {
		// if the storage isn't in the cache yet, just scan the root completely
		$this->runBackgroundScanJob(function () {
			$this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG);
		}, '');
		return;
	}

	// find any path marked as unscanned and run the scanner until no more paths are unscanned (or we get stuck)
	$previousPath = null;
	while (true) {
		$incompletePath = $this->cache->getIncomplete();
		if ($incompletePath === false || $incompletePath === $previousPath) {
			break;
		}

		$this->runBackgroundScanJob(function () use ($incompletePath) {
			$this->scan($incompletePath, self::SCAN_RECURSIVE_INCOMPLETE, self::REUSE_ETAG | self::REUSE_SIZE);
		}, $incompletePath);

		// FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
		// to make this possible
		$previousPath = $incompletePath;
	}
}
2023-04-12 12:08:14 -04:00
/**
 * Run one background scan job and correct the folder size of the scanned path afterwards.
 *
 * Invalid or unavailable storages, forbidden paths and locked paths are skipped silently.
 *
 * @param callable $callback performs the actual scan
 * @param string $path the path that is scanned by the callback
 */
protected function runBackgroundScanJob(callable $callback, $path) {
	try {
		$callback();
		\OC_Hook::emit('Scanner', 'correctFolderSize', ['path' => $path]);

		$canCorrectSize = $this->cacheActive && $this->cache instanceof Cache;
		if ($canCorrectSize) {
			$this->cache->correctFolderSize($path, null, true);
		}
	} catch (\OCP\Files\StorageInvalidException|\OCP\Files\StorageNotAvailableException|\OCP\Files\ForbiddenException|\OCP\Lock\LockedException $e) {
		// skip unavailable, forbidden or locked storages
	}
}
2014-05-30 09:42:41 -04:00
/**
 * Choose whether scan operations write to the cache.
 *
 * @param boolean $active true to let scans modify the cache, false to leave the cache untouched
 */
public function setCacheActive($active) {
	$this->cacheActive = $active;
}
2012-09-16 10:52:32 -04:00
}