2012-09-16 10:52:32 -04:00
< ? php
/**
2016-07-21 11:07:57 -04:00
* @ copyright Copyright ( c ) 2016 , ownCloud , Inc .
*
2019-12-03 13:57:53 -05:00
* @ author Ari Selseng < ari @ selseng . net >
2016-05-26 13:56:05 -04:00
* @ author Arthur Schiwon < blizzz @ arthur - schiwon . de >
* @ author Björn Schießle < bjoern @ schiessle . org >
2020-03-31 04:49:10 -04:00
* @ author Christoph Wurst < christoph @ winzerhof - wurst . at >
2016-05-26 13:56:05 -04:00
* @ author Daniel Jagszent < daniel @ jagszent . de >
2017-11-06 09:56:42 -05:00
* @ author Joas Schilling < coding @ schilljs . com >
2015-03-26 06:44:34 -04:00
* @ author Jörn Friedrich Dreyer < jfd @ butonic . de >
2016-05-26 13:56:05 -04:00
* @ author Lukas Reschke < lukas @ statuscode . ch >
2015-03-26 06:44:34 -04:00
* @ author Martin Mattel < martin . mattel @ diemattels . at >
* @ author Morris Jobke < hey @ morrisjobke . de >
* @ author Owen Winkler < a_github @ midnightcircus . com >
2016-07-21 12:13:36 -04:00
* @ author Robin Appelman < robin @ icewind . nl >
2016-01-12 09:02:16 -05:00
* @ author Robin McCorkell < robin @ mccorkell . me . uk >
2015-03-26 06:44:34 -04:00
* @ author Thomas Müller < thomas . mueller @ tmit . eu >
2020-12-16 08:54:15 -05:00
* @ author Vincent Petry < vincent @ nextcloud . com >
2015-03-26 06:44:34 -04:00
*
* @ license AGPL - 3.0
*
* This code is free software : you can redistribute it and / or modify
* it under the terms of the GNU Affero General Public License , version 3 ,
* as published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU Affero General Public License for more details .
*
* You should have received a copy of the GNU Affero General Public License , version 3 ,
2019-12-03 13:57:53 -05:00
* along with this program . If not , see < http :// www . gnu . org / licenses />
2015-03-26 06:44:34 -04:00
*
2012-09-16 10:52:32 -04:00
*/
namespace OC\Files\Cache ;
2021-01-03 09:28:31 -05:00
use Doctrine\DBAL\Exception ;
2023-02-24 11:38:25 -05:00
use OC\Files\Storage\Wrapper\Encryption ;
2015-12-02 09:05:29 -05:00
use OCP\Files\Cache\IScanner ;
2016-06-01 08:22:12 -04:00
use OCP\Files\ForbiddenException ;
2022-03-25 10:15:02 -04:00
use OCP\Files\Storage\IReliableEtagStorage ;
2015-06-03 10:44:57 -04:00
use OCP\Lock\ILockingProvider ;
2022-03-30 04:55:41 -04:00
use OC\Files\Storage\Wrapper\Encoding ;
use OC\Files\Storage\Wrapper\Jail ;
use OC\Hooks\BasicEmitter ;
use Psr\Log\LoggerInterface ;
2013-06-14 09:30:41 -04:00
2013-07-19 10:32:43 -04:00
/**
* Class Scanner
*
* Hooks available in scope \OC\Files\Cache\Scanner :
* - scanFile ( string $path , string $storageId )
* - scanFolder ( string $path , string $storageId )
2013-11-07 10:22:29 -05:00
* - postScanFile ( string $path , string $storageId )
* - postScanFolder ( string $path , string $storageId )
2013-07-19 10:32:43 -04:00
*
* @ package OC\Files\Cache
*/
2015-12-02 09:05:29 -05:00
class Scanner extends BasicEmitter implements IScanner {
2012-10-03 05:24:49 -04:00
/** @var \OC\Files\Storage\Storage $storage the storage this scanner reads metadata from */
protected $storage;

/** @var string $storageId id of the scanned storage, passed along with every emitted hook */
protected $storageId;

/** @var \OC\Files\Cache\Cache $cache the cache that receives the scan results */
protected $cache;

/** @var boolean $cacheActive If true, perform cache operations, if false, do not affect cache */
protected $cacheActive;

/** @var bool $useTransactions whether the scanning of a folder is wrapped in a database transaction */
protected $useTransactions = true;

/** @var \OCP\Lock\ILockingProvider $lockingProvider provider handed to the storage when acquiring/releasing locks */
protected $lockingProvider;
2012-10-03 05:24:49 -04:00
/**
 * Set up a scanner for the given storage.
 *
 * The cache is taken from the storage itself; cache writes are disabled when the
 * 'filesystem_cache_readonly' system value is set.
 *
 * @param \OC\Files\Storage\Storage $storage the storage to scan
 */
public function __construct(\OC\Files\Storage\Storage $storage) {
	$this->storage = $storage;
	$this->storageId = $this->storage->getId();
	$this->cache = $storage->getCache();

	$cacheIsReadOnly = \OC::$server->getConfig()->getSystemValue('filesystem_cache_readonly', false);
	$this->cacheActive = !$cacheIsReadOnly;

	$this->lockingProvider = \OC::$server->getLockingProvider();
}
2014-09-07 19:34:03 -04:00
/**
 * Choose whether the scanning of a folder is wrapped in a database transaction.
 * Transactions are used by default.
 *
 * @param bool $useTransactions true to use transactions, false to scan without them
 */
public function setUseTransactions($useTransactions) {
	$this->useTransactions = $useTransactions;
}
2012-09-16 10:52:32 -04:00
/**
 * Fetch all the metadata of a file or folder from the storage.
 *
 * A debug message is logged when the storage has no metadata for the path.
 *
 * @param string $path
 * @return array|null an array of metadata of the file, null if the storage returned none
 */
protected function getData($path) {
	$metadata = $this->storage->getMetaData($path);
	if ($metadata === null) {
		$logger = \OC::$server->get(LoggerInterface::class);
		$logger->debug("!!! Path '$path' is not accessible or present !!!", ['app' => 'core']);
	}
	return $metadata;
}
/**
 * Scan a single file or folder and store its metadata in the cache.
 *
 * While the entry is processed a shared lock is held on it (when $lock is set and the
 * storage supports locking). The lock is released on every exit path: normal return,
 * early return and thrown exception.
 *
 * @param string $file path of the entry inside the storage
 * @param int $reuseExisting bitmask of self::REUSE_* flags, 0 to not reuse any cached data
 * @param int $parentId id of the parent folder in the cache, -1 to look it up
 * @param array|null|false $cacheData existing data in the cache for the file to be scanned,
 *                                    null to fetch it from the cache
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @param array|null $data the metadata for the file, as returned by the storage; fetched from the storage when null
 * @return array|null an array of metadata of the scanned file; null if the path is invalid,
 *                    is a partial file, is forbidden, has no metadata or its parent could not be scanned
 * @throws \OCP\Lock\LockedException
 */
public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData = null, $lock = true, $data = null) {
	if ($file !== '') {
		try {
			$this->storage->verifyPath(dirname($file), basename($file));
		} catch (\Exception $e) {
			return null;
		}
	}

	// only proceed if $file is not a partial file, blacklist is handled by the storage
	if (self::isPartialFile($file)) {
		return null;
	}

	// acquire a lock; remember whether we actually took one so the release always mirrors it
	$locked = $lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage');
	if ($locked) {
		$this->storage->acquireLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
	}

	try {
		try {
			$data = $data ?? $this->getData($file);
		} catch (ForbiddenException $e) {
			return null;
		}

		if (!$data) {
			// nothing on the storage (anymore): drop the cache entry
			$this->removeFromCache($file);
			return $data;
		}

		// pre-emit only if it was a file. By that we avoid counting/treating folders as files
		if ($data['mimetype'] !== 'httpd/unix-directory') {
			$this->emit('\OC\Files\Cache\Scanner', 'scanFile', [$file, $this->storageId]);
			\OC_Hook::emit('\OC\Files\Cache\Scanner', 'scan_file', ['path' => $file, 'storage' => $this->storageId]);
		}

		$parent = dirname($file);
		if ($parent === '.' || $parent === '/') {
			$parent = '';
		}
		if ($parentId === -1) {
			$parentId = $this->cache->getParentId($file);
		}

		// scan the parent if it's not in the cache (id -1) and the current file is not the root folder
		if ($file && $parentId === -1) {
			$parentData = $this->scanFile($parent);
			if (!$parentData) {
				// the lock taken above is released by the finally block
				return null;
			}
			$parentId = $parentData['fileid'];
		}
		if ($parent) {
			$data['parent'] = $parentId;
		}
		if (is_null($cacheData)) {
			/** @var CacheEntry $cacheData */
			$cacheData = $this->cache->get($file);
		}
		if ($cacheData && $reuseExisting && isset($cacheData['fileid'])) {
			// prevent empty etag
			$etag = empty($cacheData['etag']) ? $data['etag'] : $cacheData['etag'];
			$fileId = $cacheData['fileid'];
			$data['fileid'] = $fileId;
			// only reuse data if the file hasn't explicitly changed
			if (isset($data['storage_mtime']) && isset($cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
				$data['mtime'] = $cacheData['mtime'];
				if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
					$data['size'] = $cacheData['size'];
				}
				if ($reuseExisting & self::REUSE_ETAG && !$this->storage->instanceOfStorage(IReliableEtagStorage::class)) {
					$data['etag'] = $etag;
				}
			}

			// we only updated unencrypted_size if it's already set
			if ($cacheData['unencrypted_size'] === 0) {
				unset($data['unencrypted_size']);
			}

			// Only update metadata that has changed
			$newData = array_diff_assoc($data, $cacheData->getData());
		} else {
			// we only updated unencrypted_size if it's already set
			unset($data['unencrypted_size']);
			$newData = $data;
			$fileId = -1;
		}
		if (!empty($newData)) {
			// Reset the checksum if the data has changed
			$newData['checksum'] = '';
			$newData['parent'] = $parentId;
			$data['fileid'] = $this->addToCache($file, $newData, $fileId);
		}

		$data['oldSize'] = ($cacheData && isset($cacheData['size'])) ? $cacheData['size'] : 0;

		if ($cacheData && isset($cacheData['encrypted'])) {
			$data['encrypted'] = $cacheData['encrypted'];
		}

		// post-emit only if it was a file. By that we avoid counting/treating folders as files
		if ($data['mimetype'] !== 'httpd/unix-directory') {
			$this->emit('\OC\Files\Cache\Scanner', 'postScanFile', [$file, $this->storageId]);
			\OC_Hook::emit('\OC\Files\Cache\Scanner', 'post_scan_file', ['path' => $file, 'storage' => $this->storageId]);
		}

		if (!isset($data['encrypted'])) {
			$data['encrypted'] = false;
		}
		return $data;
	} finally {
		// release the acquired lock, whichever way the block above was left
		if ($locked) {
			$this->storage->releaseLock($file, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
		}
	}
}
2014-06-02 08:52:21 -04:00
/**
 * Announce the removal of a path and, if the cache is active, remove it from the cache.
 *
 * @param string $path
 */
protected function removeFromCache($path) {
	\OC_Hook::emit('Scanner', 'removeFromCache', ['file' => $path]);
	$this->emit('\OC\Files\Cache\Scanner', 'removeFromCache', [$path]);

	if (!$this->cacheActive) {
		return;
	}
	$this->cache->remove($path);
}
/**
 * Announce new metadata for a path and, if the cache is active, write it to the cache.
 *
 * When the metadata carries 'scan_permissions', those replace 'permissions' before
 * the hooks are emitted and the cache is written.
 *
 * @param string $path
 * @param array $data
 * @param int $fileId id of the existing cache entry to update, -1 to insert a new entry
 * @return int the id of the added file, -1 when the cache is not active
 */
protected function addToCache($path, $data, $fileId = -1) {
	if (isset($data['scan_permissions'])) {
		$data['permissions'] = $data['scan_permissions'];
	}

	\OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
	$this->emit('\OC\Files\Cache\Scanner', 'addToCache', [$path, $this->storageId, $data]);

	if (!$this->cacheActive) {
		return -1;
	}
	if ($fileId === -1) {
		return $this->cache->insert($path, $data);
	}

	$this->cache->update($fileId, $data);
	return $fileId;
}
/**
 * Announce updated metadata for a path and, if the cache is active, store it.
 *
 * Note that the legacy OC_Hook signal is emitted under the name 'addToCache',
 * while the emitter signal is named 'updateCache'.
 *
 * @param string $path
 * @param array $data
 * @param int $fileId id of the cache entry to update, -1 to store the data by path
 */
protected function updateCache($path, $data, $fileId = -1) {
	\OC_Hook::emit('Scanner', 'addToCache', ['file' => $path, 'data' => $data]);
	$this->emit('\OC\Files\Cache\Scanner', 'updateCache', [$path, $this->storageId, $data]);

	if (!$this->cacheActive) {
		return;
	}

	if ($fileId === -1) {
		$this->cache->put($path, $data);
	} else {
		$this->cache->update($fileId, $data);
	}
}
2012-09-16 10:52:32 -04:00
/**
 * Scan a file or folder and, for folders, its children.
 *
 * While scanning, an exclusive 'scanner::<path>' lock and a shared lock on the path
 * itself are held (when $lock is set and the storage supports locking).
 *
 * @param string $path
 * @param bool $recursive one of the self::SCAN_* modes
 * @param int $reuse bitmask of self::REUSE_* flags, -1 to pick a default based on $recursive
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @return array|null an array of the meta data of the scanned file or folder
 */
public function scan($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $lock = true) {
	if ($reuse === -1) {
		// shallow scans additionally reuse the cached size
		$reuse = self::REUSE_ETAG;
		if ($recursive === self::SCAN_SHALLOW) {
			$reuse |= self::REUSE_SIZE;
		}
	}

	if ($lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
		$this->storage->acquireLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
		$this->storage->acquireLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
	}

	try {
		$data = $this->scanFile($path, $reuse, -1, null, $lock);
		$isFolder = $data && $data['mimetype'] === 'httpd/unix-directory';
		if ($isFolder) {
			$data['size'] = $this->scanChildren($path, $recursive, $reuse, $data['fileid'], $lock, $data);
		}
	} finally {
		// release in the reverse order of acquisition
		if ($lock && $this->storage->instanceOfStorage('\OCP\Files\Storage\ILockingStorage')) {
			$this->storage->releaseLock($path, ILockingProvider::LOCK_SHARED, $this->lockingProvider);
			$this->storage->releaseLock('scanner::' . $path, ILockingProvider::LOCK_EXCLUSIVE, $this->lockingProvider);
		}
	}

	return $data;
}
2012-10-03 05:40:09 -04:00
2014-09-10 09:57:59 -04:00
/**
 * Get the children of a folder that are currently in the cache, indexed by name.
 *
 * @param int $folderId
 * @return array[] cache entries of the children, keyed by their 'name'
 */
protected function getExistingChildren($folderId) {
	$byName = [];
	foreach ($this->cache->getFolderContentsById($folderId) as $entry) {
		$byName[$entry['name']] = $entry;
	}
	return $byName;
}
2013-06-14 09:30:41 -04:00
/**
 * Scan all the files and folders in a folder.
 *
 * Direct children are handled first; sub folders that need scanning are then
 * scanned recursively and their sizes added to the size of this folder.
 *
 * @param string $path
 * @param bool $recursive one of the self::SCAN_* modes
 * @param int $reuse bitmask of self::REUSE_* flags, -1 to pick a default based on $recursive
 * @param int|null $folderId id for the folder to be scanned; looked up in the cache by path
 *                           when null or -1
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @param array $data the data of the folder before (re)scanning the children
 * @return int|float the size of the scanned folder or -1 if the size is unknown at this stage
 */
protected function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse = -1, $folderId = null, $lock = true, array $data = []) {
	if ($reuse === -1) {
		$reuse = ($recursive === self::SCAN_SHALLOW) ? self::REUSE_ETAG | self::REUSE_SIZE : self::REUSE_ETAG;
	}
	$this->emit('\OC\Files\Cache\Scanner', 'scanFolder', [$path, $this->storageId]);
	$size = 0;

	// Only hit the cache when the caller could not supply a usable id: null is the
	// parameter default and -1 is what addToCache() returns when nothing was written.
	if ($folderId === null || $folderId === -1) {
		$folderId = $this->cache->getId($path);
	}

	$childQueue = $this->handleChildren($path, $recursive, $reuse, $folderId, $lock, $size);

	foreach ($childQueue as $child => $childId) {
		$childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock);
		if ($childSize === -1) {
			// one unknown child size makes the whole folder size unknown
			$size = -1;
		} elseif ($size !== -1) {
			$size += $childSize;
		}
	}

	$oldSize = $data['size'] ?? null;

	// for encrypted storages, we trigger a regular folder size calculation instead of using the calculated size
	// to make sure we also updated the unencrypted-size where applicable
	if ($this->storage->instanceOfStorage(Encryption::class)) {
		$this->cache->calculateFolderSize($path);
	} else {
		if ($this->cacheActive && $oldSize !== $size) {
			$this->cache->update($folderId, ['size' => $size]);
		}
	}
	$this->emit('\OC\Files\Cache\Scanner', 'postScanFolder', [$path, $this->storageId]);
	return $size;
}
/**
 * Scan the direct children of a folder and sync them with the cache.
 *
 * Children found on the storage are scanned, children that are only in the cache are
 * removed from it. The accumulated size of the scanned children is added to $size,
 * which becomes -1 as soon as one child has an unknown size.
 *
 * @param string $path path of the folder
 * @param bool $recursive one of the self::SCAN_* modes
 * @param int $reuse bitmask of self::REUSE_* flags
 * @param int $folderId id of the folder in the cache
 * @param bool $lock set to false to disable getting an additional read lock during scanning
 * @param int|float $size running size of the folder, updated in place
 * @return array map of child folder path => file id for the folders that still have to be scanned
 * @throws \OCP\Lock\LockedException
 */
private function handleChildren($path, $recursive, $reuse, $folderId, $lock, &$size) {
	// we put this in it's own function so it cleans up the memory before we start recursing
	$cachedChildren = $this->getExistingChildren($folderId);
	$storageChildren = iterator_to_array($this->storage->getDirectoryContent($path));

	if (count($cachedChildren) === 0 && count($storageChildren) === 0) {
		// no need to do a transaction
		return [];
	}

	if ($this->useTransactions) {
		\OC::$server->getDatabaseConnection()->beginTransaction();
	}

	$hadException = false;
	$foldersToScan = [];
	$seenNames = [];

	foreach ($storageChildren as $fileMeta) {
		$permissions = $fileMeta['scan_permissions'] ?? $fileMeta['permissions'];
		if ($permissions === 0) {
			continue;
		}

		$originalFile = $fileMeta['name'];
		$file = trim(\OC\Files\Filesystem::normalizePath($originalFile), '/');
		if (trim($originalFile, '/') !== $file) {
			// encoding mismatch, might require compatibility wrapper
			\OC::$server->get(LoggerInterface::class)->debug('Scanner: Skipping non-normalized file name "' . $originalFile . '" in path "' . $path . '".', ['app' => 'core']);
			$this->emit('\OC\Files\Cache\Scanner', 'normalizedNameMismatch', [$path ? $path . '/' . $originalFile : $originalFile]);
			// skip this entry
			continue;
		}

		$seenNames[] = $file;
		$child = $path ? $path . '/' . $file : $file;

		try {
			$existingData = $cachedChildren[$file] ?? false;
			$data = $this->scanFile($child, $reuse, $folderId, $existingData, $lock, $fileMeta);
			if ($data) {
				// recurse into every folder on a full scan, but only into folders
				// which aren't fully scanned on an "incomplete" scan
				$needsScan = $data['mimetype'] === 'httpd/unix-directory'
					&& (
						$recursive === self::SCAN_RECURSIVE
						|| ($recursive === self::SCAN_RECURSIVE_INCOMPLETE && $data['size'] === -1)
					);

				if ($needsScan) {
					$foldersToScan[$child] = $data['fileid'];
				} elseif ($data['size'] === -1) {
					$size = -1;
				} elseif ($size !== -1) {
					$size += $data['size'];
				}
			}
		} catch (Exception $ex) {
			// might happen if inserting duplicate while a scanning
			// process is running in parallel
			// log and ignore
			if ($this->useTransactions) {
				$connection = \OC::$server->getDatabaseConnection();
				$connection->rollback();
				\OC::$server->getDatabaseConnection()->beginTransaction();
			}
			\OC::$server->get(LoggerInterface::class)->debug('Exception while scanning file "' . $child . '"', [
				'app' => 'core',
				'exception' => $ex,
			]);
			$hadException = true;
		} catch (\OCP\Lock\LockedException $e) {
			if ($this->useTransactions) {
				\OC::$server->getDatabaseConnection()->rollback();
			}
			throw $e;
		}
	}

	// everything that is in the cache but was not seen on the storage is gone
	$removedChildren = \array_diff(array_keys($cachedChildren), $seenNames);
	foreach ($removedChildren as $childName) {
		$this->removeFromCache($path ? $path . '/' . $childName : $childName);
	}

	if ($this->useTransactions) {
		\OC::$server->getDatabaseConnection()->commit();
	}

	if ($hadException) {
		// It might happen that the parallel scan process has already
		// inserted mimetypes but those weren't available yet inside the transaction
		// To make sure to have the updated mime types in such cases,
		// we reload them here
		\OC::$server->getMimeTypeLoader()->reset();
	}

	return $foldersToScan;
}
2013-02-10 12:15:23 -05:00
2013-02-10 08:16:45 -05:00
/**
 * Check if the file should be ignored when scanning.
 *
 * A file is considered partial when it has a 'part' extension or when its path
 * contains '.part/'. This prevents unfinished put requests from being scanned.
 *
 * @param string $file
 * @return boolean true if the file is a partial file
 */
public static function isPartialFile($file) {
	return pathinfo($file, PATHINFO_EXTENSION) === 'part'
		|| strpos($file, '.part/') !== false;
}
2012-11-21 17:18:58 -05:00
/**
 * Walk over any folders that are not fully scanned yet and scan them.
 */
public function backgroundScan() {
	if ($this->storage->instanceOfStorage(Jail::class)) {
		// for jail storage wrappers (shares, groupfolders) we run the background scan on the source storage
		// this is mainly done because the jail wrapper doesn't implement `getIncomplete` (because it would be inefficient).
		//
		// Running the scan on the source storage might scan more than "needed", but the unscanned files outside the jail will
		// have to be scanned at some point anyway.
		$this->storage->getUnjailedStorage()->getScanner()->backgroundScan();
		return;
	}

	if (!$this->cache->inCache('')) {
		// if the storage isn't in the cache yet, just scan the root completely
		$this->runBackgroundScanJob(function () {
			$this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG);
		}, '');
		return;
	}

	// find any path marked as unscanned and run the scanner until no more paths are unscanned (or we get stuck)
	$previousPath = null;
	while (true) {
		$path = $this->cache->getIncomplete();
		if ($path === false || $path === $previousPath) {
			break;
		}

		$this->runBackgroundScanJob(function () use ($path) {
			$this->scan($path, self::SCAN_RECURSIVE_INCOMPLETE, self::REUSE_ETAG | self::REUSE_SIZE);
		}, $path);

		// FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
		// to make this possible
		$previousPath = $path;
	}
}
/**
 * Run a scan callback and correct the folder size of the scanned path afterwards.
 *
 * Invalid, unavailable, forbidden or locked storages are skipped silently.
 *
 * @param callable $callback the scan to run
 * @param string $path the path that is scanned by the callback
 */
private function runBackgroundScanJob(callable $callback, $path) {
	try {
		$callback();
		\OC_Hook::emit('Scanner', 'correctFolderSize', ['path' => $path]);

		$canCorrectSize = $this->cacheActive && $this->cache instanceof Cache;
		if ($canCorrectSize) {
			$this->cache->correctFolderSize($path, null, true);
		}
	} catch (\OCP\Files\StorageInvalidException $e) {
		// skip unavailable storages
	} catch (\OCP\Files\StorageNotAvailableException $e) {
		// skip unavailable storages
	} catch (\OCP\Files\ForbiddenException $e) {
		// skip forbidden storages
	} catch (\OCP\Lock\LockedException $e) {
		// skip unavailable storages
	}
}
2014-05-30 09:42:41 -04:00
/**
 * Set whether the cache is affected by scan operations.
 *
 * @param boolean $active true to let scans write to the cache, false to leave the cache untouched
 */
public function setCacheActive($active) {
	$this->cacheActive = $active;
}
2012-09-16 10:52:32 -04:00
}