postgresql/src/include/storage/fd.h
Michael Paquier 84fb27511d Replace off_t by pgoff_t in I/O routines
PostgreSQL's Windows port has never been able to handle files larger
than 2GB due to the use of off_t for file offsets, only 32-bit on
Windows.  This causes signed integer overflow at exactly 2^31 bytes when
trying to handle files larger than 2GB, for the routines touched by this
commit.

Note that large files are forbidden by ./configure (3c6248a828) and
meson (recent change, see 79cd66f28c).  This restriction also exists
in v16 and older versions for the now-dead MSVC scripts.

The code base already defines pgoff_t as __int64 (64-bit) on Windows for
this purpose, and some function declarations in headers use it, but many
internals still rely on off_t.  This commit switches more routines to
use pgoff_t, offering more portability, for areas mainly related to file
extensions and storage.

These are not critical for WAL segments yet, which have currently a
maximum size allowed of 1GB (well, this opens the door at allowing a
larger size for them).  This matters more for segment files if we want
to lift the large file restriction in ./configure and meson in the
future, which would make sense to remove once/if all traces of off_t are
gone from the tree.  This can additionally matter for out-of-core code
that may want files larger than 2GB in places where off_t is four bytes
in size.

Note that off_t is still used in other parts of the tree like
buffile.c, WAL sender/receiver, base backup, pg_combinebackup, etc.
These other code paths can be addressed separately, and their update
will be required if we want to remove the large file restriction in the
future.  This commit is a good first cut in itself towards more
portability, hopefully.

On Unix-like systems, pgoff_t is defined as off_t, so this change only
affects Windows behavior.

Author: Bryan Green <dbryan.green@gmail.com>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/0f238ff4-c442-42f5-adb8-01b762c94ca1@gmail.com
2025-11-13 12:41:40 +09:00

222 lines
8.2 KiB
C

/*-------------------------------------------------------------------------
*
* fd.h
* Virtual file descriptor definitions.
*
*
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/storage/fd.h
*
*-------------------------------------------------------------------------
*/
/*
* calls:
*
* File {Close, Read, ReadV, Write, WriteV, Size, Sync}
* {Path Name Open, Allocate, Free} File
*
* These are NOT JUST RENAMINGS OF THE UNIX ROUTINES.
* Use them for all file activity...
*
* File fd;
* fd = PathNameOpenFile("foo", O_RDONLY);
*
* AllocateFile();
* FreeFile();
*
* Use AllocateFile, not fopen, if you need a stdio file (FILE*); then
* use FreeFile, not fclose, to close it. AVOID using stdio for files
* that you intend to hold open for any length of time, since there is
* no way for them to share kernel file descriptors with other files.
*
* Likewise, use AllocateDir/FreeDir, not opendir/closedir, to allocate
* open directories (DIR*), and OpenTransientFile/CloseTransientFile for an
* unbuffered file descriptor.
*
* If you really can't use any of the above, at least call AcquireExternalFD
* or ReserveExternalFD to report any file descriptors that are held for any
* length of time. Failure to do so risks unnecessary EMFILE errors.
*/
#ifndef FD_H
#define FD_H
#include "port/pg_iovec.h"
#include <dirent.h>
#include <fcntl.h>
typedef int File;
#define IO_DIRECT_DATA 0x01
#define IO_DIRECT_WAL 0x02
#define IO_DIRECT_WAL_INIT 0x04
/* GUC parameter */
extern PGDLLIMPORT int max_files_per_process;
extern PGDLLIMPORT bool data_sync_retry;
extern PGDLLIMPORT int recovery_init_sync_method;
extern PGDLLIMPORT int io_direct_flags;
/*
* This is private to fd.c, but exported for save/restore_backend_variables()
*/
extern PGDLLIMPORT int max_safe_fds;
/*
* On Windows, we have to interpret EACCES as possibly meaning the same as
* ENOENT, because if a file is unlinked-but-not-yet-gone on that platform,
* that's what you get. Ugh. This code is designed so that we don't
* actually believe these cases are okay without further evidence (namely,
* a pending fsync request getting canceled ... see ProcessSyncRequests).
*/
#ifndef WIN32
#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT)
#else
#define FILE_POSSIBLY_DELETED(err) ((err) == ENOENT || (err) == EACCES)
#endif
/*
* O_DIRECT is not standard, but almost every Unix has it. We translate it
* to the appropriate Windows flag in src/port/open.c. We simulate it with
* fcntl(F_NOCACHE) on macOS inside fd.c's open() wrapper. We use the name
* PG_O_DIRECT rather than defining O_DIRECT in that case (probably not a good
* idea on a Unix). We can only use it if the compiler will correctly align
* PGIOAlignedBlock for us, though.
*/
#if defined(O_DIRECT) && defined(pg_attribute_aligned)
#define PG_O_DIRECT O_DIRECT
#elif defined(F_NOCACHE)
#define PG_O_DIRECT 0x80000000
#define PG_O_DIRECT_USE_F_NOCACHE
#else
#define PG_O_DIRECT 0
#endif
/*
* prototypes for functions in fd.c
*/
struct PgAioHandle;
/* Operations on virtual Files --- equivalent to Unix kernel file ops */
extern File PathNameOpenFile(const char *fileName, int fileFlags);
extern File PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode);
extern File OpenTemporaryFile(bool interXact);
extern void FileClose(File file);
extern int FilePrefetch(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info);
extern ssize_t FileReadV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info);
extern ssize_t FileWriteV(File file, const struct iovec *iov, int iovcnt, pgoff_t offset, uint32 wait_event_info);
extern int FileStartReadV(struct PgAioHandle *ioh, File file, int iovcnt, pgoff_t offset, uint32 wait_event_info);
extern int FileSync(File file, uint32 wait_event_info);
extern int FileZero(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info);
extern int FileFallocate(File file, pgoff_t offset, pgoff_t amount, uint32 wait_event_info);
extern pgoff_t FileSize(File file);
extern int FileTruncate(File file, pgoff_t offset, uint32 wait_event_info);
extern void FileWriteback(File file, pgoff_t offset, pgoff_t nbytes, uint32 wait_event_info);
extern char *FilePathName(File file);
extern int FileGetRawDesc(File file);
extern int FileGetRawFlags(File file);
extern mode_t FileGetRawMode(File file);
/* Operations used for sharing named temporary files */
extern File PathNameCreateTemporaryFile(const char *path, bool error_on_failure);
extern File PathNameOpenTemporaryFile(const char *path, int mode);
extern bool PathNameDeleteTemporaryFile(const char *path, bool error_on_failure);
extern void PathNameCreateTemporaryDir(const char *basedir, const char *directory);
extern void PathNameDeleteTemporaryDir(const char *dirname);
extern void TempTablespacePath(char *path, Oid tablespace);
/* Operations that allow use of regular stdio --- USE WITH CAUTION */
extern FILE *AllocateFile(const char *name, const char *mode);
extern int FreeFile(FILE *file);
/* Operations that allow use of pipe streams (popen/pclose) */
extern FILE *OpenPipeStream(const char *command, const char *mode);
extern int ClosePipeStream(FILE *file);
/* Operations to allow use of the <dirent.h> library routines */
extern DIR *AllocateDir(const char *dirname);
extern struct dirent *ReadDir(DIR *dir, const char *dirname);
extern struct dirent *ReadDirExtended(DIR *dir, const char *dirname,
int elevel);
extern int FreeDir(DIR *dir);
/* Operations to allow use of a plain kernel FD, with automatic cleanup */
extern int OpenTransientFile(const char *fileName, int fileFlags);
extern int OpenTransientFilePerm(const char *fileName, int fileFlags, mode_t fileMode);
extern int CloseTransientFile(int fd);
/* If you've really really gotta have a plain kernel FD, use this */
extern int BasicOpenFile(const char *fileName, int fileFlags);
extern int BasicOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode);
/* Use these for other cases, and also for long-lived BasicOpenFile FDs */
extern bool AcquireExternalFD(void);
extern void ReserveExternalFD(void);
extern void ReleaseExternalFD(void);
/* Make a directory with default permissions */
extern int MakePGDirectory(const char *directoryName);
/* Miscellaneous support routines */
extern void InitFileAccess(void);
extern void InitTemporaryFileAccess(void);
extern void set_max_safe_fds(void);
extern void closeAllVfds(void);
extern void SetTempTablespaces(Oid *tableSpaces, int numSpaces);
extern bool TempTablespacesAreSet(void);
extern int GetTempTablespaces(Oid *tableSpaces, int numSpaces);
extern Oid GetNextTempTableSpace(void);
extern void AtEOXact_Files(bool isCommit);
extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
SubTransactionId parentSubid);
extern void RemovePgTempFiles(void);
extern void RemovePgTempFilesInDir(const char *tmpdirname, bool missing_ok,
bool unlink_all);
extern bool looks_like_temp_rel_name(const char *name);
extern int pg_fsync(int fd);
extern int pg_fsync_no_writethrough(int fd);
extern int pg_fsync_writethrough(int fd);
extern int pg_fdatasync(int fd);
extern bool pg_file_exists(const char *name);
extern void pg_flush_data(int fd, pgoff_t offset, pgoff_t nbytes);
extern int pg_truncate(const char *path, pgoff_t length);
extern void fsync_fname(const char *fname, bool isdir);
extern int fsync_fname_ext(const char *fname, bool isdir, bool ignore_perm, int elevel);
extern int durable_rename(const char *oldfile, const char *newfile, int elevel);
extern int durable_unlink(const char *fname, int elevel);
extern void SyncDataDirectory(void);
extern int data_sync_elevel(int elevel);
static inline ssize_t
FileRead(File file, void *buffer, size_t amount, pgoff_t offset,
uint32 wait_event_info)
{
struct iovec iov = {
.iov_base = buffer,
.iov_len = amount
};
return FileReadV(file, &iov, 1, offset, wait_event_info);
}
static inline ssize_t
FileWrite(File file, const void *buffer, size_t amount, pgoff_t offset,
uint32 wait_event_info)
{
struct iovec iov = {
.iov_base = unconstify(void *, buffer),
.iov_len = amount
};
return FileWriteV(file, &iov, 1, offset, wait_event_info);
}
#endif /* FD_H */