mirror of
https://github.com/postgres/postgres.git
synced 2026-03-10 18:28:35 -04:00
On some operating systems, it doesn't make sense to retry fsync(), because dirty data cached by the kernel may have been dropped on write-back failure. In that case the only remaining copy of the data is in the WAL. A subsequent fsync() could appear to succeed, but not have flushed the data. That means that a future checkpoint could apparently complete successfully but have lost data. Therefore, violently prevent any future checkpoint attempts by panicking on the first fsync() failure. Note that we already did the same for WAL data; this change extends that behavior to non-temporary data files. Provide a GUC data_sync_retry to control this new behavior, for users of operating systems that don't eject dirty data, and possibly forensic/testing uses. If it is set to on and the write-back error was transient, a later checkpoint might genuinely succeed (on a system that does not throw away buffers on failure); if the error is permanent, later checkpoints will continue to fail. The GUC defaults to off, meaning that we panic. Back-patch to all supported releases. There is still a narrow window for error-loss on some operating systems: if the file is closed and later reopened and a write-back error occurs in the intervening time, but the inode has the bad luck to be evicted due to memory pressure before we reopen, we could miss the error. A later patch will address that with a scheme for keeping files with dirty data open at all times, but we judge that to be too complicated to back-patch. Author: Craig Ringer, with some adjustments by Thomas Munro Reported-by: Craig Ringer Reviewed-by: Robert Haas, Thomas Munro, Andres Freund Discussion: https://postgr.es/m/20180427222842.in2e4mibx45zdth5%40alap3.anarazel.de
133 lines
4.4 KiB
C
133 lines
4.4 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* fd.h
|
|
* Virtual file descriptor definitions.
|
|
*
|
|
*
|
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
*
|
|
* src/include/storage/fd.h
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
|
|
/*
|
|
* calls:
|
|
*
|
|
* File {Close, Read, Write, Seek, Tell, Sync}
|
|
* {Path Name Open, Allocate, Free} File
|
|
*
|
|
* These are NOT JUST RENAMINGS OF THE UNIX ROUTINES.
|
|
* Use them for all file activity...
|
|
*
|
|
* File fd;
|
|
* fd = PathNameOpenFile("foo", O_RDONLY, 0600);
|
|
*
|
|
* AllocateFile();
|
|
* FreeFile();
|
|
*
|
|
* Use AllocateFile, not fopen, if you need a stdio file (FILE*); then
|
|
* use FreeFile, not fclose, to close it. AVOID using stdio for files
|
|
* that you intend to hold open for any length of time, since there is
|
|
* no way for them to share kernel file descriptors with other files.
|
|
*
|
|
* Likewise, use AllocateDir/FreeDir, not opendir/closedir, to allocate
|
|
* open directories (DIR*), and OpenTransientFile/CloseTransientFile for an
|
|
* unbuffered file descriptor.
|
|
*/
|
|
#ifndef FD_H
|
|
#define FD_H
|
|
|
|
#include <dirent.h>
|
|
|
|
|
|
/*
|
|
* FileSeek uses the standard UNIX lseek(2) flags.
|
|
*/
|
|
|
|
/* Path-name argument type accepted by the open routines declared below. */
typedef char *FileName;
|
|
|
|
/*
 * Virtual file descriptor handle: the value returned by PathNameOpenFile()
 * and OpenTemporaryFile() and accepted by the File* routines.
 */
typedef int File;
|
|
|
|
|
|
/* GUC parameters */
|
|
/* NOTE(review): presumably the per-process limit on open files; confirm in fd.c */
extern PGDLLIMPORT int max_files_per_process;
|
|
/*
 * data_sync_retry: when false (the default), the first fsync() failure on a
 * non-temporary data file is escalated to PANIC instead of being retried,
 * because the kernel may already have dropped the dirty data.  When true,
 * retrying is allowed.
 */
extern PGDLLIMPORT bool data_sync_retry;
|
|
|
|
/*
 * max_safe_fds is private to fd.c; it is declared here only so that
 * save/restore_backend_variables() can reach it.
 *
 * NOTE(review): unlike the GUC variables in this header, this one is not
 * marked PGDLLIMPORT.  set_max_safe_fds(), declared later in this header,
 * presumably computes its value -- confirm in fd.c.
 */
|
|
extern int max_safe_fds;
|
|
|
|
|
|
/*
|
|
* prototypes for functions in fd.c
|
|
*/
|
|
|
|
/*
 * Operations on virtual Files --- equivalent to Unix kernel file ops.
 *
 * These routines take or return a File handle (an int typedef) rather than
 * a raw kernel descriptor.  FileSeek's "whence" argument uses the standard
 * lseek(2) flags.
 */
|
|
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
|
|
extern File OpenTemporaryFile(bool interXact);
|
|
extern void FileClose(File file);
|
|
extern int FilePrefetch(File file, off_t offset, int amount);
|
|
extern int FileRead(File file, char *buffer, int amount);
|
|
extern int FileWrite(File file, char *buffer, int amount);
|
|
extern int FileSync(File file);
|
|
extern off_t FileSeek(File file, off_t offset, int whence);
|
|
extern int FileTruncate(File file, off_t offset);
|
|
extern void FileWriteback(File file, off_t offset, off_t nbytes);
|
|
extern char *FilePathName(File file);
|
|
/*
 * NOTE(review): the FileGetRaw* accessors presumably expose the underlying
 * kernel descriptor and the flags/mode the file was opened with; confirm
 * against fd.c.
 */
extern int FileGetRawDesc(File file);
|
|
extern int FileGetRawFlags(File file);
|
|
extern int FileGetRawMode(File file);
|
|
|
|
/*
 * Operations that allow use of regular stdio --- USE WITH CAUTION
 *
 * Use AllocateFile instead of fopen() when a stdio FILE* is needed, and
 * FreeFile instead of fclose() to close it.  Avoid stdio for files that
 * stay open for any length of time: such files cannot share kernel file
 * descriptors with other files.
 */
|
|
extern FILE *AllocateFile(const char *name, const char *mode);
|
|
extern int FreeFile(FILE *file);
|
|
|
|
/*
 * Operations that allow use of pipe streams (popen/pclose)
 *
 * NOTE(review): presumably OpenPipeStream/ClosePipeStream stand in for
 * popen()/pclose() the same way AllocateFile/FreeFile stand in for
 * fopen()/fclose(); confirm in fd.c.
 */
|
|
extern FILE *OpenPipeStream(const char *command, const char *mode);
|
|
extern int ClosePipeStream(FILE *file);
|
|
|
|
/*
 * Operations to allow use of the <dirent.h> library routines
 *
 * Use AllocateDir/FreeDir, not opendir()/closedir(), to obtain and release
 * an open directory (DIR*).  ReadDirExtended takes the same arguments as
 * ReadDir plus an explicit elevel.
 */
|
|
extern DIR *AllocateDir(const char *dirname);
|
|
extern struct dirent *ReadDir(DIR *dir, const char *dirname);
|
|
extern struct dirent *ReadDirExtended(DIR *dir, const char *dirname,
|
|
int elevel);
|
|
extern int FreeDir(DIR *dir);
|
|
|
|
/*
 * Operations to allow use of a plain kernel FD, with automatic cleanup
 *
 * Use this pair when an unbuffered file descriptor is wanted.  Both the
 * value returned by OpenTransientFile and the argument of
 * CloseTransientFile are plain ints, not File handles.
 */
|
|
extern int OpenTransientFile(FileName fileName, int fileFlags, int fileMode);
|
|
extern int CloseTransientFile(int fd);
|
|
|
|
/*
 * If you've really really gotta have a plain kernel FD, use this.
 *
 * Takes the same argument list as OpenTransientFile.
 * NOTE(review): no matching close routine is declared in this header, so
 * presumably the caller owns the descriptor and no automatic cleanup is
 * done; confirm in fd.c.
 */
|
|
extern int BasicOpenFile(FileName fileName, int fileFlags, int fileMode);
|
|
|
|
/*
 * Miscellaneous support routines
 *
 * NOTE(review): judging by the names only, these cover module setup
 * (InitFileAccess, set_max_safe_fds), temporary-tablespace selection
 * (SetTempTablespaces and friends), and cleanup at transaction or
 * subtransaction end (AtEOXact_Files, AtEOSubXact_Files).  Confirm the
 * exact contracts in fd.c.
 */
|
|
extern void InitFileAccess(void);
|
|
extern void set_max_safe_fds(void);
|
|
extern void closeAllVfds(void);
|
|
/* tableSpaces is an array of numSpaces tablespace OIDs */
extern void SetTempTablespaces(Oid *tableSpaces, int numSpaces);
|
|
extern bool TempTablespacesAreSet(void);
|
|
extern Oid GetNextTempTableSpace(void);
|
|
extern void AtEOXact_Files(void);
|
|
extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid,
|
|
SubTransactionId parentSubid);
|
|
extern void RemovePgTempFiles(void);
|
|
|
|
/*
 * Data-synchronization helpers.
 *
 * The pg_f* routines and pg_flush_data operate on a plain kernel descriptor
 * (int fd), not on a File handle.  fsync_fname and the durable_* routines
 * work from path names instead.
 */
extern int pg_fsync(int fd);
|
|
extern int pg_fsync_no_writethrough(int fd);
|
|
extern int pg_fsync_writethrough(int fd);
|
|
extern int pg_fdatasync(int fd);
|
|
extern void pg_flush_data(int fd, off_t offset, off_t amount);
|
|
/* isdir tells whether fname names a directory rather than a regular file */
extern void fsync_fname(const char *fname, bool isdir);
|
|
/* NOTE(review): loglevel is presumably the elevel used to report failures */
extern int durable_rename(const char *oldfile, const char *newfile, int loglevel);
|
|
extern int durable_link_or_rename(const char *oldfile, const char *newfile, int loglevel);
|
|
extern void SyncDataDirectory(void);
|
|
/*
 * NOTE(review): presumably returns the elevel to report a data-file sync
 * failure with, i.e. the caller's elevel when data_sync_retry is on and
 * PANIC otherwise; confirm in fd.c.
 */
extern int data_sync_elevel(int elevel);
|
|
|
|
/*
 * Filename components for OpenTemporaryFile
 *
 * The directory name and the file-name prefix are deliberately separate
 * macros even though both currently expand to the same string.
 */
|
|
#define PG_TEMP_FILES_DIR "pgsql_tmp"
|
|
#define PG_TEMP_FILE_PREFIX "pgsql_tmp"
|
|
|
|
#endif /* FD_H */
|