Add file_extend_method=posix_fallocate,write_zeros.

Provide a way to disable the use of posix_fallocate() for relation
files.  It was introduced by commit 4d330a61bb.  The new setting
file_extend_method=write_zeros can be used as a workaround for problems
reported from the field:

 * BTRFS compression is disabled by the use of posix_fallocate()
 * XFS could produce spurious ENOSPC errors in some Linux kernel
   versions, though that problem is reported to have been fixed

The default is file_extend_method=posix_fallocate if available, as
before.  The write_zeros option behaves like PostgreSQL releases before
16, except that it now writes multiple blocks at a time.

Backpatch-through: 16
Reviewed-by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reported-by: Dimitrios Apostolou <jimis@gmx.net>
Discussion: https://postgr.es/m/b1843124-fd22-e279-a31f-252dffb6fbf2%40gmx.net
This commit is contained in:
Thomas Munro 2025-05-31 22:50:22 +12:00
parent e35add48cc
commit f94e9141a0
7 changed files with 87 additions and 5 deletions

View file

@ -2412,6 +2412,43 @@ include_dir 'conf.d'
</listitem>
</varlistentry>
<varlistentry id="guc-file-extend-method" xreflabel="file_extend_method">
<term><varname>file_extend_method</varname> (<type>enum</type>)
<indexterm>
<primary><varname>file_extend_method</varname> configuration parameter</primary>
</indexterm>
</term>
<listitem>
<para>
Specifies the method used to extend data files during bulk operations
such as <command>COPY</command>. The first available option is used as
the default, depending on the operating system:
<itemizedlist>
<listitem>
<para>
<literal>posix_fallocate</literal> (Unix) uses the standard POSIX
interface for allocating disk space, but is missing on some systems.
If it is present but the underlying file system doesn't support it,
this option silently falls back to <literal>write_zeros</literal>.
Current versions of BTRFS are known to disable compression when
this option is used.
This is the default on systems that have the function.
</para>
</listitem>
<listitem>
<para>
<literal>write_zeros</literal> extends files by writing out blocks
of zero bytes. This is the default on systems that don't have the
function <function>posix_fallocate</function>.
</para>
</listitem>
</itemizedlist>
The <literal>write_zeros</literal> method is always used when data
files are extended by 8 blocks or fewer.
</para>
</listitem>
</varlistentry>
<varlistentry id="guc-max-notify-queue-pages" xreflabel="max_notify_queue_pages">
<term><varname>max_notify_queue_pages</varname> (<type>integer</type>)
<indexterm>

View file

@ -164,6 +164,9 @@ bool data_sync_retry = false;
/* How SyncDataDirectory() should do its job. */
int recovery_init_sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
/* How data files should be bulk-extended with zeros. */
int file_extend_method = DEFAULT_FILE_EXTEND_METHOD;
/* Which kinds of files should be opened with PG_O_DIRECT. */
int io_direct_flags;

View file

@ -602,13 +602,24 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
* that decision should be made though? For now just use a cutoff of
* 8, anything between 4 and 8 worked OK in some local testing.
*/
if (numblocks > 8)
if (numblocks > 8 &&
file_extend_method != FILE_EXTEND_METHOD_WRITE_ZEROS)
{
int ret;
int ret = 0;
ret = FileFallocate(v->mdfd_vfd,
seekpos, (pgoff_t) BLCKSZ * numblocks,
WAIT_EVENT_DATA_FILE_EXTEND);
#ifdef HAVE_POSIX_FALLOCATE
if (file_extend_method == FILE_EXTEND_METHOD_POSIX_FALLOCATE)
{
ret = FileFallocate(v->mdfd_vfd,
seekpos, (pgoff_t) BLCKSZ * numblocks,
WAIT_EVENT_DATA_FILE_EXTEND);
}
else
#endif
{
elog(ERROR, "unsupported file_extend_method: %d",
file_extend_method);
}
if (ret != 0)
{
ereport(ERROR,

View file

@ -1042,6 +1042,13 @@
options => 'file_copy_method_options',
},
{ name => 'file_extend_method', type => 'enum', context => 'PGC_SIGHUP', group => 'RESOURCES_DISK',
short_desc => 'Selects the method used for extending data files.',
variable => 'file_extend_method',
boot_val => 'DEFAULT_FILE_EXTEND_METHOD',
options => 'file_extend_method_options',
},
{ name => 'from_collapse_limit', type => 'int', context => 'PGC_USERSET', group => 'QUERY_TUNING_OTHER',
short_desc => 'Sets the FROM-list size beyond which subqueries are not collapsed.',
long_desc => 'The planner will merge subqueries into upper queries if the resulting FROM list would have no more than this many items.',

View file

@ -80,6 +80,7 @@
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/copydir.h"
#include "storage/fd.h"
#include "storage/io_worker.h"
#include "storage/large_object.h"
#include "storage/pg_shmem.h"
@ -491,6 +492,14 @@ static const struct config_enum_entry file_copy_method_options[] = {
{NULL, 0, false}
};
/*
 * Accepted values for the file_extend_method setting.  "posix_fallocate" is
 * offered only in builds where HAVE_POSIX_FALLOCATE is defined; "write_zeros"
 * is always offered.
 */
static const struct config_enum_entry file_extend_method_options[] = {
#ifdef HAVE_POSIX_FALLOCATE
{"posix_fallocate", FILE_EXTEND_METHOD_POSIX_FALLOCATE, false},
#endif
{"write_zeros", FILE_EXTEND_METHOD_WRITE_ZEROS, false},
{NULL, 0, false}
};
/*
* Options for enum values stored in other modules
*/

View file

@ -179,6 +179,10 @@
# in kilobytes, or -1 for no limit
#file_copy_method = copy # copy, clone (if supported by OS)
#file_extend_method = posix_fallocate # the default is the first option supported
# by the operating system:
# posix_fallocate (most Unix-like systems)
# write_zeros
#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated
# for NOTIFY / LISTEN queue

View file

@ -55,12 +55,23 @@ typedef int File;
#define IO_DIRECT_WAL 0x02
#define IO_DIRECT_WAL_INIT 0x04
/*
 * Methods for bulk-extending data files, selected by file_extend_method.
 *
 * The order matters: DEFAULT_FILE_EXTEND_METHOD is defined as 0, i.e. the
 * first enumerator that is compiled in.  When HAVE_POSIX_FALLOCATE is not
 * defined, FILE_EXTEND_METHOD_WRITE_ZEROS becomes the first enumerator and
 * therefore the default.
 */
enum FileExtendMethod
{
#ifdef HAVE_POSIX_FALLOCATE
FILE_EXTEND_METHOD_POSIX_FALLOCATE,
#endif
FILE_EXTEND_METHOD_WRITE_ZEROS,
};
/* Default to the first available file_extend_method. */
#define DEFAULT_FILE_EXTEND_METHOD 0
/* GUC parameter */
extern PGDLLIMPORT int max_files_per_process;
extern PGDLLIMPORT bool data_sync_retry;
extern PGDLLIMPORT int recovery_init_sync_method;
extern PGDLLIMPORT int io_direct_flags;
extern PGDLLIMPORT int file_extend_method;
/*
* This is private to fd.c, but exported for save/restore_backend_variables()