1996-08-27 21:59:28 -04:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
|
*
|
1999-02-13 18:22:53 -05:00
|
|
|
* proc.h
|
2000-11-28 18:27:57 -05:00
|
|
|
* per-process shared memory data structures
|
1996-08-27 21:59:28 -04:00
|
|
|
*
|
|
|
|
|
*
|
2016-01-02 13:33:40 -05:00
|
|
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
2000-01-26 00:58:53 -05:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-08-27 21:59:28 -04:00
|
|
|
*
|
2010-09-20 16:08:53 -04:00
|
|
|
* src/include/storage/proc.h
|
1996-08-27 21:59:28 -04:00
|
|
|
*
|
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
|
*/
|
|
|
|
|
#ifndef _PROC_H_
|
|
|
|
|
#define _PROC_H_
|
|
|
|
|
|
2012-06-25 17:45:15 -04:00
|
|
|
#include "access/xlogdefs.h"
|
2014-12-25 11:24:30 -05:00
|
|
|
#include "lib/ilist.h"
|
2011-03-06 17:49:16 -05:00
|
|
|
#include "storage/latch.h"
|
1999-07-15 19:04:24 -04:00
|
|
|
#include "storage/lock.h"
|
2002-05-04 20:03:29 -04:00
|
|
|
#include "storage/pg_sema.h"
|
2016-08-15 18:09:55 -04:00
|
|
|
#include "storage/proclist_types.h"
|
2000-05-30 20:28:42 -04:00
|
|
|
|
2004-08-01 13:32:22 -04:00
|
|
|
/*
|
|
|
|
|
* Each backend advertises up to PGPROC_MAX_CACHED_SUBXIDS TransactionIds
|
2014-05-06 12:12:18 -04:00
|
|
|
* for non-aborted subtransactions of its current top transaction. These
|
2004-08-01 13:32:22 -04:00
|
|
|
* have to be treated as running XIDs by other backends.
|
|
|
|
|
*
|
|
|
|
|
* We also keep track of whether the cache overflowed (ie, the transaction has
|
|
|
|
|
* generated at least one subtransaction that didn't fit in the cache).
|
|
|
|
|
* If none of the caches have overflowed, we can assume that an XID that's not
|
|
|
|
|
* listed anywhere in the PGPROC array is not a running transaction. Else we
|
|
|
|
|
* have to look at pg_subtrans.
|
|
|
|
|
*/
|
2004-08-29 01:07:03 -04:00
|
|
|
#define PGPROC_MAX_CACHED_SUBXIDS 64 /* XXX guessed-at value */
|
2004-08-01 13:32:22 -04:00
|
|
|
|
2004-08-29 01:07:03 -04:00
|
|
|
struct XidCache
|
|
|
|
|
{
|
|
|
|
|
TransactionId xids[PGPROC_MAX_CACHED_SUBXIDS]; /* cached subtransaction XIDs; NOTE(review): the count of valid entries is presumably kept in PGXACT.nxids -- confirm */
|
2004-08-01 13:32:22 -04:00
|
|
|
};
|
|
|
|
|
|
2012-05-14 03:22:44 -04:00
|
|
|
/* Flags for PGXACT->vacuumFlags */
|
2007-10-24 16:55:36 -04:00
|
|
|
#define PROC_IS_AUTOVACUUM 0x01 /* is it an autovac worker? */
|
|
|
|
|
#define PROC_IN_VACUUM 0x02 /* currently running lazy vacuum */
|
|
|
|
|
#define PROC_IN_ANALYZE 0x04 /* currently running analyze */
|
2014-05-06 12:12:18 -04:00
|
|
|
#define PROC_VACUUM_FOR_WRAPAROUND 0x08 /* set by autovac only */
|
|
|
|
|
#define PROC_IN_LOGICAL_DECODING 0x10 /* currently doing logical
|
2014-12-02 17:42:26 -05:00
|
|
|
* decoding outside xact */
|
2007-10-24 16:55:36 -04:00
|
|
|
|
|
|
|
|
/* flags reset at EOXact */
|
Introduce logical decoding.
This feature, building on previous commits, allows the write-ahead log
stream to be decoded into a series of logical changes; that is,
inserts, updates, and deletes and the transactions which contain them.
It is capable of handling decoding even across changes to the schema
of the affected tables. The output format is controlled by a
so-called "output plugin"; an example is included. To make use of
this in a real replication system, the output plugin will need to be
modified to produce output in the format appropriate to that system,
and to perform filtering.
Currently, information can be extracted from the logical decoding
system only via SQL; future commits will add the ability to stream
changes via walsender.
Andres Freund, with review and other contributions from many other
people, including Álvaro Herrera, Abhijit Menon-Sen, Peter Geoghegan,
Kevin Grittner, Robert Haas, Heikki Linnakangas, Fujii Masao, Abhijit
Menon-Sen, Michael Paquier, Simon Riggs, Craig Ringer, and Steve
Singer.
2014-03-03 16:32:18 -05:00
|
|
|
#define PROC_VACUUM_STATE_MASK \
|
|
|
|
|
(PROC_IN_VACUUM | PROC_IN_ANALYZE | PROC_VACUUM_FOR_WRAPAROUND)
|
2007-10-24 16:55:36 -04:00
|
|
|
|
2011-05-28 19:52:00 -04:00
|
|
|
/*
|
|
|
|
|
* We allow a small number of "weak" relation locks (AccessShareLock,
|
|
|
|
|
* RowShareLock, RowExclusiveLock) to be recorded in the PGPROC structure
|
|
|
|
|
* rather than the main lock table. This eases contention on the lock
|
|
|
|
|
* manager LWLocks. See storage/lmgr/README for additional details.
|
|
|
|
|
*/
|
|
|
|
|
#define FP_LOCK_SLOTS_PER_BACKEND 16
|
|
|
|
|
|
2015-08-06 11:52:51 -04:00
|
|
|
/*
|
|
|
|
|
* An invalid pgprocno. Must be larger than the maximum number of PGPROC
|
|
|
|
|
* structures we could possibly have. See comments for MAX_BACKENDS.
|
|
|
|
|
*/
|
|
|
|
|
#define INVALID_PGPROCNO PG_INT32_MAX
|
|
|
|
|
|
1996-08-27 21:59:28 -04:00
|
|
|
/*
|
2002-06-11 09:40:53 -04:00
|
|
|
* Each backend has a PGPROC struct in shared memory. There is also a list of
|
|
|
|
|
* currently-unused PGPROC structs that will be reallocated to new backends.
|
2001-01-16 01:11:34 -05:00
|
|
|
*
|
2014-05-06 12:12:18 -04:00
|
|
|
* links: list link for any list the PGPROC is in. When waiting for a lock,
|
2002-06-11 09:40:53 -04:00
|
|
|
* the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC
|
2001-01-16 01:11:34 -05:00
|
|
|
* is linked into ProcGlobal's freeProcs list.
|
2005-06-17 18:32:51 -04:00
|
|
|
*
|
|
|
|
|
* Note: twophase.c also sets up a dummy PGPROC struct for each currently
|
|
|
|
|
* prepared transaction. These PGPROCs appear in the ProcArray data structure
|
|
|
|
|
* so that the prepared transactions appear to be still running and are
|
|
|
|
|
* correctly shown as holding locks. A prepared transaction PGPROC can be
|
|
|
|
|
* distinguished from a real one at need by the fact that it has pid == 0.
|
2005-12-11 16:02:18 -05:00
|
|
|
* The semaphore and lock-activity fields in a prepared-xact PGPROC are unused,
|
|
|
|
|
* but its myProcLocks[] lists are valid.
|
1996-08-27 21:59:28 -04:00
|
|
|
*/
|
2002-06-11 09:40:53 -04:00
|
|
|
struct PGPROC
|
1997-09-07 01:04:48 -04:00
|
|
|
{
|
2001-01-22 17:30:06 -05:00
|
|
|
/* proc->links MUST BE FIRST IN STRUCT (see ProcSleep,ProcWakeup,etc) */
|
2001-01-16 01:11:34 -05:00
|
|
|
SHM_QUEUE links; /* list link if process is in a list */
|
2016-06-09 18:02:36 -04:00
|
|
|
PGPROC **procgloballist; /* procglobal list that owns this PGPROC */
|
2001-01-16 01:11:34 -05:00
|
|
|
|
2002-05-04 20:03:29 -04:00
|
|
|
PGSemaphoreData sem; /* ONE semaphore to sleep on */
|
2006-04-13 23:38:56 -04:00
|
|
|
int waitStatus; /* STATUS_WAITING, STATUS_OK or STATUS_ERROR */
|
1996-08-27 21:59:28 -04:00
|
|
|
|
2011-08-10 12:20:30 -04:00
|
|
|
Latch procLatch; /* generic latch for process */
|
|
|
|
|
|
2007-09-05 14:10:48 -04:00
|
|
|
LocalTransactionId lxid; /* local id of top-level transaction currently
|
|
|
|
|
* being executed by this proc, if running;
|
|
|
|
|
* else InvalidLocalTransactionId */
|
2009-08-12 16:53:31 -04:00
|
|
|
int pid; /* Backend's process ID; 0 if prepared xact */
|
2011-11-25 08:02:10 -05:00
|
|
|
int pgprocno; /* NOTE(review): presumably the index of this PGPROC in ProcGlobal->allProcs, as consumed by GetPGProcByNumber -- confirm */
|
2009-08-12 16:53:31 -04:00
|
|
|
|
|
|
|
|
/* These fields are zero while a backend is still starting up: */
|
2007-09-05 14:10:48 -04:00
|
|
|
BackendId backendId; /* This backend's backend ID (if assigned) */
|
2001-09-29 00:02:27 -04:00
|
|
|
Oid databaseId; /* OID of database this backend is using */
|
2005-07-31 13:19:22 -04:00
|
|
|
Oid roleId; /* OID of role using this backend */
|
2001-09-29 00:02:27 -04:00
|
|
|
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-18 20:32:45 -05:00
|
|
|
/*
|
2010-01-16 05:05:59 -05:00
|
|
|
* While in hot standby mode, shows that a conflict signal has been sent
|
|
|
|
|
* for the current transaction. Set/cleared while holding ProcArrayLock,
|
|
|
|
|
* though not required. Accessed without lock, if needed.
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-18 20:32:45 -05:00
|
|
|
*/
|
2010-01-16 05:05:59 -05:00
|
|
|
bool recoveryConflictPending;
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-18 20:32:45 -05:00
|
|
|
|
2001-09-29 00:02:27 -04:00
|
|
|
/* Info about LWLock the process is currently waiting for, if any. */
|
|
|
|
|
bool lwWaiting; /* true if waiting for an LW lock */
|
Make group commit more effective.
When a backend needs to flush the WAL, and someone else is already flushing
the WAL, wait until it releases the WALInsertLock and check if we still need
to do the flush or if the other backend already did the work for us, before
acquiring WALInsertLock. This helps group commit, because when the WAL flush
finishes, all the backends that were waiting for it can be woken up in one
go, and the can all concurrently observe that they're done, rather than
waking them up one by one in a cascading fashion.
This is based on a new LWLock function, LWLockWaitUntilFree(), which has
peculiar semantics. If the lock is immediately free, it grabs the lock and
returns true. If it's not free, it waits until it is released, but then
returns false without grabbing the lock. This is used in XLogFlush(), so
that when the lock is acquired, the backend flushes the WAL, but if it's
not, the backend first checks the current flush location before retrying.
Original patch and benchmarking by Peter Geoghegan and Simon Riggs, although
this patch as committed ended up being very different from that.
2012-01-30 09:40:58 -05:00
|
|
|
uint8 lwWaitMode; /* lwlock mode being waited for */
|
2016-08-15 18:09:55 -04:00
|
|
|
proclist_node lwWaitLink; /* position in LW lock wait list */
|
2001-09-29 00:02:27 -04:00
|
|
|
|
2001-01-22 17:30:06 -05:00
|
|
|
/* Info about lock the process is currently waiting for, if any. */
|
2004-08-27 13:07:42 -04:00
|
|
|
/* waitLock and waitProcLock are NULL if not currently waiting. */
|
2000-12-21 19:51:54 -05:00
|
|
|
LOCK *waitLock; /* Lock object we're sleeping on ... */
|
2004-08-27 13:07:42 -04:00
|
|
|
PROCLOCK *waitProcLock; /* Per-holder info for awaited lock */
|
2000-12-21 19:51:54 -05:00
|
|
|
LOCKMODE waitLockMode; /* type of lock we're waiting for */
|
2005-10-14 22:49:52 -04:00
|
|
|
LOCKMASK heldLocks; /* bitmask for lock types already held on this
|
|
|
|
|
* lock object by this backend */
|
2000-12-21 19:51:54 -05:00
|
|
|
|
2011-03-06 17:49:16 -05:00
|
|
|
/*
|
|
|
|
|
* Info to allow us to wait for synchronous replication, if needed.
|
|
|
|
|
* waitLSN is InvalidXLogRecPtr if not waiting; set only by user backend.
|
|
|
|
|
* syncRepState must not be touched except by owning process or WALSender.
|
2011-03-10 15:56:18 -05:00
|
|
|
* syncRepLinks used only while holding SyncRepLock.
|
2011-03-06 17:49:16 -05:00
|
|
|
*/
|
2011-04-10 11:42:00 -04:00
|
|
|
XLogRecPtr waitLSN; /* waiting for this LSN or higher */
|
|
|
|
|
int syncRepState; /* wait state for sync rep */
|
|
|
|
|
SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */
|
2011-03-06 17:49:16 -05:00
|
|
|
|
2005-12-11 16:02:18 -05:00
|
|
|
/*
|
|
|
|
|
* All PROCLOCK objects for locks held or awaited by this backend are
|
|
|
|
|
* linked into one of these lists, according to the partition number of
|
|
|
|
|
* their lock.
|
|
|
|
|
*/
|
|
|
|
|
SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
|
2004-08-01 13:32:22 -04:00
|
|
|
|
2004-08-29 01:07:03 -04:00
|
|
|
struct XidCache subxids; /* cache for subtransaction XIDs */
|
2011-05-28 19:52:00 -04:00
|
|
|
|
2015-08-06 11:52:51 -04:00
|
|
|
/* Support for group XID clearing. */
|
2016-02-11 08:55:24 -05:00
|
|
|
/* true, if member of ProcArray group waiting for XID clear */
|
2016-06-09 18:02:36 -04:00
|
|
|
bool procArrayGroupMember;
|
2016-02-11 08:55:24 -05:00
|
|
|
/* next ProcArray group member waiting for XID clear */
|
2016-06-09 18:02:36 -04:00
|
|
|
pg_atomic_uint32 procArrayGroupNext;
|
|
|
|
|
|
2016-02-11 08:55:24 -05:00
|
|
|
/*
|
|
|
|
|
* latest transaction id among the transaction's main XID and
|
|
|
|
|
* subtransactions
|
|
|
|
|
*/
|
2016-06-09 18:02:36 -04:00
|
|
|
TransactionId procArrayGroupMemberXid;
|
2015-08-06 11:52:51 -04:00
|
|
|
|
2016-06-09 18:02:36 -04:00
|
|
|
uint32 wait_event_info; /* proc's wait information */
|
2016-03-10 12:44:09 -05:00
|
|
|
|
2016-02-21 05:12:02 -05:00
|
|
|
/* Per-backend LWLock. Protects fields below (but not group fields). */
|
2016-01-29 08:10:47 -05:00
|
|
|
LWLock backendLock;
|
2011-05-28 19:52:00 -04:00
|
|
|
|
|
|
|
|
/* Lock manager data, recording fast-path locks taken by this backend. */
|
|
|
|
|
uint64 fpLockBits; /* lock modes held for each fast-path slot */
|
2012-06-10 15:20:04 -04:00
|
|
|
Oid fpRelId[FP_LOCK_SLOTS_PER_BACKEND]; /* slots for rel oids */
|
2011-08-04 12:38:33 -04:00
|
|
|
bool fpVXIDLock; /* are we holding a fast-path VXID lock? */
|
2012-06-10 15:20:04 -04:00
|
|
|
LocalTransactionId fpLocalTransactionId; /* lxid for fast-path VXID
|
|
|
|
|
* lock */
|
2016-02-07 10:16:13 -05:00
|
|
|
|
|
|
|
|
/*
|
2016-02-21 05:12:02 -05:00
|
|
|
* Support for lock groups. Use LockHashPartitionLockByProc on the group
|
|
|
|
|
* leader to get the LWLock protecting these fields.
|
2016-02-07 10:16:13 -05:00
|
|
|
*/
|
2016-02-21 05:12:02 -05:00
|
|
|
PGPROC *lockGroupLeader; /* lock group leader, if I'm a member */
|
2016-06-09 18:02:36 -04:00
|
|
|
dlist_head lockGroupMembers; /* list of members, if I'm a leader */
|
|
|
|
|
dlist_node lockGroupLink; /* my member link, if I'm a member */
|
2000-12-21 19:51:54 -05:00
|
|
|
};
|
|
|
|
|
|
2002-06-11 09:40:53 -04:00
|
|
|
/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
|
2000-12-21 19:51:54 -05:00
|
|
|
|
1996-08-27 21:59:28 -04:00
|
|
|
|
2007-07-25 08:22:54 -04:00
|
|
|
extern PGDLLIMPORT PGPROC *MyProc;
|
2011-11-25 08:02:10 -05:00
|
|
|
extern PGDLLIMPORT struct PGXACT *MyPgXact;
|
|
|
|
|
|
|
|
|
|
/*
|
2012-01-29 18:56:35 -05:00
|
|
|
* Prior to PostgreSQL 9.2, the fields below were stored as part of the
|
2014-05-06 12:12:18 -04:00
|
|
|
* PGPROC. However, benchmarking revealed that packing these particular
|
2011-11-25 08:02:10 -05:00
|
|
|
* members into a separate array as tightly as possible sped up GetSnapshotData
|
|
|
|
|
* considerably on systems with many CPU cores, by reducing the number of
|
|
|
|
|
* cache lines needing to be fetched. Thus, think very carefully before adding
|
|
|
|
|
* anything else here.
|
|
|
|
|
*/
|
|
|
|
|
typedef struct PGXACT
|
|
|
|
|
{
|
|
|
|
|
TransactionId xid; /* id of top-level transaction currently being
|
|
|
|
|
* executed by this proc, if running and XID
|
|
|
|
|
* is assigned; else InvalidTransactionId */
|
|
|
|
|
|
|
|
|
|
TransactionId xmin; /* minimal running XID as it was when we were
|
|
|
|
|
* starting our xact, excluding LAZY VACUUM:
|
|
|
|
|
* vacuum must not remove tuples deleted by
|
|
|
|
|
* xid >= xmin ! */
|
|
|
|
|
|
|
|
|
|
uint8 vacuumFlags; /* vacuum-related flags, see above */
|
|
|
|
|
bool overflowed; /* NOTE(review): presumably true once the subtransaction XID cache has overflowed -- confirm */
|
2013-03-16 23:22:17 -04:00
|
|
|
bool delayChkpt; /* true if this proc delays checkpoint start;
|
|
|
|
|
* previously called InCommit */
|
1996-08-27 21:59:28 -04:00
|
|
|
|
2011-11-25 08:02:10 -05:00
|
|
|
uint8 nxids; /* NOTE(review): presumably the number of valid entries in the subtransaction XID cache -- confirm */
|
|
|
|
|
} PGXACT;
|
2000-11-28 18:27:57 -05:00
|
|
|
|
|
|
|
|
/*
|
2005-12-11 16:02:18 -05:00
|
|
|
* There is one ProcGlobal struct for the whole database cluster.
|
2000-11-28 18:27:57 -05:00
|
|
|
*/
|
2001-09-29 00:02:27 -04:00
|
|
|
typedef struct PROC_HDR
|
2000-11-28 18:27:57 -05:00
|
|
|
{
|
2011-05-28 19:52:00 -04:00
|
|
|
/* Array of PGPROC structures (not including dummies for prepared txns) */
|
|
|
|
|
PGPROC *allProcs;
|
2012-05-02 14:30:58 -04:00
|
|
|
/* Array of PGXACT structures (not including dummies for prepared txns) */
|
2011-11-25 08:02:10 -05:00
|
|
|
PGXACT *allPgXact;
|
2011-05-28 19:52:00 -04:00
|
|
|
/* Length of allProcs array */
|
|
|
|
|
uint32 allProcCount;
|
2002-06-11 09:40:53 -04:00
|
|
|
/* Head of list of free PGPROC structures */
|
2008-11-02 16:24:52 -05:00
|
|
|
PGPROC *freeProcs;
|
2007-04-16 14:30:04 -04:00
|
|
|
/* Head of list of autovacuum's free PGPROC structures */
|
2008-11-02 16:24:52 -05:00
|
|
|
PGPROC *autovacFreeProcs;
|
Background worker processes
Background workers are postmaster subprocesses that run arbitrary
user-specified code. They can request shared memory access as well as
backend database connections; or they can just use plain libpq frontend
database connections.
Modules listed in shared_preload_libraries can register background
workers in their _PG_init() function; this is early enough that it's not
necessary to provide an extra GUC option, because the necessary extra
resources can be allocated early on. Modules can install more than one
bgworker, if necessary.
Care is taken that these extra processes do not interfere with other
postmaster tasks: only one such process is started on each ServerLoop
iteration. This means a large number of them could be waiting to be
started up and postmaster is still able to quickly service external
connection requests. Also, shutdown sequence should not be impacted by
a worker process that's reasonably well behaved (i.e. promptly responds
to termination signals.)
The current implementation lets worker processes specify their start
time, i.e. at what point in the server startup process they are to be
started: right after postmaster start (in which case they mustn't ask
for shared memory access), when consistent state has been reached
(useful during recovery in a HOT standby server), or when recovery has
terminated (i.e. when normal backends are allowed).
In case of a bgworker crash, actions to take depend on registration
data: if shared memory was requested, then all other connections are
taken down (as well as other bgworkers), just like it were a regular
backend crashing. The bgworker itself is restarted, too, within a
configurable timeframe (which can be configured to be never).
More features to add to this framework can be imagined without much
effort, and have been discussed, but this seems good enough as a useful
unit already.
An elementary sample module is supplied.
Author: Álvaro Herrera
This patch is loosely based on prior patches submitted by KaiGai Kohei,
and unsubmitted code by Simon Riggs.
Reviewed by: KaiGai Kohei, Markus Wanner, Andres Freund,
Heikki Linnakangas, Simon Riggs, Amit Kapila
2012-12-06 12:57:52 -05:00
|
|
|
/* Head of list of bgworker free PGPROC structures */
|
|
|
|
|
PGPROC *bgworkerFreeProcs;
|
2015-08-06 11:52:51 -04:00
|
|
|
/* First pgproc waiting for group XID clear */
|
2016-02-11 08:55:24 -05:00
|
|
|
pg_atomic_uint32 procArrayGroupFirst;
|
Reduce idle power consumption of walwriter and checkpointer processes.
This patch modifies the walwriter process so that, when it has not found
anything useful to do for many consecutive wakeup cycles, it extends its
sleep time to reduce the server's idle power consumption. It reverts to
normal as soon as it's done any successful flushes. It's still true that
during any async commit, backends check for completed, unflushed pages of
WAL and signal the walwriter if there are any; so that in practice the
walwriter can get awakened and returned to normal operation sooner than the
sleep time might suggest.
Also, improve the checkpointer so that it uses a latch and a computed delay
time to not wake up at all except when it has something to do, replacing a
previous hardcoded 0.5 sec wakeup cycle. This also is primarily useful for
reducing the server's power consumption when idle.
In passing, get rid of the dedicated latch for signaling the walwriter in
favor of using its procLatch, since that comports better with possible
generic signal handlers using that latch. Also, fix a pre-existing bug
with failure to save/restore errno in walwriter's signal handlers.
Peter Geoghegan, somewhat simplified by Tom
2012-05-08 20:03:26 -04:00
|
|
|
/* WALWriter process's latch */
|
|
|
|
|
Latch *walwriterLatch;
|
|
|
|
|
/* Checkpointer process's latch */
|
|
|
|
|
Latch *checkpointerLatch;
|
2005-10-11 16:41:32 -04:00
|
|
|
/* Current shared estimate of appropriate spins_per_delay value */
|
|
|
|
|
int spins_per_delay;
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-18 20:32:45 -05:00
|
|
|
/* The proc of the Startup process, since not in ProcArray */
|
|
|
|
|
PGPROC *startupProc;
|
|
|
|
|
int startupProcPid; /* NOTE(review): presumably the process ID of the Startup process -- confirm */
|
2011-08-02 13:23:52 -04:00
|
|
|
/* Buffer id of the buffer that Startup process waits for pin on, or -1 */
|
2010-01-23 11:37:12 -05:00
|
|
|
int startupBufferPinWaitBufId;
|
2000-11-28 18:27:57 -05:00
|
|
|
} PROC_HDR;
|
|
|
|
|
|
2011-05-28 19:52:00 -04:00
|
|
|
extern PROC_HDR *ProcGlobal;
|
|
|
|
|
|
2011-11-25 08:02:10 -05:00
|
|
|
extern PGPROC *PreparedXactProcs;
|
|
|
|
|
|
2016-08-15 18:09:55 -04:00
|
|
|
/* Accessor for PGPROC given a pgprocno. */
|
|
|
|
|
#define GetPGProcByNumber(n) (&ProcGlobal->allProcs[(n)])
|
|
|
|
|
|
2006-01-04 16:06:32 -05:00
|
|
|
/*
|
2007-03-07 08:35:03 -05:00
|
|
|
* We set aside some extra PGPROC structures for auxiliary processes,
|
2006-01-04 16:06:32 -05:00
|
|
|
* ie things that aren't full-fledged backends but need shmem access.
|
2009-02-23 04:28:50 -05:00
|
|
|
*
|
2011-11-01 13:14:47 -04:00
|
|
|
* Background writer, checkpointer and WAL writer run during normal operation.
|
|
|
|
|
* Startup process and WAL receiver also consume 2 slots, but WAL writer is
|
|
|
|
|
* launched only after startup has exited, so we only need 4 slots.
|
2006-01-04 16:06:32 -05:00
|
|
|
*/
|
2011-11-01 13:14:47 -04:00
|
|
|
#define NUM_AUXILIARY_PROCS 4
|
2003-11-19 10:55:08 -05:00
|
|
|
|
|
|
|
|
|
2002-10-31 16:34:17 -05:00
|
|
|
/* configurable options */
|
2001-09-29 00:02:27 -04:00
|
|
|
extern int DeadlockTimeout;
|
2002-10-31 16:34:17 -05:00
|
|
|
extern int StatementTimeout;
|
2013-03-16 23:22:17 -04:00
|
|
|
extern int LockTimeout;
|
2016-03-16 11:30:45 -04:00
|
|
|
extern int IdleInTransactionSessionTimeout;
|
2007-11-15 16:14:46 -05:00
|
|
|
extern bool log_lock_waits;
|
2001-09-29 00:02:27 -04:00
|
|
|
|
|
|
|
|
|
1996-08-27 21:59:28 -04:00
|
|
|
/*
|
|
|
|
|
* Function Prototypes
|
|
|
|
|
*/
|
2005-06-17 18:32:51 -04:00
|
|
|
extern int ProcGlobalSemas(void);
|
2005-08-20 19:26:37 -04:00
|
|
|
extern Size ProcGlobalShmemSize(void);
|
2005-06-17 18:32:51 -04:00
|
|
|
extern void InitProcGlobal(void);
|
2000-11-28 18:27:57 -05:00
|
|
|
extern void InitProcess(void);
|
2006-01-04 16:06:32 -05:00
|
|
|
extern void InitProcessPhase2(void);
|
2007-03-07 08:35:03 -05:00
|
|
|
extern void InitAuxiliaryProcess(void);
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-18 20:32:45 -05:00
|
|
|
|
|
|
|
|
extern void PublishStartupProcessInformation(void);
|
2010-01-23 11:37:12 -05:00
|
|
|
extern void SetStartupBufferPinWaitBufId(int bufid);
|
2010-02-25 21:01:40 -05:00
|
|
|
extern int GetStartupBufferPinWaitBufId(void);
|
Allow read only connections during recovery, known as Hot Standby.
Enabled by recovery_connections = on (default) and forcing archive recovery using a recovery.conf. Recovery processing now emulates the original transactions as they are replayed, providing full locking and MVCC behaviour for read only queries. Recovery must enter consistent state before connections are allowed, so there is a delay, typically short, before connections succeed. Replay of recovering transactions can conflict and in some cases deadlock with queries during recovery; these result in query cancellation after max_standby_delay seconds have expired. Infrastructure changes have minor effects on normal running, though introduce four new types of WAL record.
New test mode "make standbycheck" allows regression tests of static command behaviour on a standby server while in recovery. Typical and extreme dynamic behaviours have been checked via code inspection and manual testing. Few port specific behaviours have been utilised, though primary testing has been on Linux only so far.
This commit is the basic patch. Additional changes will follow in this release to enhance some aspects of behaviour, notably improved handling of conflicts, deadlock detection and query cancellation. Changes to VACUUM FULL are also required.
Simon Riggs, with significant and lengthy review by Heikki Linnakangas, including streamlined redesign of snapshot creation and two-phase commit.
Important contributions from Florian Pflug, Mark Kirkwood, Merlin Moncure, Greg Stark, Gianni Ciolli, Gabriele Bartolini, Hannu Krosing, Robert Haas, Tatsuo Ishii, Hiroyuki Yamada plus support and feedback from many other community members.
2009-12-18 20:32:45 -05:00
|
|
|
|
2005-06-17 18:32:51 -04:00
|
|
|
extern bool HaveNFreeProcs(int n);
|
2004-07-16 23:32:14 -04:00
|
|
|
extern void ProcReleaseLocks(bool isCommit);
|
1997-09-07 01:04:48 -04:00
|
|
|
|
1997-09-08 17:56:23 -04:00
|
|
|
extern void ProcQueueInit(PROC_QUEUE *queue);
|
2005-12-11 16:02:18 -05:00
|
|
|
extern int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
|
2004-07-16 23:32:14 -04:00
|
|
|
extern PGPROC *ProcWakeup(PGPROC *proc, int waitStatus);
|
Try to reduce confusion about what is a lock method identifier, a lock
method control structure, or a table of control structures.
. Use type LOCKMASK where an int is not a counter.
. Get rid of INVALID_TABLEID, use INVALID_LOCKMETHOD instead.
. Use INVALID_LOCKMETHOD instead of (LOCKMETHOD) NULL, because
LOCKMETHOD is not a pointer.
. Define and use macro LockMethodIsValid.
. Rename LOCKMETHOD to LOCKMETHODID.
. Remove global variable LongTermTableId in lmgr.c, because it is
never used.
. Make LockTableId static in lmgr.c, because it is used nowhere else.
Why not remove it and use DEFAULT_LOCKMETHOD?
. Rename the lock method control structure from LOCKMETHODTABLE to
LockMethodData. Introduce a pointer type named LockMethod.
. Remove elog(FATAL) after InitLockTable() call in
CreateSharedMemoryAndSemaphores(), because if something goes wrong,
there is elog(FATAL) in LockMethodTableInit(), and if this doesn't
help, an elog(ERROR) in InitLockTable() is promoted to FATAL.
. Make InitLockTable() void, because its only caller does not use its
return value any more.
. Rename variables in lock.c to avoid statements like
LockMethodTable[NumLockMethods] = lockMethodTable;
lockMethodTable = LockMethodTable[lockmethod];
. Change LOCKMETHODID type to uint16 to fit into struct LOCKTAG.
. Remove static variables BITS_OFF and BITS_ON from lock.c, because
I agree to this doubt:
* XXX is a fetch from a static array really faster than a shift?
. Define and use macros LOCKBIT_ON/OFF.
Manfred Koizar
2003-12-01 16:59:25 -05:00
|
|
|
extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
|
2015-02-03 17:24:38 -05:00
|
|
|
extern void CheckDeadLockAlert(void);
|
2010-02-12 20:32:20 -05:00
|
|
|
extern bool IsWaitingForLock(void);
|
2012-04-18 11:17:30 -04:00
|
|
|
extern void LockErrorCleanup(void);
|
1996-08-27 21:59:28 -04:00
|
|
|
|
2016-10-04 10:50:13 -04:00
|
|
|
extern void ProcWaitForSignal(uint32 wait_event_info);
|
2005-05-19 17:35:48 -04:00
|
|
|
extern void ProcSendSignal(int pid);
|
2001-07-06 17:04:26 -04:00
|
|
|
|
2016-02-07 10:16:13 -05:00
|
|
|
extern void BecomeLockGroupLeader(void);
|
|
|
|
|
extern bool BecomeLockGroupMember(PGPROC *leader, int pid);
|
|
|
|
|
|
2001-11-05 12:46:40 -05:00
|
|
|
#endif /* _PROC_H_ */
|