diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f74d7a2ab1a..b20c6a2886b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4723,8 +4723,6 @@ SetDataChecksumsOnInProgress(void) { uint64 barrier; - Assert(ControlFile != NULL); - /* * The state transition is performed in a critical section with * checkpoints held off to provide crash safety. @@ -4738,25 +4736,16 @@ SetDataChecksumsOnInProgress(void) XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_ON; SpinLockRelease(&XLogCtl->info_lck); - barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON); - - MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; - END_CRIT_SECTION(); - - /* - * Update the controlfile before waiting since if we have an immediate - * shutdown while waiting we want to come back up with checksums enabled. - */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); ControlFile->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_ON; UpdateControlFile(); LWLockRelease(ControlFileLock); - /* - * Await state change in all backends to ensure that all backends are in - * "inprogress-on". Once done we know that all backends are writing data - * checksums. - */ + barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON); + + MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; + END_CRIT_SECTION(); + WaitForProcSignalBarrier(barrier); } @@ -4787,8 +4776,6 @@ SetDataChecksumsOn(void) { uint64 barrier; - Assert(ControlFile != NULL); - SpinLockAcquire(&XLogCtl->info_lck); /* @@ -4818,11 +4805,6 @@ SetDataChecksumsOn(void) XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_VERSION; SpinLockRelease(&XLogCtl->info_lck); - barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_ON); - - MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; - END_CRIT_SECTION(); - /* * Update the controlfile before waiting since if we have an immediate * shutdown while waiting we want to come back up with checksums enabled. @@ -4832,12 +4814,12 @@ SetDataChecksumsOn(void) UpdateControlFile(); LWLockRelease(ControlFileLock); - RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST); + barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_ON); - /* - * Await state transition to "on" in all backends. When done we know that - * data checksums are both written and verified in all backends. - */ + MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; + END_CRIT_SECTION(); + + RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST); WaitForProcSignalBarrier(barrier); } @@ -4859,8 +4841,6 @@ SetDataChecksumsOff(void) { uint64 barrier; - Assert(ControlFile != NULL); - SpinLockAcquire(&XLogCtl->info_lck); /* If data checksums are already disabled there is nothing to do */ @@ -4891,22 +4871,17 @@ SetDataChecksumsOff(void) XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_OFF; SpinLockRelease(&XLogCtl->info_lck); + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + ControlFile->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_OFF; + UpdateControlFile(); + LWLockRelease(ControlFileLock); + barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF); MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; END_CRIT_SECTION(); - LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - ControlFile->data_checksum_version = PG_DATA_CHECKSUM_OFF; - UpdateControlFile(); - LWLockRelease(ControlFileLock); - RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST); - - /* - * Update local state in all backends to ensure that any backend in - * "on" state is changed to "inprogress-off". - */ WaitForProcSignalBarrier(barrier); /* @@ -4935,18 +4910,17 @@ SetDataChecksumsOff(void) XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_OFF; SpinLockRelease(&XLogCtl->info_lck); - barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_OFF); - - MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; - END_CRIT_SECTION(); - LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); ControlFile->data_checksum_version = PG_DATA_CHECKSUM_OFF; UpdateControlFile(); LWLockRelease(ControlFileLock); - RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST); + barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_OFF); + MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; + END_CRIT_SECTION(); + + RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | CHECKPOINT_FAST); WaitForProcSignalBarrier(barrier); } @@ -4961,6 +4935,7 @@ SetDataChecksumsOff(void) void InitLocalDataChecksumState(void) { + Assert(InterruptHoldoffCount > 0); SpinLockAcquire(&XLogCtl->info_lck); SetLocalDataChecksumState(XLogCtl->data_checksum_version); SpinLockRelease(&XLogCtl->info_lck); @@ -5427,7 +5402,6 @@ XLOGShmemInit(void *arg) /* Use the checksum info from control file */ XLogCtl->data_checksum_version = ControlFile->data_checksum_version; - SetLocalDataChecksumState(XLogCtl->data_checksum_version); SpinLockInit(&XLogCtl->Insert.insertpos_lck); @@ -6650,6 +6624,7 @@ StartupXLOG(void) ControlFile->state = DB_IN_PRODUCTION; SpinLockAcquire(&XLogCtl->info_lck); + ControlFile->data_checksum_version = XLogCtl->data_checksum_version; XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE; SpinLockRelease(&XLogCtl->info_lck); @@ -7518,7 +7493,9 @@ CreateCheckPoint(int flags) * Get the current data_checksum_version value from xlogctl, valid at the * time of the checkpoint. */ + SpinLockAcquire(&XLogCtl->info_lck); checkPoint.dataChecksumState = XLogCtl->data_checksum_version; + SpinLockRelease(&XLogCtl->info_lck); if (shutdown) { @@ -7639,10 +7616,6 @@ CreateCheckPoint(int flags) checkPoint.nextOid += TransamVariables->oidCount; LWLockRelease(OidGenLock); - SpinLockAcquire(&XLogCtl->info_lck); - checkPoint.dataChecksumState = XLogCtl->data_checksum_version; - SpinLockRelease(&XLogCtl->info_lck); - checkPoint.logicalDecodingEnabled = IsLogicalDecodingEnabled(); MultiXactGetCheckptMulti(shutdown, @@ -7792,9 +7765,6 @@ CreateCheckPoint(int flags) ControlFile->minRecoveryPoint = InvalidXLogRecPtr; ControlFile->minRecoveryPointTLI = 0; - /* make sure we start with the checksum version as of the checkpoint */ - ControlFile->data_checksum_version = checkPoint.dataChecksumState; - /* * Persist unloggedLSN value. It's reset on crash recovery, so this goes * unused on non-shutdown checkpoints, but seems useful to store it always @@ -8871,11 +8841,6 @@ xlog_redo(XLogReaderState *record) MultiXactAdvanceOldest(checkPoint.oldestMulti, checkPoint.oldestMultiDB); - SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->data_checksum_version = checkPoint.dataChecksumState; - SetLocalDataChecksumState(checkPoint.dataChecksumState); - SpinLockRelease(&XLogCtl->info_lck); - /* * No need to set oldestClogXid here as well; it'll be set when we * redo an xl_clog_truncate if it changed since initialization. @@ -8936,6 +8901,8 @@ xlog_redo(XLogReaderState *record) LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; ControlFile->data_checksum_version = checkPoint.dataChecksumState; + + UpdateControlFile(); LWLockRelease(ControlFileLock); /* @@ -8962,8 +8929,6 @@ xlog_redo(XLogReaderState *record) { CheckPoint checkPoint; TimeLineID replayTLI; - bool new_state = false; - int old_state; memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint)); /* In an ONLINE checkpoint, treat the XID counter as a minimum */ @@ -9002,8 +8967,6 @@ xlog_redo(XLogReaderState *record) /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; - old_state = ControlFile->data_checksum_version; - ControlFile->data_checksum_version = checkPoint.dataChecksumState; LWLockRelease(ControlFileLock); /* TLI should not change in an on-line checkpoint */ @@ -9015,18 +8978,6 @@ xlog_redo(XLogReaderState *record) RecoveryRestartPoint(&checkPoint, record); - /* - * If the data checksum state change we need to emit a barrier. - */ - SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->data_checksum_version = checkPoint.dataChecksumState; - if (checkPoint.dataChecksumState != old_state) - new_state = true; - SpinLockRelease(&XLogCtl->info_lck); - - if (new_state) - EmitAndWaitDataChecksumsBarrier(checkPoint.dataChecksumState); - /* * After replaying a checkpoint record, free all smgr objects. * Otherwise we would never do so for dropped relations, as the @@ -9195,6 +9146,7 @@ xlog_redo(XLogReaderState *record) SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->data_checksum_version = redo_rec.data_checksum_version; + SetLocalDataChecksumState(redo_rec.data_checksum_version); if (redo_rec.data_checksum_version != ControlFile->data_checksum_version) new_state = true; SpinLockRelease(&XLogCtl->info_lck); @@ -9268,6 +9220,11 @@ xlog2_redo(XLogReaderState *record) XLogCtl->data_checksum_version = state.new_checksum_state; SpinLockRelease(&XLogCtl->info_lck); + LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); + ControlFile->data_checksum_version = state.new_checksum_state; + UpdateControlFile(); + LWLockRelease(ControlFileLock); + /* * Block on a procsignalbarrier to await all processes having seen the * change to checksum status. Once the barrier has been passed we can diff --git a/src/backend/postmaster/auxprocess.c b/src/backend/postmaster/auxprocess.c index 8fdc518b3a1..ba8c9add67a 100644 --- a/src/backend/postmaster/auxprocess.c +++ b/src/backend/postmaster/auxprocess.c @@ -68,6 +68,14 @@ AuxiliaryProcessMainCommon(void) BaseInit(); + /* + * Prevent consuming interrupts between setting ProcSignalInit and setting + * the initial local data checksum value. If a barrier is emitted, and + * absorbed, before local cached state is initialized the state transition + * can be invalid. + */ + HOLD_INTERRUPTS(); + ProcSignalInit(NULL, 0); /* @@ -88,6 +96,8 @@ AuxiliaryProcessMainCommon(void) */ InitLocalDataChecksumState(); + RESUME_INTERRUPTS(); + /* * Auxiliary processes don't run transactions, but they may need a * resource owner anyway to manage buffer pins acquired outside diff --git a/src/backend/postmaster/datachecksum_state.c b/src/backend/postmaster/datachecksum_state.c index 77d0316b5cb..4a8aa5b5ee2 100644 --- a/src/backend/postmaster/datachecksum_state.c +++ b/src/backend/postmaster/datachecksum_state.c @@ -263,8 +263,8 @@ static const ChecksumBarrierCondition checksum_barriers[7] = {PG_DATA_CHECKSUM_INPROGRESS_OFF, PG_DATA_CHECKSUM_VERSION}, /* - * If checksums are being enabled when launcher_exit is executed, state - * is set to off since we cannot reach on at that point. + * If checksums are being enabled when launcher_exit is executed, state is + * set to off since we cannot reach on at that point. */ {PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_INPROGRESS_OFF}, }; @@ -1291,8 +1291,8 @@ DataChecksumsShmemRequest(void *arg) /* * DatabaseExists * - * Scans the system catalog to check if a database with the given Oid exist - * and returns true if it is found, else false. + * Scans the system catalog to check if a database with the given Oid exists + * and returns true if it is found else false. */ static bool DatabaseExists(Oid dboid) diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 6f074013aa9..ecf78b9a986 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -756,6 +756,14 @@ InitPostgres(const char *in_dbname, Oid dboid, */ SharedInvalBackendInit(false); + /* + * Prevent consuming interrupts between setting ProcSignalInit and setting + * the initial local data checksum value. If a barrier is emitted, and + * absorbed, before local cached state is initialized the state transition + * can be invalid. + */ + HOLD_INTERRUPTS(); + ProcSignalInit(MyCancelKey, MyCancelKeyLength); /* @@ -776,6 +784,8 @@ InitPostgres(const char *in_dbname, Oid dboid, */ InitLocalDataChecksumState(); + RESUME_INTERRUPTS(); + /* * Also set up timeout handlers needed for backend operation. We need * these in every case except bootstrap.