diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 263bc73f11e..8099b0d021f 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -1031,7 +1031,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) zerobuf.data, true); - PageSetChecksumInplace(page, lastblock); + PageSetChecksum(page, lastblock); smgrextend(RelationGetSmgr(rel), MAIN_FORKNUM, lastblock, zerobuf.data, false); diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index e4a819efeeb..f2e10b82b7d 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -252,9 +252,9 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags) Assert(begininsert_called); /* - * Ordinarily, buffer should be exclusive-locked and marked dirty before - * we get here, otherwise we could end up violating one of the rules in - * access/transam/README. + * Ordinarily, the buffer should be exclusive-locked (or share-exclusive + * in case of hint bits) and marked dirty before we get here, otherwise we + * could end up violating one of the rules in access/transam/README. * * Some callers intentionally register a clean page and never update that * page's LSN; in that case they can pass the flag REGBUF_NO_CHANGE to @@ -262,8 +262,11 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags) */ #ifdef USE_ASSERT_CHECKING if (!(flags & REGBUF_NO_CHANGE)) - Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE) && - BufferIsDirty(buffer)); + { + Assert(BufferIsDirty(buffer)); + Assert(BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_EXCLUSIVE) || + BufferIsLockedByMeInMode(buffer, BUFFER_LOCK_SHARE_EXCLUSIVE)); + } #endif if (block_id >= max_registered_block_id) @@ -1123,12 +1126,6 @@ XLogCheckBufferNeedsBackup(Buffer buffer) * buffer. The buffer already needs to have been marked dirty by * MarkBufferDirtyHint(). * - * We can't use the plain backup block mechanism since that relies on the - * Buffer being exclusively locked. Since some modifications (setting LSN, hint - * bits) are allowed in a sharelocked buffer that can lead to wal checksum - * failures. So instead we copy the page and insert the copied data as normal - * record data. - * * We only need to do something if page has not yet been full page written in * this checkpoint round. The LSN of the inserted wal record is returned if we * had to write, InvalidXLogRecPtr otherwise. @@ -1164,37 +1161,13 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) if (lsn <= RedoRecPtr) { int flags = 0; - PGAlignedBlock copied_buffer; - char *origdata = (char *) BufferGetBlock(buffer); - RelFileLocator rlocator; - ForkNumber forkno; - BlockNumber blkno; - - /* - * Copy buffer so we don't have to worry about concurrent hint bit or - * lsn updates. We assume pd_lower/upper cannot be changed without an - * exclusive lock, so the contents bkp are not racy. - */ - if (buffer_std) - { - /* Assume we can omit data between pd_lower and pd_upper */ - Page page = BufferGetPage(buffer); - uint16 lower = ((PageHeader) page)->pd_lower; - uint16 upper = ((PageHeader) page)->pd_upper; - - memcpy(copied_buffer.data, origdata, lower); - memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper); - } - else - memcpy(copied_buffer.data, origdata, BLCKSZ); XLogBeginInsert(); if (buffer_std) flags |= REGBUF_STANDARD; - BufferGetTag(buffer, &rlocator, &forkno, &blkno); - XLogRegisterBlock(0, &rlocator, forkno, blkno, copied_buffer.data, flags); + XLogRegisterBuffer(0, buffer, flags); recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT); } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index e212f6110f2..f4f07c9a1c0 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -4440,7 +4440,6 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, ErrorContextCallback errcallback; instr_time io_start; Block bufBlock; - char *bufToWrite; Assert(BufferLockHeldByMeInMode(buf, BUFFER_LOCK_EXCLUSIVE) || BufferLockHeldByMeInMode(buf, BUFFER_LOCK_SHARE_EXCLUSIVE)); @@ -4503,22 +4502,15 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, */ bufBlock = BufHdrGetBlock(buf); - /* - * Update page checksum if desired. Since we have only shared lock on the - * buffer, other processes might be updating hint bits in it, so we must - * copy the page to private storage if we do checksumming. - */ - bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum); + /* Update page checksum if desired. */ + PageSetChecksum((Page) bufBlock, buf->tag.blockNum); io_start = pgstat_prepare_io_time(track_io_timing); - /* - * bufToWrite is either the shared buffer or a copy, as appropriate. - */ smgrwrite(reln, BufTagGetForkNum(&buf->tag), buf->tag.blockNum, - bufToWrite, + bufBlock, false); /* diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 404c6bccbdd..b69398c6375 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -199,7 +199,7 @@ FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln) reln = smgropen(BufTagGetRelFileLocator(&bufHdr->tag), MyProcNumber); - PageSetChecksumInplace(localpage, bufHdr->tag.blockNum); + PageSetChecksum(localpage, bufHdr->tag.blockNum); io_start = pgstat_prepare_io_time(track_io_timing); diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index de85911e3ac..56f1f7ae9fc 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -1492,53 +1492,20 @@ PageIndexTupleOverwrite(Page page, OffsetNumber offnum, /* - * Set checksum for a page in shared buffers. + * Set checksum on a page. * - * If checksums are disabled, or if the page is not initialized, just return - * the input. Otherwise, we must make a copy of the page before calculating - * the checksum, to prevent concurrent modifications (e.g. setting hint bits) - * from making the final checksum invalid. It doesn't matter if we include or - * exclude hints during the copy, as long as we write a valid page and - * associated checksum. + * If the page is in shared buffers, it needs to be locked in at least + * share-exclusive mode. * - * Returns a pointer to the block-sized data that needs to be written. Uses - * statically-allocated memory, so the caller must immediately write the - * returned page and not refer to it again. - */ -char * -PageSetChecksumCopy(Page page, BlockNumber blkno) -{ - static char *pageCopy = NULL; - - /* If we don't need a checksum, just return the passed-in data */ - if (PageIsNew(page) || !DataChecksumsEnabled()) - return page; - - /* - * We allocate the copy space once and use it over on each subsequent - * call. The point of palloc'ing here, rather than having a static char - * array, is first to ensure adequate alignment for the checksumming code - * and second to avoid wasting space in processes that never call this. - */ - if (pageCopy == NULL) - pageCopy = MemoryContextAllocAligned(TopMemoryContext, - BLCKSZ, - PG_IO_ALIGN_SIZE, - 0); - - memcpy(pageCopy, page, BLCKSZ); - ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno); - return pageCopy; -} - -/* - * Set checksum for a page in private memory. + * If checksums are disabled, or if the page is not initialized, just + * return. Otherwise compute and set the checksum. * - * This must only be used when we know that no other process can be modifying - * the page buffer. + * In the past this needed to be done on a copy of the page, due to the + * possibility of e.g., hint bits being set concurrently. However, this is not + * necessary anymore as hint bits won't be set while IO is going on. */ void -PageSetChecksumInplace(Page page, BlockNumber blkno) +PageSetChecksum(Page page, BlockNumber blkno) { /* If we don't need a checksum, just return */ if (PageIsNew(page) || !DataChecksumsEnabled()) diff --git a/src/backend/storage/smgr/bulk_write.c b/src/backend/storage/smgr/bulk_write.c index 36b28824ec8..f3c24082a69 100644 --- a/src/backend/storage/smgr/bulk_write.c +++ b/src/backend/storage/smgr/bulk_write.c @@ -279,7 +279,7 @@ smgr_bulk_flush(BulkWriteState *bulkstate) BlockNumber blkno = pending_writes[i].blkno; Page page = pending_writes[i].buf->data; - PageSetChecksumInplace(page, blkno); + PageSetChecksum(page, blkno); if (blkno >= bulkstate->relsize) { diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 3de58ba4312..e5267b93fe6 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -537,7 +537,6 @@ extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems); extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum); extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, const void *newtup, Size newsize); -extern char *PageSetChecksumCopy(Page page, BlockNumber blkno); -extern void PageSetChecksumInplace(Page page, BlockNumber blkno); +extern void PageSetChecksum(Page page, BlockNumber blkno); #endif /* BUFPAGE_H */ diff --git a/src/test/modules/test_aio/test_aio.c b/src/test/modules/test_aio/test_aio.c index b1aa8af9ec0..2ae4a559fab 100644 --- a/src/test/modules/test_aio/test_aio.c +++ b/src/test/modules/test_aio/test_aio.c @@ -288,7 +288,7 @@ modify_rel_block(PG_FUNCTION_ARGS) } else { - PageSetChecksumInplace(page, blkno); + PageSetChecksum(page, blkno); } smgrwrite(RelationGetSmgr(rel),