From fee2a2fa39834d8d5eaa981298fce9d2ed31546d Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Mon, 9 Sep 2019 21:32:42 +0000 Subject: [PATCH] Change synchronization rules for vm_page reference counting. There are several mechanisms by which a vm_page reference is held, preventing the page from being freed back to the page allocator. In particular, holding the page's object lock is sufficient to prevent the page from being freed; holding the busy lock or a wiring is sufficient as well. These references are protected by the page lock, which must therefore be acquired for many per-page operations. This results in false sharing since the page locks are external to the vm_page structures themselves and each lock protects multiple structures. Transition to using an atomically updated per-page reference counter. The object's reference is counted using a flag bit in the counter. A second flag bit is used to atomically block new references via pmap_extract_and_hold() while removing managed mappings of a page. Thus, the reference count of a page is guaranteed not to increase if the page is unbusied, unmapped, and the object's write lock is held. As a consequence of this, the page lock no longer protects a page's identity; operations which move pages between objects are now synchronized solely by the objects' locks. The vm_page_wire() and vm_page_unwire() KPIs are changed. The former requires that either the object lock or the busy lock is held. The latter no longer has a return value and may free the page if it releases the last reference to that page. vm_page_unwire_noq() behaves the same as before; the caller is responsible for checking its return value and freeing or enqueuing the page as appropriate. vm_page_wire_mapped() is introduced for use in pmap_extract_and_hold(). It fails if the page is concurrently being unmapped, typically triggering a fallback to the fault handler. 
vm_page_wire() no longer requires the page lock and vm_page_unwire() now internally acquires the page lock when releasing the last wiring of a page (since the page lock still protects a page's queue state). In particular, synchronization details are no longer leaked into the caller. The change excises the page lock from several frequently executed code paths. In particular, vm_object_terminate() no longer bounces between page locks as it releases an object's pages, and direct I/O and sendfile(SF_NOCACHE) completions no longer require the page lock. In these latter cases we now get linear scalability in the common scenario where different threads are operating on different files. __FreeBSD_version is bumped. The DRM ports have been updated to accommodate the KPI changes. Reviewed by: jeff (earlier version) Tested by: gallatin (earlier version), pho Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D20486 --- share/man/man9/Makefile | 4 +- share/man/man9/vm_page_wire.9 | 52 +- sys/amd64/amd64/efirt_machdep.c | 2 +- sys/amd64/amd64/pmap.c | 26 +- sys/amd64/sgx/sgx.c | 2 - sys/amd64/vmm/vmm.c | 2 - sys/arm/arm/pmap-v4.c | 20 +- sys/arm/arm/pmap-v6.c | 15 +- sys/arm/nvidia/drm2/tegra_bo.c | 2 - sys/arm64/arm64/efirt_machdep.c | 2 +- sys/arm64/arm64/pmap.c | 10 +- .../opensolaris/uts/common/fs/zfs/zfs_vnops.c | 12 +- sys/compat/linuxkpi/common/include/linux/mm.h | 7 +- sys/compat/linuxkpi/common/src/linux_compat.c | 2 - sys/compat/linuxkpi/common/src/linux_page.c | 4 - .../interface/vchiq_arm/vchiq_2835_arm.c | 5 +- sys/dev/agp/agp.c | 6 +- sys/dev/agp/agp_i810.c | 2 - sys/dev/cxgbe/tom/t4_cpl_io.c | 5 - sys/dev/cxgbe/tom/t4_ddp.c | 2 - sys/dev/drm2/ttm/ttm_bo_vm.c | 4 - sys/dev/drm2/ttm/ttm_page_alloc.c | 2 +- sys/dev/drm2/ttm/ttm_tt.c | 2 - sys/dev/md/md.c | 2 - sys/dev/netmap/netmap_freebsd.c | 2 - sys/dev/xen/gntdev/gntdev.c | 6 +- sys/dev/xen/privcmd/privcmd.c | 6 +- sys/fs/tmpfs/tmpfs_subr.c | 3 +- sys/i386/i386/pmap.c | 25 +- 
sys/kern/kern_exec.c | 12 +- sys/kern/kern_kcov.c | 3 - sys/kern/kern_sendfile.c | 10 +- sys/kern/sys_process.c | 5 +- sys/kern/uipc_mbuf.c | 4 - sys/kern/uipc_shm.c | 7 - sys/mips/mips/pmap.c | 14 +- sys/net/bpf_zerocopy.c | 5 +- sys/powerpc/aim/mmu_oea.c | 9 +- sys/powerpc/aim/mmu_oea64.c | 10 +- sys/powerpc/booke/pmap.c | 14 +- sys/riscv/riscv/pmap.c | 9 +- sys/sparc64/sparc64/pmap.c | 14 +- sys/sys/param.h | 2 +- sys/vm/device_pager.c | 4 - sys/vm/vm_fault.c | 78 +-- sys/vm/vm_glue.c | 6 - sys/vm/vm_object.c | 69 +-- sys/vm/vm_page.c | 578 +++++++++++------- sys/vm/vm_page.h | 93 ++- sys/vm/vm_pageout.c | 141 ++++- sys/vm/vm_swapout.c | 22 +- 51 files changed, 693 insertions(+), 650 deletions(-) diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index b6ea0743cdd..51da0d4c3fe 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -2223,7 +2223,9 @@ MLINKS+=vm_map_lookup.9 vm_map_lookup_done.9 MLINKS+=vm_map_max.9 vm_map_min.9 \ vm_map_max.9 vm_map_pmap.9 MLINKS+=vm_map_stack.9 vm_map_growstack.9 -MLINKS+=vm_map_wire.9 vm_map_unwire.9 +MLINKS+=vm_map_wire.9 vm_map_wire_mapped.9 \ + vm_page_wire.9 vm_page_unwire.9 \ + vm_page_wire.9 vm_page_unwire_noq.9 MLINKS+=vm_page_bits.9 vm_page_clear_dirty.9 \ vm_page_bits.9 vm_page_dirty.9 \ vm_page_bits.9 vm_page_is_valid.9 \ diff --git a/share/man/man9/vm_page_wire.9 b/share/man/man9/vm_page_wire.9 index 4722f71bdc6..e3f03ab6b1f 100644 --- a/share/man/man9/vm_page_wire.9 +++ b/share/man/man9/vm_page_wire.9 @@ -26,12 +26,13 @@ .\" .\" $FreeBSD$ .\" -.Dd July 13, 2001 +.Dd September 9, 2019 .Dt VM_PAGE_WIRE 9 .Os .Sh NAME .Nm vm_page_wire , -.Nm vm_page_unwire +.Nm vm_page_unwire , +.Nm vm_page_unwire_noq .Nd "wire and unwire pages" .Sh SYNOPSIS .In sys/param.h @@ -39,29 +40,44 @@ .In vm/vm_page.h .Ft void .Fn vm_page_wire "vm_page_t m" +.Ft bool +.Fn vm_page_wire_mapped "vm_page_t m" .Ft void -.Fn vm_page_unwire "vm_page_t m" "int activate" +.Fn vm_page_unwire "vm_page_t m" "int queue" +.Ft bool 
+.Fn vm_page_unwire_noq "vm_page_t m" .Sh DESCRIPTION The .Fn vm_page_wire -function increments the wire count on a page, and removes it from -whatever queue it is on. +and +.Fn vm_page_wire_mapped +functions wire the page, preventing it from being reclaimed by the page +daemon or when its containing object is destroyed. +Both functions require that the page belong to an object. +The +.Fn vm_page_wire_mapped +function is for use by the +.Xr pmap 9 +layer following a lookup. +This function may fail if mappings of the page are concurrently +being destroyed, in which case it will return false. .Pp The .Fn vm_page_unwire -function releases one of the wirings on the page. -When -.Va write_count -reaches zero the page is placed back onto either the active queue -(if -.Fa activate -is non-zero) or onto the inactive queue (if -.Fa activate -is zero). -If the page is unmanaged -.Dv ( PG_UNMANAGED -is set) then the page is left on -.Dv PQ_NONE . +and +.Fn vm_page_unwire_noq +functions release a wiring of a page. +The +.Fn vm_page_unwire +function takes a queue index and will insert the page into the +corresponding page queue upon releasing its last wiring. +If the page does not belong to an object and no other references +to the page exist, +.Fn vm_page_unwire +will free the page. +.Fn vm_page_unwire_noq +releases the wiring and returns true if it was the last wiring +of the page. .Sh AUTHORS This manual page was written by .An Chad David Aq Mt davidc@acns.ab.ca . 
diff --git a/sys/amd64/amd64/efirt_machdep.c b/sys/amd64/amd64/efirt_machdep.c index 80ffa66f5ec..f0e15a935d2 100644 --- a/sys/amd64/amd64/efirt_machdep.c +++ b/sys/amd64/amd64/efirt_machdep.c @@ -74,7 +74,7 @@ efi_destroy_1t1_map(void) if (obj_1t1_pt != NULL) { VM_OBJECT_RLOCK(obj_1t1_pt); TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq) - m->wire_count = 0; + m->wire_count = VPRC_OBJREF; vm_wire_sub(obj_1t1_pt->resident_page_count); VM_OBJECT_RUNLOCK(obj_1t1_pt); vm_object_deallocate(obj_1t1_pt); diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index f1dfc3d9981..f2ebee2ca55 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -3071,31 +3071,23 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) m = NULL; PG_RW = pmap_rw_bit(pmap); PG_V = pmap_valid_bit(pmap); + PMAP_LOCK(pmap); -retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { - if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & - PG_PS_FRAME) | (va & PDRMASK), &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pde & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0) + m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | + (va & PDRMASK)); } else { pte = *pmap_pde_to_pte(pdep, va); - if ((pte & PG_V) && - ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, - &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pte & PG_V) != 0 && + ((pte & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0)) + m = PHYS_TO_VM_PAGE(pte & PG_FRAME); } - if (m != NULL) - vm_page_wire(m); + if (m != NULL && !vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/amd64/sgx/sgx.c b/sys/amd64/sgx/sgx.c index 3d45b60de3e..ea18c967423 100644 --- a/sys/amd64/sgx/sgx.c +++ b/sys/amd64/sgx/sgx.c @@ -357,9 +357,7 @@ sgx_page_remove(struct sgx_softc *sc, vm_page_t p) vm_paddr_t pa; uint64_t offs; - vm_page_lock(p); 
(void)vm_page_remove(p); - vm_page_unlock(p); dprintf("%s: p->pidx %ld\n", __func__, p->pindex); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 6dfc62659a3..15a6e661b6a 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1002,9 +1002,7 @@ vm_gpa_release(void *cookie) { vm_page_t m = cookie; - vm_page_lock(m); vm_page_unwire(m, PQ_ACTIVE); - vm_page_unlock(m); } int diff --git a/sys/arm/arm/pmap-v4.c b/sys/arm/arm/pmap-v4.c index 81117c629a9..e1f411ccc83 100644 --- a/sys/arm/arm/pmap-v4.c +++ b/sys/arm/arm/pmap-v4.c @@ -3415,14 +3415,14 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) struct l2_dtable *l2; pd_entry_t l1pd; pt_entry_t *ptep, pte; - vm_paddr_t pa, paddr; - vm_page_t m = NULL; + vm_paddr_t pa; + vm_page_t m; u_int l1idx; + l1idx = L1_IDX(va); - paddr = 0; + m = NULL; PMAP_LOCK(pmap); -retry: l1pd = pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { /* @@ -3434,11 +3434,10 @@ retry: pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); - if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) - goto retry; if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(pa); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } else { /* @@ -3466,15 +3465,12 @@ retry: pa = (pte & L2_L_FRAME) | (va & L2_L_OFFSET); else pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); - if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) - goto retry; m = PHYS_TO_VM_PAGE(pa); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PMAP_UNLOCK(pmap); - PA_UNLOCK_COND(paddr); return (m); } diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 3ef0a96ecb6..1d82ebf48cb 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -1986,23 +1986,20 @@ pmap_extract(pmap_t pmap, vm_offset_t va) vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { - vm_paddr_t pa, lockpa; + vm_paddr_t pa; pt1_entry_t pte1; 
pt2_entry_t pte2, *pte2p; vm_page_t m; - lockpa = 0; m = NULL; PMAP_LOCK(pmap); -retry: pte1 = pte1_load(pmap_pte1(pmap, va)); if (pte1_is_section(pte1)) { if (!(pte1 & PTE1_RO) || !(prot & VM_PROT_WRITE)) { pa = pte1_pa(pte1) | (va & PTE1_OFFSET); - if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) - goto retry; m = PHYS_TO_VM_PAGE(pa); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } else if (pte1_is_link(pte1)) { pte2p = pmap_pte2(pmap, va); @@ -2011,13 +2008,11 @@ retry: if (pte2_is_valid(pte2) && (!(pte2 & PTE2_RO) || !(prot & VM_PROT_WRITE))) { pa = pte2_pa(pte2); - if (vm_page_pa_tryrelock(pmap, pa, &lockpa)) - goto retry; m = PHYS_TO_VM_PAGE(pa); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(lockpa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/arm/nvidia/drm2/tegra_bo.c b/sys/arm/nvidia/drm2/tegra_bo.c index 1e721aacf36..0e244b9486f 100644 --- a/sys/arm/nvidia/drm2/tegra_bo.c +++ b/sys/arm/nvidia/drm2/tegra_bo.c @@ -65,11 +65,9 @@ tegra_bo_destruct(struct tegra_bo *bo) for (i = 0; i < bo->npages; i++) { m = bo->m[i]; cdev_pager_free_page(bo->cdev_pager, m); - vm_page_lock(m); m->flags &= ~PG_FICTITIOUS; vm_page_unwire_noq(m); vm_page_free(m); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(bo->cdev_pager); diff --git a/sys/arm64/arm64/efirt_machdep.c b/sys/arm64/arm64/efirt_machdep.c index 658f03696c2..7d9f8fbe31f 100644 --- a/sys/arm64/arm64/efirt_machdep.c +++ b/sys/arm64/arm64/efirt_machdep.c @@ -74,7 +74,7 @@ efi_destroy_1t1_map(void) if (obj_1t1_pt != NULL) { VM_OBJECT_RLOCK(obj_1t1_pt); TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq) - m->wire_count = 0; + m->wire_count = VPRC_OBJREF; vm_wire_sub(obj_1t1_pt->resident_page_count); VM_OBJECT_RUNLOCK(obj_1t1_pt); vm_object_deallocate(obj_1t1_pt); diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index c9e3c84d0e2..919537e86b8 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -1079,14 +1079,11 @@ pmap_extract_and_hold(pmap_t pmap, 
vm_offset_t va, vm_prot_t prot) { pt_entry_t *pte, tpte; vm_offset_t off; - vm_paddr_t pa; vm_page_t m; int lvl; - pa = 0; m = NULL; PMAP_LOCK(pmap); -retry: pte = pmap_pte(pmap, va, &lvl); if (pte != NULL) { tpte = pmap_load(pte); @@ -1111,14 +1108,11 @@ retry: default: off = 0; } - if (vm_page_pa_tryrelock(pmap, - (tpte & ~ATTR_MASK) | off, &pa)) - goto retry; m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 553c64a67a3..d49d7c5a4ae 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -481,9 +481,7 @@ page_wire(vnode_t *vp, int64_t start) } ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); - vm_page_lock(pp); vm_page_wire(pp); - vm_page_unlock(pp); } else pp = NULL; break; @@ -495,9 +493,7 @@ static void page_unwire(vm_page_t pp) { - vm_page_lock(pp); vm_page_unwire(pp, PQ_ACTIVE); - vm_page_unlock(pp); } /* @@ -591,16 +587,16 @@ mappedread_sf(vnode_t *vp, int nbytes, uio_t *uio) zfs_unmap_page(sf); zfs_vmobject_wlock(obj); vm_page_sunbusy(pp); - vm_page_lock(pp); if (error) { - if (!vm_page_wired(pp) && pp->valid == 0 && - !vm_page_busied(pp)) + if (!vm_page_busied(pp) && !vm_page_wired(pp) && + pp->valid == 0) vm_page_free(pp); } else { pp->valid = VM_PAGE_BITS_ALL; + vm_page_lock(pp); vm_page_activate(pp); + vm_page_unlock(pp); } - vm_page_unlock(pp); } else { ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL); vm_page_sunbusy(pp); diff --git a/sys/compat/linuxkpi/common/include/linux/mm.h b/sys/compat/linuxkpi/common/include/linux/mm.h index a9168416b28..28df3cfda10 100644 --- a/sys/compat/linuxkpi/common/include/linux/mm.h +++ b/sys/compat/linuxkpi/common/include/linux/mm.h @@ -227,9 +227,7 @@ mark_page_accessed(struct vm_page *page) 
static inline void get_page(struct vm_page *page) { - vm_page_lock(page); vm_page_wire(page); - vm_page_unlock(page); } extern long @@ -250,10 +248,7 @@ get_user_pages_remote(struct task_struct *, struct mm_struct *, static inline void put_page(struct vm_page *page) { - vm_page_lock(page); - if (vm_page_unwire(page, PQ_ACTIVE) && page->object == NULL) - vm_page_free(page); - vm_page_unlock(page); + vm_page_unwire(page, PQ_ACTIVE); } #define copy_highpage(to, from) pmap_copy_page(from, to) diff --git a/sys/compat/linuxkpi/common/src/linux_compat.c b/sys/compat/linuxkpi/common/src/linux_compat.c index e861eb79357..3ccdf87c377 100644 --- a/sys/compat/linuxkpi/common/src/linux_compat.c +++ b/sys/compat/linuxkpi/common/src/linux_compat.c @@ -511,9 +511,7 @@ linux_cdev_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_replace_checked(page, vm_obj, (*mres)->pindex, *mres); - vm_page_lock(*mres); vm_page_free(*mres); - vm_page_unlock(*mres); *mres = page; } page->valid = VM_PAGE_BITS_ALL; diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index c254addfc9b..83f87d5f331 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -154,10 +154,8 @@ linux_free_pages(vm_page_t page, unsigned int order) for (x = 0; x != npages; x++) { vm_page_t pgo = page + x; - vm_page_lock(pgo); if (vm_page_unwire_noq(pgo)) vm_page_free(pgo); - vm_page_unlock(pgo); } } else { vm_offset_t vaddr; @@ -295,10 +293,8 @@ linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp) if (vm_pager_has_page(obj, pindex, NULL, NULL)) { rv = vm_pager_get_pages(obj, &page, 1, NULL, NULL); if (rv != VM_PAGER_OK) { - vm_page_lock(page); vm_page_unwire_noq(page); vm_page_free(page); - vm_page_unlock(page); VM_OBJECT_WUNLOCK(obj); return (ERR_PTR(-EINVAL)); } diff --git a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c 
b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c index 8e9a519e270..279aacd0880 100644 --- a/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c +++ b/sys/contrib/vchiq/interface/vchiq_arm/vchiq_2835_arm.c @@ -377,10 +377,7 @@ vchiq_platform_handle_timeout(VCHIQ_STATE_T *state) static void pagelist_page_free(vm_page_t pp) { - vm_page_lock(pp); - if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL) - vm_page_free(pp); - vm_page_unlock(pp); + vm_page_unwire(pp, PQ_INACTIVE); } /* There is a potential problem with partial cache lines (pages?) diff --git a/sys/dev/agp/agp.c b/sys/dev/agp/agp.c index 011c89afeb3..62664e8dfe8 100644 --- a/sys/dev/agp/agp.c +++ b/sys/dev/agp/agp.c @@ -616,9 +616,7 @@ bad: m = vm_page_lookup(mem->am_obj, OFF_TO_IDX(k)); if (k >= i) vm_page_xunbusy(m); - vm_page_lock(m); vm_page_unwire(m, PQ_INACTIVE); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(mem->am_obj); @@ -653,9 +651,7 @@ agp_generic_unbind_memory(device_t dev, struct agp_memory *mem) VM_OBJECT_WLOCK(mem->am_obj); for (i = 0; i < mem->am_size; i += PAGE_SIZE) { m = vm_page_lookup(mem->am_obj, atop(i)); - vm_page_lock(m); vm_page_unwire(m, PQ_INACTIVE); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(mem->am_obj); @@ -1003,7 +999,7 @@ agp_bind_pages(device_t dev, vm_page_t *pages, vm_size_t size, mtx_lock(&sc->as_lock); for (i = 0; i < size; i += PAGE_SIZE) { m = pages[OFF_TO_IDX(i)]; - KASSERT(m->wire_count > 0, + KASSERT(vm_page_wired(m), ("agp_bind_pages: page %p hasn't been wired", m)); /* diff --git a/sys/dev/agp/agp_i810.c b/sys/dev/agp/agp_i810.c index 27d7f1114a0..501f78ca0a3 100644 --- a/sys/dev/agp/agp_i810.c +++ b/sys/dev/agp/agp_i810.c @@ -1795,9 +1795,7 @@ agp_i810_free_memory(device_t dev, struct agp_memory *mem) */ VM_OBJECT_WLOCK(mem->am_obj); m = vm_page_lookup(mem->am_obj, 0); - vm_page_lock(m); vm_page_unwire(m, PQ_INACTIVE); - vm_page_unlock(m); VM_OBJECT_WUNLOCK(mem->am_obj); } else { contigfree(sc->argb_cursor, mem->am_size, M_AGP); diff --git 
a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index c698f0f7d69..5269cf3ad0f 100644 --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -1910,7 +1910,6 @@ aiotx_free_pgs(struct mbuf *m) { struct mbuf_ext_pgs *ext_pgs; struct kaiocb *job; - struct mtx *mtx; vm_page_t pg; MBUF_EXT_PGS_ASSERT(m); @@ -1921,14 +1920,10 @@ aiotx_free_pgs(struct mbuf *m) m->m_len, jobtotid(job)); #endif - mtx = NULL; for (int i = 0; i < ext_pgs->npgs; i++) { pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); - vm_page_change_lock(pg, &mtx); vm_page_unwire(pg, PQ_ACTIVE); } - if (mtx != NULL) - mtx_unlock(mtx); aiotx_free_job(job); } diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c index e460d2cb6a7..0d42a0289ed 100644 --- a/sys/dev/cxgbe/tom/t4_ddp.c +++ b/sys/dev/cxgbe/tom/t4_ddp.c @@ -114,9 +114,7 @@ free_pageset(struct tom_data *td, struct pageset *ps) for (i = 0; i < ps->npages; i++) { p = ps->pages[i]; - vm_page_lock(p); vm_page_unwire(p, PQ_INACTIVE); - vm_page_unlock(p); } mtx_lock(&ddp_orphan_pagesets_lock); TAILQ_INSERT_TAIL(&ddp_orphan_pagesets, ps, link); diff --git a/sys/dev/drm2/ttm/ttm_bo_vm.c b/sys/dev/drm2/ttm/ttm_bo_vm.c index 43d027fc5cd..6dc1fabab28 100644 --- a/sys/dev/drm2/ttm/ttm_bo_vm.c +++ b/sys/dev/drm2/ttm/ttm_bo_vm.c @@ -114,9 +114,7 @@ ttm_bo_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset, vm_object_pip_add(vm_obj, 1); if (*mres != NULL) { - vm_page_lock(*mres); (void)vm_page_remove(*mres); - vm_page_unlock(*mres); } retry: VM_OBJECT_WUNLOCK(vm_obj); @@ -261,9 +259,7 @@ reserve: vm_page_xbusy(m); if (*mres != NULL) { KASSERT(*mres != m, ("losing %p %p", *mres, m)); - vm_page_lock(*mres); vm_page_free(*mres); - vm_page_unlock(*mres); } *mres = m; diff --git a/sys/dev/drm2/ttm/ttm_page_alloc.c b/sys/dev/drm2/ttm/ttm_page_alloc.c index 1e905517544..fbb830405de 100644 --- a/sys/dev/drm2/ttm/ttm_page_alloc.c +++ b/sys/dev/drm2/ttm/ttm_page_alloc.c @@ -132,7 +132,7 @@ ttm_vm_page_free(vm_page_t m) { KASSERT(m->object 
== NULL, ("ttm page %p is owned", m)); - KASSERT(m->wire_count == 1, ("ttm lost wire %p", m)); + KASSERT(vm_page_wired(m), ("ttm lost wire %p", m)); KASSERT((m->flags & PG_FICTITIOUS) != 0, ("ttm lost fictitious %p", m)); KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("ttm got unmanaged %p", m)); m->flags &= ~PG_FICTITIOUS; diff --git a/sys/dev/drm2/ttm/ttm_tt.c b/sys/dev/drm2/ttm/ttm_tt.c index 1e2db3cd875..82aaddf4b1d 100644 --- a/sys/dev/drm2/ttm/ttm_tt.c +++ b/sys/dev/drm2/ttm/ttm_tt.c @@ -294,9 +294,7 @@ int ttm_tt_swapin(struct ttm_tt *ttm) rv = vm_pager_get_pages(obj, &from_page, 1, NULL, NULL); if (rv != VM_PAGER_OK) { - vm_page_lock(from_page); vm_page_free(from_page); - vm_page_unlock(from_page); ret = -EIO; goto err_ret; } diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index c9cd5a6e95a..110cbfdecc8 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -1029,9 +1029,7 @@ md_swap_page_free(vm_page_t m) { vm_page_xunbusy(m); - vm_page_lock(m); vm_page_free(m); - vm_page_unlock(m); } static int diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index 59837840eed..42551df09c2 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -1052,9 +1052,7 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, VM_OBJECT_WUNLOCK(object); page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); - vm_page_lock(*mres); vm_page_free(*mres); - vm_page_unlock(*mres); *mres = page; vm_page_insert(page, object, pidx); } diff --git a/sys/dev/xen/gntdev/gntdev.c b/sys/dev/xen/gntdev/gntdev.c index ed42e177b86..667d46f333b 100644 --- a/sys/dev/xen/gntdev/gntdev.c +++ b/sys/dev/xen/gntdev/gntdev.c @@ -826,14 +826,12 @@ gntdev_gmap_pg_fault(vm_object_t object, vm_ooffset_t offset, int prot, KASSERT((page->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", page)); - KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page)); - KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page)); + 
KASSERT(vm_page_wired(page), ("page %p is not wired", page)); + KASSERT(!vm_page_busied(page), ("page %p is busy", page)); if (*mres != NULL) { oldm = *mres; - vm_page_lock(oldm); vm_page_free(oldm); - vm_page_unlock(oldm); *mres = NULL; } diff --git a/sys/dev/xen/privcmd/privcmd.c b/sys/dev/xen/privcmd/privcmd.c index e09886f42ad..3b6b2033e80 100644 --- a/sys/dev/xen/privcmd/privcmd.c +++ b/sys/dev/xen/privcmd/privcmd.c @@ -169,14 +169,12 @@ privcmd_pg_fault(vm_object_t object, vm_ooffset_t offset, KASSERT((page->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", page)); - KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page)); - KASSERT(vm_page_busied(page) == 0, ("page %p is busy", page)); + KASSERT(vm_page_wired(page), ("page %p not wired", page)); + KASSERT(!vm_page_busied(page), ("page %p is busy", page)); if (*mres != NULL) { oldm = *mres; - vm_page_lock(oldm); vm_page_free(oldm); - vm_page_unlock(oldm); *mres = NULL; } diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index 9fe58cd4c13..32c929fefaa 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -1418,7 +1418,6 @@ retry: goto retry; rv = vm_pager_get_pages(uobj, &m, 1, NULL, NULL); - vm_page_lock(m); if (rv == VM_PAGER_OK) { /* * Since the page was not resident, @@ -1428,12 +1427,12 @@ retry: * current operation is not regarded * as an access. 
*/ + vm_page_lock(m); vm_page_launder(m); vm_page_unlock(m); vm_page_xunbusy(m); } else { vm_page_free(m); - vm_page_unlock(m); if (ignerr) m = NULL; else { diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index b2a0ac89049..f07f500e897 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -1690,35 +1690,24 @@ __CONCAT(PMTYPE, extract_and_hold)(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pd_entry_t pde; pt_entry_t pte; vm_page_t m; - vm_paddr_t pa; - pa = 0; m = NULL; PMAP_LOCK(pmap); -retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { - if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & - PG_PS_FRAME) | (va & PDRMASK), &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) + m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | + (va & PDRMASK)); } else { pte = pmap_pte_ufast(pmap, va, pde); if (pte != 0 && - ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, - &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pa); - } + ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) + m = PHYS_TO_VM_PAGE(pte & PG_FRAME); } - if (m != NULL) - vm_page_wire(m); + if (m != NULL && !vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index ed8f55c4afa..2dd04d2417d 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -981,10 +981,8 @@ exec_map_first_page(struct image_params *imgp) if (ma[0]->valid != VM_PAGE_BITS_ALL) { vm_page_xbusy(ma[0]); if (!vm_pager_has_page(object, 0, NULL, &after)) { - vm_page_lock(ma[0]); vm_page_unwire_noq(ma[0]); vm_page_free(ma[0]); - vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); return (EIO); } @@ -1008,13 +1006,9 @@ exec_map_first_page(struct image_params *imgp) initial_pagein = i; rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL); if (rv != VM_PAGER_OK) { - for (i = 0; i < 
initial_pagein; i++) { - vm_page_lock(ma[i]); - if (i == 0) - vm_page_unwire_noq(ma[i]); + vm_page_unwire_noq(ma[0]); + for (i = 0; i < initial_pagein; i++) vm_page_free(ma[i]); - vm_page_unlock(ma[i]); - } VM_OBJECT_WUNLOCK(object); return (EIO); } @@ -1039,9 +1033,7 @@ exec_unmap_first_page(struct image_params *imgp) m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; - vm_page_lock(m); vm_page_unwire(m, PQ_ACTIVE); - vm_page_unlock(m); } } diff --git a/sys/kern/kern_kcov.c b/sys/kern/kern_kcov.c index 238529f98a7..e84b403cd31 100644 --- a/sys/kern/kern_kcov.c +++ b/sys/kern/kern_kcov.c @@ -408,10 +408,7 @@ kcov_free(struct kcov_info *info) VM_OBJECT_WLOCK(info->bufobj); m = vm_page_lookup(info->bufobj, 0); for (i = 0; i < info->bufsize / PAGE_SIZE; i++) { - vm_page_lock(m); vm_page_unwire_noq(m); - vm_page_unlock(m); - m = vm_page_next(m); } VM_OBJECT_WUNLOCK(info->bufobj); diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c index b350fa85ac3..5e1c49dca99 100644 --- a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -415,11 +415,8 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off, &sendfile_iodone, sfio); if (rv != VM_PAGER_OK) { for (j = i; j < i + count; j++) { - if (pa[j] != bogus_page) { - vm_page_lock(pa[j]); + if (pa[j] != bogus_page) vm_page_unwire(pa[j], PQ_INACTIVE); - vm_page_unlock(pa[j]); - } } VM_OBJECT_WUNLOCK(obj); return (EIO); @@ -932,11 +929,8 @@ retry_space: m != NULL ? 
SFB_NOWAIT : SFB_CATCH); if (sf == NULL) { SFSTAT_INC(sf_allocfail); - for (int j = i; j < npages; j++) { - vm_page_lock(pa[j]); + for (int j = i; j < npages; j++) vm_page_unwire(pa[j], PQ_INACTIVE); - vm_page_unlock(pa[j]); - } if (m == NULL) softerr = ENOBUFS; fixspace(npages, i, off, &space); diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index d2b5ea2434a..8e8dae7083e 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -312,10 +312,7 @@ proc_rwmem(struct proc *p, struct uio *uio) /* * Release the page. */ - vm_page_lock(m); - if (vm_page_unwire(m, PQ_ACTIVE) && m->object == NULL) - vm_page_free(m); - vm_page_unlock(m); + vm_page_unwire(m, PQ_ACTIVE); } while (error == 0 && uio->uio_resid > 0); diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index c9f7e4ca01b..c97042f6dfa 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -1621,10 +1621,6 @@ mb_free_mext_pgs(struct mbuf *m) ext_pgs = m->m_ext.ext_pgs; for (int i = 0; i < ext_pgs->npgs; i++) { pg = PHYS_TO_VM_PAGE(ext_pgs->pa[i]); - /* - * Note: page is not locked, as it has no - * object and is not on any queues. 
- */ vm_page_unwire_noq(pg); vm_page_free(pg); } diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index de8451f951d..4c5e67524a1 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -198,10 +198,8 @@ uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) printf( "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n", obj, idx, m->valid, rv); - vm_page_lock(m); vm_page_unwire_noq(m); vm_page_free(m); - vm_page_unlock(m); VM_OBJECT_WUNLOCK(obj); return (EIO); } @@ -217,9 +215,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio) vm_pager_page_unswapped(m); VM_OBJECT_WUNLOCK(obj); } - vm_page_lock(m); vm_page_unwire(m, PQ_ACTIVE); - vm_page_unlock(m); return (error); } @@ -474,7 +470,6 @@ retry: goto retry; rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); - vm_page_lock(m); if (rv == VM_PAGER_OK) { /* * Since the page was not resident, @@ -485,11 +480,9 @@ retry: * as an access. */ vm_page_launder(m); - vm_page_unlock(m); vm_page_xunbusy(m); } else { vm_page_free(m); - vm_page_unlock(m); VM_OBJECT_WUNLOCK(object); return (EIO); } diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index fa5d10ca145..072618f793a 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -796,26 +796,22 @@ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t pte, *ptep; - vm_paddr_t pa, pte_pa; + vm_paddr_t pa; vm_page_t m; m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: ptep = pmap_pte(pmap, va); if (ptep != NULL) { pte = *ptep; if (pte_test(&pte, PTE_V) && (!pte_test(&pte, PTE_RO) || (prot & VM_PROT_WRITE) == 0)) { - pte_pa = TLBLO_PTE_TO_PA(pte); - if (vm_page_pa_tryrelock(pmap, pte_pa, &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pte_pa); - vm_page_wire(m); + pa = TLBLO_PTE_TO_PA(pte); + m = PHYS_TO_VM_PAGE(pa); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/net/bpf_zerocopy.c b/sys/net/bpf_zerocopy.c index 
83c953fa48e..d528237e114 100644 --- a/sys/net/bpf_zerocopy.c +++ b/sys/net/bpf_zerocopy.c @@ -115,10 +115,7 @@ static void zbuf_page_free(vm_page_t pp) { - vm_page_lock(pp); - if (vm_page_unwire(pp, PQ_INACTIVE) && pp->object == NULL) - vm_page_free(pp); - vm_page_unlock(pp); + vm_page_unwire(pp, PQ_INACTIVE); } /* diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 32c5b9e5d9a..9eacac27707 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -1264,22 +1264,17 @@ moea_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; - vm_paddr_t pa; m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) && ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa)) - goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 193a5e4c556..7ad86d5f189 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -1578,21 +1578,15 @@ moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; - vm_paddr_t pa; m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF); if (pvo != NULL && (pvo->pvo_pte.prot & prot) == prot) { - if (vm_page_pa_tryrelock(pmap, - pvo->pvo_pte.pa & LPTE_RPGN, &pa)) - goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 26b09661ffd..140b1367325 100644 --- 
a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -2790,12 +2790,9 @@ mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, pte_t *pte; vm_page_t m; uint32_t pte_wbit; - vm_paddr_t pa; - + m = NULL; - pa = 0; PMAP_LOCK(pmap); -retry: pte = pte_find(mmu, pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) { if (pmap == kernel_pmap) @@ -2803,15 +2800,12 @@ retry: else pte_wbit = PTE_UW; - if ((*pte & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) - goto retry; + if ((*pte & pte_wbit) != 0 || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(PTE_PA(pte)); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 4d093853b2b..bbda832ff88 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -870,24 +870,19 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *l3p, l3; vm_paddr_t phys; - vm_paddr_t pa; vm_page_t m; - pa = 0; m = NULL; PMAP_LOCK(pmap); -retry: l3p = pmap_l3(pmap, va); if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { if ((l3 & PTE_W) != 0 || (prot & VM_PROT_WRITE) == 0) { phys = PTE_TO_PHYS(l3); - if (vm_page_pa_tryrelock(pmap, phys, &pa)) - goto retry; m = PHYS_TO_VM_PAGE(phys); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index 77648ab1d69..436c15623a6 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -846,19 +846,15 @@ pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot) { struct tte *tp; vm_page_t m; - vm_paddr_t pa; m = NULL; - pa = 0; PMAP_LOCK(pm); -retry: if (pm == kernel_pmap) { if (va >= VM_MIN_DIRECT_ADDRESS) { tp = NULL; m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va)); - (void)vm_page_pa_tryrelock(pm, 
TLB_DIRECT_TO_PHYS(va), - &pa); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } else { tp = tsb_kvtotte(va); if ((tp->tte_data & TD_V) == 0) @@ -868,12 +864,10 @@ retry: tp = tsb_tte_lookup(pm, va); if (tp != NULL && ((tp->tte_data & TD_SW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa)) - goto retry; m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp)); - vm_page_wire(m); + if (!vm_page_wire_mapped(m)) + m = NULL; } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pm); return (m); } diff --git a/sys/sys/param.h b/sys/sys/param.h index 1be48682984..22fb94934e8 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -60,7 +60,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1300046 /* Master, propagated to newvers */ +#define __FreeBSD_version 1300047 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index 7dce4778f95..fbd47200a03 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -235,9 +235,7 @@ cdev_pager_free_page(vm_object_t object, vm_page_t m) if (object->type == OBJT_MGTDEVICE) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("unmanaged %p", m)); pmap_remove_all(m); - vm_page_lock(m); (void)vm_page_remove(m); - vm_page_unlock(m); } else if (object->type == OBJT_DEVICE) dev_pager_free_page(object, m); } @@ -393,9 +391,7 @@ old_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); vm_page_replace_checked(page, object, (*mres)->pindex, *mres); - vm_page_lock(*mres); vm_page_free(*mres); - vm_page_unlock(*mres); *mres = page; } page->valid = VM_PAGE_BITS_ALL; diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 4efa5486a28..ac66b79b5f0 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -186,9 +186,7 @@ unlock_and_deallocate(struct faultstate *fs) VM_OBJECT_WUNLOCK(fs->object); if 
(fs->object != fs->first_object) { VM_OBJECT_WLOCK(fs->first_object); - vm_page_lock(fs->first_m); vm_page_free(fs->first_m); - vm_page_unlock(fs->first_m); vm_object_pip_wakeup(fs->first_object); VM_OBJECT_WUNLOCK(fs->first_object); fs->first_m = NULL; @@ -263,18 +261,6 @@ vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot, vm_pager_page_unswapped(m); } -static void -vm_fault_fill_hold(vm_page_t *m_hold, vm_page_t m) -{ - - if (m_hold != NULL) { - *m_hold = m; - vm_page_lock(m); - vm_page_wire(m); - vm_page_unlock(m); - } -} - /* * Unlocks fs.first_object and fs.map on success. */ @@ -335,7 +321,10 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), psind); if (rv != KERN_SUCCESS) return (rv); - vm_fault_fill_hold(m_hold, m); + if (m_hold != NULL) { + *m_hold = m; + vm_page_wire(m); + } vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false); if (psind == 0 && !wired) vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true); @@ -511,11 +500,12 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type, VM_OBJECT_WLOCK(fs->first_object); m_mtx = NULL; for (i = 0; i < npages; i++) { - vm_page_change_lock(&m[i], &m_mtx); - if ((fault_flags & VM_FAULT_WIRE) != 0) + if ((fault_flags & VM_FAULT_WIRE) != 0) { vm_page_wire(&m[i]); - else + } else { + vm_page_change_lock(&m[i], &m_mtx); vm_page_activate(&m[i]); + } if (m_hold != NULL && m[i].pindex == fs->first_pindex) { *m_hold = &m[i]; vm_page_wire(&m[i]); @@ -576,7 +566,6 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, struct faultstate fs; struct vnode *vp; struct domainset *dset; - struct mtx *mtx; vm_object_t next_object, retry_object; vm_offset_t e_end, e_start; vm_pindex_t retry_pindex; @@ -745,9 +734,7 @@ RetryFault_oom: VM_OBJECT_WLOCK(fs.first_object); VM_OBJECT_WLOCK(fs.object); } - vm_page_lock(fs.first_m); vm_page_free(fs.first_m); - vm_page_unlock(fs.first_m); 
vm_object_pip_wakeup(fs.first_object); VM_OBJECT_WUNLOCK(fs.first_object); fs.first_m = NULL; @@ -1030,12 +1017,10 @@ readrest: * an error. */ if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { - vm_page_lock(fs.m); if (!vm_page_wired(fs.m)) vm_page_free(fs.m); else - vm_page_xunbusy_maybelocked(fs.m); - vm_page_unlock(fs.m); + vm_page_xunbusy(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return (rv == VM_PAGER_ERROR ? KERN_FAILURE : @@ -1053,12 +1038,10 @@ readrest: * that we are. */ if (fs.object != fs.first_object) { - vm_page_lock(fs.m); if (!vm_page_wired(fs.m)) vm_page_free(fs.m); else - vm_page_xunbusy_maybelocked(fs.m); - vm_page_unlock(fs.m); + vm_page_xunbusy(fs.m); fs.m = NULL; } } @@ -1169,23 +1152,11 @@ readrest: * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { - /* - * Keep the page wired to ensure that it is not - * freed by another thread, such as the page - * daemon, while it is disassociated from an - * object. - */ - mtx = NULL; - vm_page_change_lock(fs.m, &mtx); - vm_page_wire(fs.m); + (void)vm_page_remove(fs.m); - vm_page_change_lock(fs.first_m, &mtx); vm_page_replace_checked(fs.m, fs.first_object, fs.first_pindex, fs.first_m); vm_page_free(fs.first_m); - vm_page_change_lock(fs.m, &mtx); - vm_page_unwire(fs.m, PQ_ACTIVE); - mtx_unlock(mtx); vm_page_dirty(fs.m); #if VM_NRESERVLEVEL > 0 /* @@ -1211,13 +1182,8 @@ readrest: fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_WIRE) == 0) { - vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); - vm_page_unlock(fs.first_m); - - vm_page_lock(fs.m); vm_page_unwire(fs.m, PQ_INACTIVE); - vm_page_unlock(fs.m); } /* * We no longer need the old page or object. @@ -1350,21 +1316,22 @@ readrest: faultcount > 0 ? behind : PFBAK, faultcount > 0 ? ahead : PFFOR, false); VM_OBJECT_WLOCK(fs.object); - vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. 
*/ - if ((fault_flags & VM_FAULT_WIRE) != 0) + if ((fault_flags & VM_FAULT_WIRE) != 0) { vm_page_wire(fs.m); - else + } else { + vm_page_lock(fs.m); vm_page_activate(fs.m); + vm_page_unlock(fs.m); + } if (m_hold != NULL) { *m_hold = fs.m; vm_page_wire(fs.m); } - vm_page_unlock(fs.m); vm_page_xunbusy(fs.m); /* @@ -1633,13 +1600,8 @@ vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, return (count); error: for (mp = ma; mp < ma + count; mp++) - if (*mp != NULL) { - vm_page_lock(*mp); - if (vm_page_unwire(*mp, PQ_INACTIVE) && - (*mp)->object == NULL) - vm_page_free(*mp); - vm_page_unlock(*mp); - } + if (*mp != NULL) + vm_page_unwire(*mp, PQ_INACTIVE); return (-1); } @@ -1835,12 +1797,8 @@ again: if (upgrade) { if (src_m != dst_m) { - vm_page_lock(src_m); vm_page_unwire(src_m, PQ_INACTIVE); - vm_page_unlock(src_m); - vm_page_lock(dst_m); vm_page_wire(dst_m); - vm_page_unlock(dst_m); } else { KASSERT(vm_page_wired(dst_m), ("dst_m %p is not wired", dst_m)); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 2ff1940f1aa..e531b13aef2 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -229,10 +229,8 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) vm_page_xbusy(m); rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { - vm_page_lock(m); vm_page_unwire_noq(m); vm_page_free(m); - vm_page_unlock(m); m = NULL; goto out; } @@ -270,9 +268,7 @@ vm_imgact_unmap_page(struct sf_buf *sf) m = sf_buf_page(sf); sf_buf_free(sf); sched_unpin(); - vm_page_lock(m); vm_page_unwire(m, PQ_ACTIVE); - vm_page_unlock(m); } void @@ -380,10 +376,8 @@ vm_thread_stack_dispose(vm_object_t ksobj, vm_offset_t ks, int pages) m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_dispose: kstack already missing?"); - vm_page_lock(m); vm_page_unwire_noq(m); vm_page_free(m); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(ksobj); vm_object_deallocate(ksobj); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 6971f82c3f4..ff63f6fdce5 
100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -674,12 +674,9 @@ static void vm_object_terminate_pages(vm_object_t object) { vm_page_t p, p_next; - struct mtx *mtx; VM_OBJECT_ASSERT_WLOCKED(object); - mtx = NULL; - /* * Free any remaining pageable pages. This also removes them from the * paging queues. However, don't free wired pages, just remove them @@ -688,20 +685,16 @@ vm_object_terminate_pages(vm_object_t object) */ TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { vm_page_assert_unbusied(p); - if ((object->flags & OBJ_UNMANAGED) == 0) - /* - * vm_page_free_prep() only needs the page - * lock for managed pages. - */ - vm_page_change_lock(p, &mtx); + KASSERT(p->object == object && + (p->ref_count & VPRC_OBJREF) != 0, + ("vm_object_terminate_pages: page %p is inconsistent", p)); + p->object = NULL; - if (vm_page_wired(p)) - continue; - VM_CNT_INC(v_pfree); - vm_page_free(p); + if (vm_page_drop(p, VPRC_OBJREF) == VPRC_OBJREF) { + VM_CNT_INC(v_pfree); + vm_page_free(p); + } } - if (mtx != NULL) - mtx_unlock(mtx); /* * If the object contained any pages, then reset it to an empty state. @@ -1158,13 +1151,9 @@ next_page: /* * If the page is not in a normal state, skip it. 
*/ - if (tm->valid != VM_PAGE_BITS_ALL) + if (tm->valid != VM_PAGE_BITS_ALL || + vm_page_wired(tm)) goto next_pindex; - vm_page_lock(tm); - if (vm_page_wired(tm)) { - vm_page_unlock(tm); - goto next_pindex; - } KASSERT((tm->flags & PG_FICTITIOUS) == 0, ("vm_object_madvise: page %p is fictitious", tm)); KASSERT((tm->oflags & VPO_UNMANAGED) == 0, @@ -1172,6 +1161,7 @@ next_page: if (vm_page_busied(tm)) { if (object != tobject) VM_OBJECT_WUNLOCK(tobject); + vm_page_lock(tm); VM_OBJECT_WUNLOCK(object); if (advice == MADV_WILLNEED) { /* @@ -1184,6 +1174,7 @@ next_page: vm_page_busy_sleep(tm, "madvpo", false); goto relookup; } + vm_page_lock(tm); vm_page_advise(tm, advice); vm_page_unlock(tm); vm_object_madvise_freespace(tobject, advice, tm->pindex, 1); @@ -1537,16 +1528,10 @@ vm_object_collapse_scan(vm_object_t object, int op) swap_pager_freespace(backing_object, p->pindex, 1); - /* - * Page is out of the parent object's range, we can - * simply destroy it. - */ - vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); - vm_page_unlock(p); continue; } @@ -1583,12 +1568,10 @@ vm_object_collapse_scan(vm_object_t object, int op) if (backing_object->type == OBJT_SWAP) swap_pager_freespace(backing_object, p->pindex, 1); - vm_page_lock(p); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); if (vm_page_remove(p)) vm_page_free(p); - vm_page_unlock(p); continue; } @@ -1889,7 +1872,14 @@ again: VM_OBJECT_WLOCK(object); goto again; } + if (vm_page_busied(p)) { + VM_OBJECT_WUNLOCK(object); + vm_page_busy_sleep(p, "vmopar", false); + VM_OBJECT_WLOCK(object); + goto again; + } if (vm_page_wired(p)) { +wired: if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) pmap_remove_all(p); @@ -1899,23 +1889,19 @@ again: } continue; } - if (vm_page_busied(p)) { - VM_OBJECT_WUNLOCK(object); - vm_page_busy_sleep(p, "vmopar", false); - VM_OBJECT_WLOCK(object); - goto again; - } KASSERT((p->flags & 
PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { if ((options & OBJPR_NOTMAPPED) == 0 && - object->ref_count != 0) - pmap_remove_write(p); + object->ref_count != 0 && + !vm_page_try_remove_write(p)) + goto wired; if (p->dirty != 0) continue; } - if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) - pmap_remove_all(p); + if ((options & OBJPR_NOTMAPPED) == 0 && + object->ref_count != 0 && !vm_page_try_remove_all(p)) + goto wired; vm_page_free(p); } if (mtx != NULL) @@ -1989,9 +1975,7 @@ vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) if (m->valid != VM_PAGE_BITS_ALL) { rv = vm_pager_get_pages(object, &m, 1, NULL, NULL); if (rv != VM_PAGER_OK) { - vm_page_lock(m); vm_page_free(m); - vm_page_unlock(m); break; } } @@ -2205,8 +2189,8 @@ again: tm = m; m = TAILQ_NEXT(m, listq); } - vm_page_lock(tm); if (vm_page_xbusied(tm)) { + vm_page_lock(tm); for (tobject = object; locked_depth >= 1; locked_depth--) { t1object = tobject->backing_object; @@ -2217,7 +2201,6 @@ again: goto again; } vm_page_unwire(tm, queue); - vm_page_unlock(tm); next_page: pindex++; } diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 64831ffc41a..45e8ccc8904 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -164,6 +164,7 @@ static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred); static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred); +static void vm_page_mvqueue(vm_page_t m, uint8_t queue); static int vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, vm_paddr_t high); static int vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, @@ -509,7 +510,7 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind) { m->object = NULL; - m->wire_count = 0; + m->ref_count = 0; m->busy_lock = VPB_UNBUSIED; m->flags = m->aflags = 0; m->phys_addr = pa; @@ -1103,17 
+1104,11 @@ vm_page_change_lock(vm_page_t m, struct mtx **mtx) void vm_page_unhold_pages(vm_page_t *ma, int count) { - struct mtx *mtx; - mtx = NULL; for (; count != 0; count--) { - vm_page_change_lock(*ma, &mtx); - if (vm_page_unwire(*ma, PQ_ACTIVE) && (*ma)->object == NULL) - vm_page_free(*ma); + vm_page_unwire(*ma, PQ_ACTIVE); ma++; } - if (mtx != NULL) - mtx_unlock(mtx); } vm_page_t @@ -1176,7 +1171,8 @@ vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr) /* Fictitious pages don't use "order" or "pool". */ m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_SINGLE_EXCLUSIVER; - m->wire_count = 1; + /* Fictitious pages are unevictable. */ + m->ref_count = 1; pmap_page_init(m); memattr: pmap_page_set_memattr(m, memattr); @@ -1371,10 +1367,11 @@ vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, ("vm_page_insert_after: msucc doesn't succeed pindex")); /* - * Record the object/offset pair in this page + * Record the object/offset pair in this page. */ m->object = object; m->pindex = pindex; + m->ref_count |= VPRC_OBJREF; /* * Now link into the object's ordered list of backed pages. 
@@ -1382,6 +1379,7 @@ vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, if (vm_radix_insert(&object->rtree, m)) { m->object = NULL; m->pindex = 0; + m->ref_count &= ~VPRC_OBJREF; return (1); } vm_page_insert_radixdone(m, object, mpred); @@ -1406,11 +1404,13 @@ vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object != NULL && m->object == object, ("vm_page_insert_radixdone: page %p has inconsistent object", m)); + KASSERT((m->ref_count & VPRC_OBJREF) != 0, + ("vm_page_insert_radixdone: page %p is missing object ref", m)); if (mpred != NULL) { KASSERT(mpred->object == object, - ("vm_page_insert_after: object doesn't contain mpred")); + ("vm_page_insert_radixdone: object doesn't contain mpred")); KASSERT(mpred->pindex < m->pindex, - ("vm_page_insert_after: mpred doesn't precede pindex")); + ("vm_page_insert_radixdone: mpred doesn't precede pindex")); } if (mpred != NULL) @@ -1438,25 +1438,19 @@ vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred) } /* - * vm_page_remove: - * - * Removes the specified page from its containing object, but does not - * invalidate any backing storage. Return true if the page may be safely - * freed and false otherwise. - * - * The object must be locked. The page must be locked if it is managed. + * Do the work to remove a page from its object. The caller is responsible for + * updating the page's fields to reflect this removal. 
*/ -bool -vm_page_remove(vm_page_t m) +static void +vm_page_object_remove(vm_page_t m) { vm_object_t object; vm_page_t mrem; object = m->object; - - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT((m->ref_count & VPRC_OBJREF) != 0, + ("page %p is missing its object ref", m)); if (vm_page_xbusied(m)) vm_page_xunbusy_maybelocked(m); mrem = vm_radix_remove(&object->rtree, m->pindex); @@ -1477,9 +1471,24 @@ vm_page_remove(vm_page_t m) */ if (object->resident_page_count == 0 && object->type == OBJT_VNODE) vdrop(object->handle); +} +/* + * vm_page_remove: + * + * Removes the specified page from its containing object, but does not + * invalidate any backing storage. Returns true if the object's reference + * was the last reference to the page, and false otherwise. + * + * The object must be locked. + */ +bool +vm_page_remove(vm_page_t m) +{ + + vm_page_object_remove(m); m->object = NULL; - return (!vm_page_wired(m)); + return (vm_page_drop(m, VPRC_OBJREF) == VPRC_OBJREF); } /* @@ -1560,8 +1569,6 @@ vm_page_prev(vm_page_t m) /* * Uses the page mnew as a replacement for an existing page at index * pindex which must be already present in the object. - * - * The existing page must not be on a paging queue. 
*/ vm_page_t vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) @@ -1569,10 +1576,8 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) vm_page_t mold; VM_OBJECT_ASSERT_WLOCKED(object); - KASSERT(mnew->object == NULL, + KASSERT(mnew->object == NULL && (mnew->ref_count & VPRC_OBJREF) == 0, ("vm_page_replace: page %p already in object", mnew)); - KASSERT(mnew->queue == PQ_NONE || vm_page_wired(mnew), - ("vm_page_replace: new page %p is on a paging queue", mnew)); /* * This function mostly follows vm_page_insert() and @@ -1582,6 +1587,7 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) mnew->object = object; mnew->pindex = pindex; + atomic_set_int(&mnew->ref_count, VPRC_OBJREF); mold = vm_radix_replace(&object->rtree, mnew); KASSERT(mold->queue == PQ_NONE, ("vm_page_replace: old page %p is on a paging queue", mold)); @@ -1591,6 +1597,7 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex) TAILQ_REMOVE(&object->memq, mold, listq); mold->object = NULL; + atomic_clear_int(&mold->ref_count, VPRC_OBJREF); vm_page_xunbusy_maybelocked(mold); /* @@ -1628,6 +1635,7 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) VM_OBJECT_ASSERT_WLOCKED(new_object); + KASSERT(m->ref_count != 0, ("vm_page_rename: page %p has no refs", m)); mpred = vm_radix_lookup_le(&new_object->rtree, new_pindex); KASSERT(mpred == NULL || mpred->pindex != new_pindex, ("vm_page_rename: pindex already renamed")); @@ -1649,13 +1657,12 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex) * the listq iterator is tainted. */ m->pindex = opidx; - vm_page_lock(m); - (void)vm_page_remove(m); + vm_page_object_remove(m); /* Return back to the new pindex to complete vm_page_insert(). 
*/ m->pindex = new_pindex; m->object = new_object; - vm_page_unlock(m); + vm_page_insert_radixdone(m, new_object, mpred); vm_page_dirty(m); return (0); @@ -1872,7 +1879,7 @@ found: * page is inserted into the object. */ vm_wire_add(1); - m->wire_count = 1; + m->ref_count = 1; } m->act_count = 0; @@ -1880,7 +1887,7 @@ found: if (vm_page_insert_after(m, object, pindex, mpred)) { if (req & VM_ALLOC_WIRED) { vm_wire_sub(1); - m->wire_count = 0; + m->ref_count = 0; } KASSERT(m->object == NULL, ("page %p has object", m)); m->oflags = VPO_UNMANAGED; @@ -2072,7 +2079,7 @@ found: m->flags = (m->flags | PG_NODUMP) & flags; m->busy_lock = busy_lock; if ((req & VM_ALLOC_WIRED) != 0) - m->wire_count = 1; + m->ref_count = 1; m->act_count = 0; m->oflags = oflags; if (object != NULL) { @@ -2085,7 +2092,7 @@ found: for (m = m_ret; m < &m_ret[npages]; m++) { if (m <= mpred && (req & VM_ALLOC_WIRED) != 0) - m->wire_count = 0; + m->ref_count = 0; m->oflags = VPO_UNMANAGED; m->busy_lock = VPB_UNBUSIED; /* Don't change PG_ZERO. */ @@ -2119,7 +2126,7 @@ vm_page_alloc_check(vm_page_t m) KASSERT(m->queue == PQ_NONE && (m->aflags & PGA_QUEUE_STATE_MASK) == 0, ("page %p has unexpected queue %d, flags %#x", m, m->queue, (m->aflags & PGA_QUEUE_STATE_MASK))); - KASSERT(!vm_page_wired(m), ("page %p is wired", m)); + KASSERT(m->ref_count == 0, ("page %p has references", m)); KASSERT(!vm_page_busied(m), ("page %p is busy", m)); KASSERT(m->dirty == 0, ("page %p is dirty", m)); KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, @@ -2203,7 +2210,7 @@ again: * not belong to an object. */ vm_wire_add(1); - m->wire_count = 1; + m->ref_count = 1; } /* Unmanaged pages don't use "act_count". 
*/ m->oflags = VPO_UNMANAGED; @@ -2296,8 +2303,8 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, for (m = m_start; m < m_end && run_len < npages; m += m_inc) { KASSERT((m->flags & PG_MARKER) == 0, ("page %p is PG_MARKER", m)); - KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->wire_count == 1, - ("fictitious page %p has invalid wire count", m)); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->ref_count >= 1, + ("fictitious page %p has invalid ref count", m)); /* * If the current page would be the start of a run, check its @@ -2354,9 +2361,6 @@ retry: */ VM_OBJECT_RUNLOCK(object); goto retry; - } else if (vm_page_wired(m)) { - run_ext = 0; - goto unlock; } } /* Don't care: PG_NODUMP, PG_ZERO. */ @@ -2374,7 +2378,8 @@ retry: vm_reserv_size(level)) - pa); #endif } else if (object->memattr == VM_MEMATTR_DEFAULT && - vm_page_queue(m) != PQ_NONE && !vm_page_busied(m)) { + vm_page_queue(m) != PQ_NONE && !vm_page_busied(m) && + !vm_page_wired(m)) { /* * The page is allocated but eligible for * relocation. Extend the current run by one @@ -2390,7 +2395,6 @@ retry: run_ext = 1; } else run_ext = 0; -unlock: VM_OBJECT_RUNLOCK(object); #if VM_NRESERVLEVEL > 0 } else if (level >= 0) { @@ -2494,6 +2498,9 @@ vm_page_reclaim_run(int req_class, int domain, u_long npages, vm_page_t m_run, */ vm_page_change_lock(m, &m_mtx); retry: + /* + * Racily check for wirings. Races are handled below. + */ if (vm_page_wired(m)) error = EBUSY; else if ((object = m->object) != NULL) { @@ -2511,9 +2518,6 @@ retry: */ VM_OBJECT_WUNLOCK(object); goto retry; - } else if (vm_page_wired(m)) { - error = EBUSY; - goto unlock; } } /* Don't care: PG_NODUMP, PG_ZERO. 
*/ @@ -2524,7 +2528,7 @@ retry: else if (object->memattr != VM_MEMATTR_DEFAULT) error = EINVAL; else if (vm_page_queue(m) != PQ_NONE && - !vm_page_busied(m)) { + !vm_page_busied(m) && !vm_page_wired(m)) { KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, ("page %p has an unexpected memattr", m)); @@ -2573,8 +2577,6 @@ retry: error = ENOMEM; goto unlock; } - KASSERT(!vm_page_wired(m_new), - ("page %p is wired", m_new)); /* * Replace "m" with the new page. For @@ -2582,8 +2584,11 @@ retry: * and dequeued. Finally, change "m" * as if vm_page_free() was called. */ - if (object->ref_count != 0) - pmap_remove_all(m); + if (object->ref_count != 0 && + !vm_page_try_remove_all(m)) { + error = EBUSY; + goto unlock; + } m_new->aflags = m->aflags & ~PGA_QUEUE_STATE_MASK; KASSERT(m_new->oflags == VPO_UNMANAGED, @@ -3146,8 +3151,7 @@ vm_page_pqbatch_submit(vm_page_t m, uint8_t queue) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("page %p is unmanaged", m)); - KASSERT(mtx_owned(vm_page_lockptr(m)) || - (m->object == NULL && (m->aflags & PGA_DEQUEUE) != 0), + KASSERT(mtx_owned(vm_page_lockptr(m)) || m->object == NULL, ("missing synchronization for page %p", m)); KASSERT(queue < PQ_COUNT, ("invalid queue %d", queue)); @@ -3284,7 +3288,7 @@ vm_page_dequeue_deferred_free(vm_page_t m) { uint8_t queue; - KASSERT(m->object == NULL, ("page %p has an object reference", m)); + KASSERT(m->ref_count == 0, ("page %p has references", m)); if ((m->aflags & PGA_DEQUEUE) != 0) return; @@ -3440,6 +3444,12 @@ bool vm_page_free_prep(vm_page_t m) { + /* + * Synchronize with threads that have dropped a reference to this + * page. 
+ */ + atomic_thread_fence_acq(); + #if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP) if (PMAP_HAS_DMAP && (m->flags & PG_ZERO) != 0) { uint64_t *p; @@ -3450,11 +3460,10 @@ vm_page_free_prep(vm_page_t m) m, i, (uintmax_t)*p)); } #endif - if ((m->oflags & VPO_UNMANAGED) == 0) { - vm_page_lock_assert(m, MA_OWNED); + if ((m->oflags & VPO_UNMANAGED) == 0) KASSERT(!pmap_page_is_mapped(m), ("vm_page_free_prep: freeing mapped page %p", m)); - } else + else KASSERT(m->queue == PQ_NONE, ("vm_page_free_prep: unmanaged page %p is queued", m)); VM_CNT_INC(v_tfree); @@ -3462,16 +3471,28 @@ vm_page_free_prep(vm_page_t m) if (vm_page_sbusied(m)) panic("vm_page_free_prep: freeing busy page %p", m); - if (m->object != NULL) - (void)vm_page_remove(m); + if (m->object != NULL) { + vm_page_object_remove(m); + + /* + * The object reference can be released without an atomic + * operation. + */ + KASSERT((m->flags & PG_FICTITIOUS) != 0 || + m->ref_count == VPRC_OBJREF, + ("vm_page_free_prep: page %p has unexpected ref_count %u", + m, m->ref_count)); + m->object = NULL; + m->ref_count -= VPRC_OBJREF; + } /* * If fictitious remove object association and * return. */ if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("fictitious page %p is not wired", m)); + KASSERT(m->ref_count == 1, + ("fictitious page %p is referenced", m)); KASSERT(m->queue == PQ_NONE, ("fictitious page %p is queued", m)); return (false); @@ -3488,8 +3509,8 @@ vm_page_free_prep(vm_page_t m) m->valid = 0; vm_page_undirty(m); - if (vm_page_wired(m) != 0) - panic("vm_page_free_prep: freeing wired page %p", m); + if (m->ref_count != 0) + panic("vm_page_free_prep: page %p has references", m); /* * Restore the default memory attribute to the page. @@ -3571,161 +3592,190 @@ vm_page_free_pages_toq(struct spglist *free, bool update_wire_count) } /* - * vm_page_wire: - * - * Mark this page as wired down. If the page is fictitious, then - * its wire count must remain one. - * - * The page must be locked. 
+ * Mark this page as wired down, preventing reclamation by the page daemon + * or when the containing object is destroyed. */ void vm_page_wire(vm_page_t m) { + u_int old; - vm_page_assert_locked(m); - if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("vm_page_wire: fictitious page %p's wire count isn't one", - m)); - return; - } - if (!vm_page_wired(m)) { - KASSERT((m->oflags & VPO_UNMANAGED) == 0 || - m->queue == PQ_NONE, - ("vm_page_wire: unmanaged page %p is queued", m)); + KASSERT(m->object != NULL, + ("vm_page_wire: page %p does not belong to an object", m)); + if (!vm_page_busied(m)) + VM_OBJECT_ASSERT_LOCKED(m->object); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || + VPRC_WIRE_COUNT(m->ref_count) >= 1, + ("vm_page_wire: fictitious page %p has zero wirings", m)); + + old = atomic_fetchadd_int(&m->ref_count, 1); + KASSERT(VPRC_WIRE_COUNT(old) != VPRC_WIRE_COUNT_MAX, + ("vm_page_wire: counter overflow for page %p", m)); + if (VPRC_WIRE_COUNT(old) == 0) vm_wire_add(1); - } - m->wire_count++; - KASSERT(m->wire_count != 0, ("vm_page_wire: wire_count overflow m=%p", m)); } /* - * vm_page_unwire: - * + * Attempt to wire a mapped page following a pmap lookup of that page. + * This may fail if a thread is concurrently tearing down mappings of the page. + */ +bool +vm_page_wire_mapped(vm_page_t m) +{ + u_int old; + + old = m->ref_count; + do { + KASSERT(old > 0, + ("vm_page_wire_mapped: wiring unreferenced page %p", m)); + if ((old & VPRC_BLOCKED) != 0) + return (false); + } while (!atomic_fcmpset_int(&m->ref_count, &old, old + 1)); + + if (VPRC_WIRE_COUNT(old) == 0) + vm_wire_add(1); + return (true); +} + +/* * Release one wiring of the specified page, potentially allowing it to be - * paged out. Returns TRUE if the number of wirings transitions to zero and - * FALSE otherwise. + * paged out. * * Only managed pages belonging to an object can be paged out. 
If the number * of wirings transitions to zero and the page is eligible for page out, then - the page is added to the specified paging queue (unless PQ_NONE is - * specified, in which case the page is dequeued if it belongs to a paging - * queue). - * - * If a page is fictitious, then its wire count must always be one. + * the page is added to the specified paging queue. If the released wiring + * represented the last reference to the page, the page is freed. * - * A managed page must be locked. + * The page lock is acquired internally; the caller must not hold it. */ -bool +void vm_page_unwire(vm_page_t m, uint8_t queue) { - bool unwired; + u_int old; + bool locked; - KASSERT(queue < PQ_COUNT || queue == PQ_NONE, - ("vm_page_unwire: invalid queue %u request for page %p", - queue, m)); - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); + KASSERT(queue < PQ_COUNT, + ("vm_page_unwire: invalid queue %u request for page %p", queue, m)); - unwired = vm_page_unwire_noq(m); - if (!unwired || (m->oflags & VPO_UNMANAGED) != 0 || m->object == NULL) - return (unwired); - - if (vm_page_queue(m) == queue) { - if (queue == PQ_ACTIVE) - vm_page_reference(m); - else if (queue != PQ_NONE) - vm_page_requeue(m); - } else { - vm_page_dequeue(m); - if (queue != PQ_NONE) { - vm_page_enqueue(m, queue); - if (queue == PQ_ACTIVE) - /* Initialize act_count. */ - vm_page_activate(m); - } + if ((m->oflags & VPO_UNMANAGED) != 0) { + if (vm_page_unwire_noq(m) && m->ref_count == 0) + vm_page_free(m); + return; + } + + /* + * Update LRU state before releasing the wiring reference. + * We only need to do this once since we hold the page lock. + * Use a release store when updating the reference count to + * synchronize with vm_page_free_prep().
+ */ + old = m->ref_count; + locked = false; + do { + KASSERT(VPRC_WIRE_COUNT(old) > 0, + ("vm_page_unwire: wire count underflow for page %p", m)); + if (!locked && VPRC_WIRE_COUNT(old) == 1) { + vm_page_lock(m); + locked = true; + if (queue == PQ_ACTIVE && vm_page_queue(m) == PQ_ACTIVE) + vm_page_reference(m); + else + vm_page_mvqueue(m, queue); + } + } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); + + /* + * Release the lock only after the wiring is released, to ensure that + * the page daemon does not encounter and dequeue the page while it is + * still wired. + */ + if (locked) + vm_page_unlock(m); + + if (VPRC_WIRE_COUNT(old) == 1) { + vm_wire_sub(1); + if (old == 1) + vm_page_free(m); } - return (unwired); } /* - * - * vm_page_unwire_noq: - * * Unwire a page without (re-)inserting it into a page queue. It is up * to the caller to enqueue, requeue, or free the page as appropriate. - * In most cases, vm_page_unwire() should be used instead. + * In most cases involving managed pages, vm_page_unwire() should be used + * instead. */ bool vm_page_unwire_noq(vm_page_t m) { + u_int old; - if ((m->oflags & VPO_UNMANAGED) == 0) - vm_page_assert_locked(m); - if ((m->flags & PG_FICTITIOUS) != 0) { - KASSERT(m->wire_count == 1, - ("vm_page_unwire: fictitious page %p's wire count isn't one", m)); - return (false); - } - if (!vm_page_wired(m)) - panic("vm_page_unwire: page %p's wire count is zero", m); - m->wire_count--; - if (m->wire_count == 0) { - vm_wire_sub(1); - return (true); - } else + old = vm_page_drop(m, 1); + KASSERT(VPRC_WIRE_COUNT(old) != 0, + ("vm_page_unref: counter underflow for page %p", m)); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || VPRC_WIRE_COUNT(old) > 1, + ("vm_page_unref: missing ref on fictitious page %p", m)); + + if (VPRC_WIRE_COUNT(old) > 1) return (false); + vm_wire_sub(1); + return (true); } /* - * vm_page_activate: + * Ensure that the page is in the specified page queue. 
If the page is + * active or being moved to the active queue, ensure that its act_count is + * at least ACT_INIT but do not otherwise mess with it. Otherwise, ensure that + * the page is at the tail of its page queue. * - * Put the specified page on the active list (if appropriate). - * Ensure that act_count is at least ACT_INIT but do not otherwise - * mess with it. + * The page may be wired. The caller should release its wiring reference + * before releasing the page lock, otherwise the page daemon may immediately + * dequeue the page. * - * The page must be locked. + * A managed page must be locked. + */ +static __always_inline void +vm_page_mvqueue(vm_page_t m, const uint8_t nqueue) +{ + + vm_page_assert_locked(m); + KASSERT((m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_mvqueue: page %p is unmanaged", m)); + + if (vm_page_queue(m) != nqueue) { + vm_page_dequeue(m); + vm_page_enqueue(m, nqueue); + } else if (nqueue != PQ_ACTIVE) { + vm_page_requeue(m); + } + + if (nqueue == PQ_ACTIVE && m->act_count < ACT_INIT) + m->act_count = ACT_INIT; +} + +/* + * Put the specified page on the active list (if appropriate). */ void vm_page_activate(vm_page_t m) { - vm_page_assert_locked(m); - - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) return; - if (vm_page_queue(m) == PQ_ACTIVE) { - if (m->act_count < ACT_INIT) - m->act_count = ACT_INIT; - return; - } - - vm_page_dequeue(m); - if (m->act_count < ACT_INIT) - m->act_count = ACT_INIT; - vm_page_enqueue(m, PQ_ACTIVE); + vm_page_mvqueue(m, PQ_ACTIVE); } /* * Move the specified page to the tail of the inactive queue, or requeue * the page if it is already in the inactive queue. - * - * The page must be locked. 
*/ void vm_page_deactivate(vm_page_t m) { - vm_page_assert_locked(m); - - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) return; - - if (!vm_page_inactive(m)) { - vm_page_dequeue(m); - vm_page_enqueue(m, PQ_INACTIVE); - } else - vm_page_requeue(m); + vm_page_mvqueue(m, PQ_INACTIVE); } /* @@ -3733,18 +3783,13 @@ vm_page_deactivate(vm_page_t m) * bypassing LRU. A marker page is used to maintain FIFO ordering. * As with regular enqueues, we use a per-CPU batch queue to reduce * contention on the page queue lock. - * - * The page must be locked. */ -void -vm_page_deactivate_noreuse(vm_page_t m) +static void +_vm_page_deactivate_noreuse(vm_page_t m) { vm_page_assert_locked(m); - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) - return; - if (!vm_page_inactive(m)) { vm_page_dequeue(m); m->queue = PQ_INACTIVE; @@ -3754,31 +3799,31 @@ vm_page_deactivate_noreuse(vm_page_t m) vm_page_pqbatch_submit(m, PQ_INACTIVE); } +void +vm_page_deactivate_noreuse(vm_page_t m) +{ + + KASSERT(m->object != NULL, + ("vm_page_deactivate_noreuse: page %p has no object", m)); + + if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_wired(m)) + _vm_page_deactivate_noreuse(m); +} + /* - * vm_page_launder - * - * Put a page in the laundry, or requeue it if it is already there. + * Put a page in the laundry, or requeue it if it is already there. */ void vm_page_launder(vm_page_t m) { - vm_page_assert_locked(m); - if (vm_page_wired(m) || (m->oflags & VPO_UNMANAGED) != 0) + if ((m->oflags & VPO_UNMANAGED) != 0 || vm_page_wired(m)) return; - - if (vm_page_in_laundry(m)) - vm_page_requeue(m); - else { - vm_page_dequeue(m); - vm_page_enqueue(m, PQ_LAUNDRY); - } + vm_page_mvqueue(m, PQ_LAUNDRY); } /* - * vm_page_unswappable - * - * Put a page in the PQ_UNSWAPPABLE holding queue. + * Put a page in the PQ_UNSWAPPABLE holding queue. 
*/ void vm_page_unswappable(vm_page_t m) @@ -3796,6 +3841,8 @@ static void vm_page_release_toq(vm_page_t m, int flags) { + vm_page_assert_locked(m); + /* * Use a check of the valid bits to determine whether we should * accelerate reclamation of the page. The object lock might not be @@ -3807,11 +3854,11 @@ vm_page_release_toq(vm_page_t m, int flags) * inactive queue so that is reclaimed sooner. */ if ((flags & (VPR_TRYFREE | VPR_NOREUSE)) != 0 || m->valid == 0) - vm_page_deactivate_noreuse(m); + _vm_page_deactivate_noreuse(m); else if (vm_page_active(m)) vm_page_reference(m); else - vm_page_deactivate(m); + vm_page_mvqueue(m, PQ_INACTIVE); } /* @@ -3821,41 +3868,63 @@ void vm_page_release(vm_page_t m, int flags) { vm_object_t object; - bool freed; + u_int old; + bool locked; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("vm_page_release: page %p is unmanaged", m)); - vm_page_lock(m); - if (m->object != NULL) - VM_OBJECT_ASSERT_UNLOCKED(m->object); - if (vm_page_unwire_noq(m)) { - if ((object = m->object) == NULL) { - vm_page_free(m); - } else { - freed = false; - if ((flags & VPR_TRYFREE) != 0 && !vm_page_busied(m) && - /* Depends on type stability. */ - VM_OBJECT_TRYWLOCK(object)) { - /* - * Only free unmapped pages. The busy test from - * before the object was locked cannot be relied - * upon. - */ - if ((object->ref_count == 0 || - !pmap_page_is_mapped(m)) && m->dirty == 0 && - !vm_page_busied(m)) { - vm_page_free(m); - freed = true; - } - VM_OBJECT_WUNLOCK(object); + if ((flags & VPR_TRYFREE) != 0) { + for (;;) { + object = (vm_object_t)atomic_load_ptr(&m->object); + if (object == NULL) + break; + /* Depends on type-stability. 
*/ + if (vm_page_busied(m) || !VM_OBJECT_TRYWLOCK(object)) { + object = NULL; + break; } - - if (!freed) - vm_page_release_toq(m, flags); + if (object == m->object) + break; + VM_OBJECT_WUNLOCK(object); + } + if (__predict_true(object != NULL)) { + vm_page_release_locked(m, flags); + VM_OBJECT_WUNLOCK(object); + return; } } - vm_page_unlock(m); + + /* + * Update LRU state before releasing the wiring reference. + * Use a release store when updating the reference count to + * synchronize with vm_page_free_prep(). + */ + old = m->ref_count; + locked = false; + do { + KASSERT(VPRC_WIRE_COUNT(old) > 0, + ("vm_page_unwire: wire count underflow for page %p", m)); + if (!locked && VPRC_WIRE_COUNT(old) == 1) { + vm_page_lock(m); + locked = true; + vm_page_release_toq(m, flags); + } + } while (!atomic_fcmpset_rel_int(&m->ref_count, &old, old - 1)); + + /* + * Release the lock only after the wiring is released, to ensure that + * the page daemon does not encounter and dequeue the page while it is + * still wired. + */ + if (locked) + vm_page_unlock(m); + + if (VPRC_WIRE_COUNT(old) == 1) { + vm_wire_sub(1); + if (old == 1) + vm_page_free(m); + } } /* See vm_page_release(). 
*/ @@ -3867,17 +3936,70 @@ vm_page_release_locked(vm_page_t m, int flags) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("vm_page_release_locked: page %p is unmanaged", m)); - vm_page_lock(m); if (vm_page_unwire_noq(m)) { if ((flags & VPR_TRYFREE) != 0 && (m->object->ref_count == 0 || !pmap_page_is_mapped(m)) && m->dirty == 0 && !vm_page_busied(m)) { vm_page_free(m); } else { + vm_page_lock(m); vm_page_release_toq(m, flags); + vm_page_unlock(m); } } - vm_page_unlock(m); +} + +static bool +vm_page_try_blocked_op(vm_page_t m, void (*op)(vm_page_t)) +{ + u_int old; + + KASSERT(m->object != NULL && (m->oflags & VPO_UNMANAGED) == 0, + ("vm_page_try_blocked_op: page %p has no object", m)); + KASSERT(!vm_page_busied(m), + ("vm_page_try_blocked_op: page %p is busy", m)); + VM_OBJECT_ASSERT_LOCKED(m->object); + + old = m->ref_count; + do { + KASSERT(old != 0, + ("vm_page_try_blocked_op: page %p has no references", m)); + if (VPRC_WIRE_COUNT(old) != 0) + return (false); + } while (!atomic_fcmpset_int(&m->ref_count, &old, old | VPRC_BLOCKED)); + + (op)(m); + + /* + * If the object is read-locked, new wirings may be created via an + * object lookup. + */ + old = vm_page_drop(m, VPRC_BLOCKED); + KASSERT(!VM_OBJECT_WOWNED(m->object) || + old == (VPRC_BLOCKED | VPRC_OBJREF), + ("vm_page_try_blocked_op: unexpected refcount value %u for %p", + old, m)); + return (true); +} + +/* + * Atomically check for wirings and remove all mappings of the page. + */ +bool +vm_page_try_remove_all(vm_page_t m) +{ + + return (vm_page_try_blocked_op(m, pmap_remove_all)); +} + +/* + * Atomically check for wirings and remove all writeable mappings of the page. 
+ */ +bool +vm_page_try_remove_write(vm_page_t m) +{ + + return (vm_page_try_blocked_op(m, pmap_remove_write)); } /* @@ -3974,11 +4096,8 @@ retrylookup: VM_OBJECT_WLOCK(object); goto retrylookup; } else { - if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock(m); + if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); - vm_page_unlock(m); - } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); @@ -4076,11 +4195,8 @@ retrylookup: VM_OBJECT_WLOCK(object); goto retrylookup; } - if ((allocflags & VM_ALLOC_WIRED) != 0) { - vm_page_lock(m); + if ((allocflags & VM_ALLOC_WIRED) != 0) vm_page_wire(m); - vm_page_unlock(m); - } if ((allocflags & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0) vm_page_xbusy(m); @@ -4609,10 +4725,10 @@ DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) else m = (vm_page_t)addr; db_printf( - "page %p obj %p pidx 0x%jx phys 0x%jx q %d wire %d\n" + "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref %u\n" " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, - m->queue, m->wire_count, m->aflags, m->oflags, + m->queue, m->ref_count, m->aflags, m->oflags, m->flags, m->act_count, m->busy_lock, m->valid, m->dirty); } #endif /* DDB */ diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 2bc3edce785..03833d5189b 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -115,24 +115,23 @@ * the implementation of read-modify-write operations on the * field is encapsulated in vm_page_clear_dirty_mask(). * - * The page structure contains two counters which prevent page reuse. - * Both counters are protected by the page lock (P). The hold - * counter counts transient references obtained via a pmap lookup, and - * is also used to prevent page reclamation in situations where it is - * undesirable to block other accesses to the page. The wire counter - * is used to implement mlock(2) and is non-zero for pages containing - * kernel memory. 
Pages that are wired or held will not be reclaimed - * or laundered by the page daemon, but are treated differently during - * a page queue scan: held pages remain at their position in the queue, - * while wired pages are removed from the queue and must later be - * re-enqueued appropriately by the unwiring thread. It is legal to - * call vm_page_free() on a held page; doing so causes it to be removed - * from its object and page queue, and the page is released to the - * allocator once the last hold reference is dropped. In contrast, - * wired pages may not be freed. - * - * In some pmap implementations, the wire count of a page table page is - * used to track the number of populated entries. + * The ref_count field tracks references to the page. References that + * prevent the page from being reclaimable are called wirings and are + * counted in the low bits of ref_count. The containing object's + * reference, if one exists, is counted using the VPRC_OBJREF bit in the + * ref_count field. Additionally, the VPRC_BLOCKED bit is used to + * atomically check for wirings and prevent new wirings via + * pmap_extract_and_hold(). When a page belongs to an object, it may be + * wired only when the object is locked, or the page is busy, or by + * pmap_extract_and_hold(). As a result, if the object is locked and the + * page is not busy (or is exclusively busied by the current thread), and + * the page is unmapped, its wire count will not increase. The ref_count + * field is updated using atomic operations in most cases, except when it + * is known that no other references to the page exist, such as in the page + * allocator. A page may be present in the page queues, or even actively + * scanned by the page daemon, without an explicitly counted reference. + * The page daemon must therefore handle the possibility of a concurrent + * free of the page.
* * The busy lock is an embedded reader-writer lock which protects the * page's contents and identity (i.e., its <object, pindex> tuple) and @@ -204,11 +203,14 @@ struct vm_page { } memguard; } plinks; TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ - vm_object_t object; /* which object am I in (O,P) */ + vm_object_t object; /* which object am I in (O) */ vm_pindex_t pindex; /* offset into object (O,P) */ vm_paddr_t phys_addr; /* physical address of page (C) */ struct md_page md; /* machine dependent stuff */ - u_int wire_count; /* wired down maps refs (P) */ + union { + u_int wire_count; + u_int ref_count; /* page references */ + }; volatile u_int busy_lock; /* busy owners lock */ uint16_t flags; /* page PG_* flags (P) */ uint8_t order; /* index of the buddy queue (F) */ @@ -225,6 +227,26 @@ struct vm_page { vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ }; +/* + * Special bits used in the ref_count field. + * + * ref_count is normally used to count wirings that prevent the page from being + * reclaimed, but also supports several special types of references that do not + * prevent reclamation. Accesses to the ref_count field must be atomic unless + * the page is unallocated. + * + * VPRC_OBJREF is the reference held by the containing object. It can be set or + * cleared only when the corresponding object's write lock is held. + * + * VPRC_BLOCKED is used to atomically block wirings via pmap lookups while + * attempting to tear down all mappings of a given page. The page lock and + * object write lock must both be held in order to set or clear this bit.
+ */ +#define VPRC_BLOCKED 0x40000000u /* mappings are being removed */ +#define VPRC_OBJREF 0x80000000u /* object reference, cleared with (O) */ +#define VPRC_WIRE_COUNT(c) ((c) & ~(VPRC_BLOCKED | VPRC_OBJREF)) +#define VPRC_WIRE_COUNT_MAX (~(VPRC_BLOCKED | VPRC_OBJREF)) + /* * Page flags stored in oflags: * @@ -585,13 +607,16 @@ int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_sunbusy(vm_page_t m); void vm_page_swapqueue(vm_page_t m, uint8_t oldq, uint8_t newq); +bool vm_page_try_remove_all(vm_page_t m); +bool vm_page_try_remove_write(vm_page_t m); int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); void vm_page_unswappable(vm_page_t m); -bool vm_page_unwire(vm_page_t m, uint8_t queue); +void vm_page_unwire(vm_page_t m, uint8_t queue); bool vm_page_unwire_noq(vm_page_t m); void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); -void vm_page_wire (vm_page_t); +void vm_page_wire(vm_page_t); +bool vm_page_wire_mapped(vm_page_t m); void vm_page_xunbusy_hard(vm_page_t m); void vm_page_xunbusy_maybelocked(vm_page_t m); void vm_page_set_validclean (vm_page_t, int, int); @@ -868,16 +893,36 @@ vm_page_in_laundry(vm_page_t m) return (queue == PQ_LAUNDRY || queue == PQ_UNSWAPPABLE); } +/* + * vm_page_drop: + * + * Release a reference to a page and return the old reference count. + */ +static inline u_int +vm_page_drop(vm_page_t m, u_int val) +{ + + /* + * Synchronize with vm_page_free_prep(): ensure that all updates to the + * page structure are visible before it is freed. + */ + atomic_thread_fence_rel(); + return (atomic_fetchadd_int(&m->ref_count, -val)); +} + /* * vm_page_wired: * - * Return true if a reference prevents the page from being reclaimable. + * Perform a racy check to determine whether a reference prevents the page + * from being reclaimable. 
If the page's object is locked, and the page is + * unmapped and unbusied or exclusively busied by the current thread, no + * new wirings may be created. */ static inline bool vm_page_wired(vm_page_t m) { - return (m->wire_count > 0); + return (VPRC_WIRE_COUNT(m->ref_count) > 0); } #endif /* _KERNEL */ diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 19d9ba2240c..c7f03129d07 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -305,7 +305,9 @@ vm_pageout_collect_batch(struct scan_state *ss, const bool dequeue) vm_pagequeue_unlock(pq); } -/* Return the next page to be scanned, or NULL if the scan is complete. */ +/* + * Return the next page to be scanned, or NULL if the scan is complete. + */ static __always_inline vm_page_t vm_pageout_next(struct scan_state *ss, const bool dequeue) { @@ -328,16 +330,11 @@ vm_pageout_cluster(vm_page_t m) vm_pindex_t pindex; int ib, is, page_base, pageout_count; - vm_page_assert_locked(m); object = m->object; VM_OBJECT_ASSERT_WLOCKED(object); pindex = m->pindex; vm_page_assert_unbusied(m); - KASSERT(!vm_page_wired(m), ("page %p is wired", m)); - - pmap_remove_write(m); - vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; pageout_count = 1; @@ -363,7 +360,8 @@ more: ib = 0; break; } - if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) { + if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p) || + vm_page_wired(p)) { ib = 0; break; } @@ -373,12 +371,11 @@ more: break; } vm_page_lock(p); - if (vm_page_wired(p) || !vm_page_in_laundry(p)) { + if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { vm_page_unlock(p); ib = 0; break; } - pmap_remove_write(p); vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; @@ -393,17 +390,17 @@ more: } while (pageout_count < vm_pageout_page_count && pindex + is < object->size) { - if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p)) + if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p) || + vm_page_wired(p)) break; vm_page_test_dirty(p); if 
(p->dirty == 0) break; vm_page_lock(p); - if (vm_page_wired(p) || !vm_page_in_laundry(p)) { + if (!vm_page_in_laundry(p) || !vm_page_try_remove_write(p)) { vm_page_unlock(p); break; } - pmap_remove_write(p); vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; @@ -648,16 +645,26 @@ vm_pageout_clean(vm_page_t m, int *numpagedout) } /* - * The page may have been busied or referenced while the object - * and page locks were released. + * The page may have been busied while the object and page + * locks were released. */ - if (vm_page_busied(m) || vm_page_wired(m)) { + if (vm_page_busied(m)) { vm_page_unlock(m); error = EBUSY; goto unlock_all; } } + /* + * Remove all writeable mappings, failing if the page is wired. + */ + if (!vm_page_try_remove_write(m)) { + vm_page_unlock(m); + error = EBUSY; + goto unlock_all; + } + vm_page_unlock(m); + /* * If a page is dirty, then it is either being washed * (but not yet cleaned) or it is still in the @@ -732,7 +739,9 @@ scan: recheck: /* * The page may have been disassociated from the queue - * while locks were dropped. + * or even freed while locks were dropped. We thus must be + * careful whenever modifying page state. Once the object lock + * has been acquired, we have a stable reference to the page. */ if (vm_page_queue(m) != queue) continue; @@ -749,7 +758,9 @@ recheck: /* * Wired pages may not be freed. Complete their removal * from the queue now to avoid needless revisits during - * future scans. + * future scans. This check is racy and must be reverified once + * we hold the object lock and have verified that the page + * is not busy. */ if (vm_page_wired(m)) { vm_page_dequeue_deferred(m); @@ -759,8 +770,13 @@ recheck: if (object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); - object = m->object; - if (!VM_OBJECT_TRYWLOCK(object)) { + + /* + * A page's object pointer may be set to NULL before + * the object lock is acquired. 
+ */ + object = (vm_object_t)atomic_load_ptr(&m->object); + if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); @@ -768,10 +784,30 @@ recheck: goto recheck; } } + if (__predict_false(m->object == NULL)) + /* + * The page has been removed from its object. + */ + continue; + KASSERT(m->object == object, ("page %p does not belong to %p", + m, object)); if (vm_page_busied(m)) continue; + /* + * Re-check for wirings now that we hold the object lock and + * have verified that the page is unbusied. If the page is + * mapped, it may still be wired by pmap lookups. The call to + * vm_page_try_remove_all() below atomically checks for such + * wirings and removes mappings. If the page is unmapped, the + * wire count is guaranteed not to increase. + */ + if (__predict_false(vm_page_wired(m))) { + vm_page_dequeue_deferred(m); + continue; + } + /* * Invalid pages can be easily freed. They cannot be * mapped; vm_page_free() asserts this. @@ -839,8 +875,10 @@ recheck: */ if (object->ref_count != 0) { vm_page_test_dirty(m); - if (m->dirty == 0) - pmap_remove_all(m); + if (m->dirty == 0 && !vm_page_try_remove_all(m)) { + vm_page_dequeue_deferred(m); + continue; + } } /* @@ -1132,6 +1170,7 @@ vm_pageout_scan_active(struct vm_domain *vmd, int page_shortage) { struct scan_state ss; struct mtx *mtx; + vm_object_t object; vm_page_t m, marker; struct vm_pagequeue *pq; long min_scan; @@ -1192,7 +1231,9 @@ act_scan: /* * The page may have been disassociated from the queue - * while locks were dropped. + * or even freed while locks were dropped. We thus must be + * careful whenever modifying page state. Once the object lock + * has been acquired, we have a stable reference to the page. */ if (vm_page_queue(m) != PQ_ACTIVE) continue; @@ -1205,6 +1246,17 @@ act_scan: continue; } + /* + * A page's object pointer may be set to NULL before + * the object lock is acquired. 
+ */ + object = (vm_object_t)atomic_load_ptr(&m->object); + if (__predict_false(object == NULL)) + /* + * The page has been removed from its object. + */ + continue; + /* * Check to see "how much" the page has been used. * @@ -1224,7 +1276,7 @@ act_scan: * This race delays the detection of a new reference. At * worst, we will deactivate and reactivate the page. */ - if (m->object->ref_count != 0) + if (object->ref_count != 0) act_delta = pmap_ts_referenced(m); else act_delta = 0; @@ -1400,7 +1452,9 @@ vm_pageout_scan_inactive(struct vm_domain *vmd, int shortage, recheck: /* * The page may have been disassociated from the queue - * while locks were dropped. + * or even freed while locks were dropped. We thus must be + * careful whenever modifying page state. Once the object lock + * has been acquired, we have a stable reference to the page. */ if (vm_page_queue(m) != PQ_INACTIVE) { addl_page_shortage++; @@ -1419,7 +1473,9 @@ recheck: /* * Wired pages may not be freed. Complete their removal * from the queue now to avoid needless revisits during - * future scans. + * future scans. This check is racy and must be reverified once + * we hold the object lock and have verified that the page + * is not busy. */ if (vm_page_wired(m)) { vm_page_dequeue_deferred(m); @@ -1429,8 +1485,13 @@ recheck: if (object != m->object) { if (object != NULL) VM_OBJECT_WUNLOCK(object); - object = m->object; - if (!VM_OBJECT_TRYWLOCK(object)) { + + /* + * A page's object pointer may be set to NULL before + * the object lock is acquired. + */ + object = (vm_object_t)atomic_load_ptr(&m->object); + if (object != NULL && !VM_OBJECT_TRYWLOCK(object)) { mtx_unlock(mtx); /* Depends on type-stability. */ VM_OBJECT_WLOCK(object); @@ -1438,6 +1499,13 @@ recheck: goto recheck; } } + if (__predict_false(m->object == NULL)) + /* + * The page has been removed from its object. 
+ */ + continue; + KASSERT(m->object == object, ("page %p does not belong to %p", + m, object)); if (vm_page_busied(m)) { /* @@ -1452,6 +1520,19 @@ recheck: goto reinsert; } + /* + * Re-check for wirings now that we hold the object lock and + * have verified that the page is unbusied. If the page is + * mapped, it may still be wired by pmap lookups. The call to + * vm_page_try_remove_all() below atomically checks for such + * wirings and removes mappings. If the page is unmapped, the + * wire count is guaranteed not to increase. + */ + if (__predict_false(vm_page_wired(m))) { + vm_page_dequeue_deferred(m); + continue; + } + /* * Invalid pages can be easily freed. They cannot be * mapped, vm_page_free() asserts this. @@ -1508,8 +1589,10 @@ recheck: */ if (object->ref_count != 0) { vm_page_test_dirty(m); - if (m->dirty == 0) - pmap_remove_all(m); + if (m->dirty == 0 && !vm_page_try_remove_all(m)) { + vm_page_dequeue_deferred(m); + continue; + } } /* diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c index 42b320fb92d..2557dc6f4e5 100644 --- a/sys/vm/vm_swapout.c +++ b/sys/vm/vm_swapout.c @@ -207,16 +207,20 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, goto unlock_return; if (should_yield()) goto unlock_return; - if (vm_page_busied(p)) + + /* + * The page may acquire a wiring after this check. + * The page daemon handles wired pages, so there is + * no harm done if a wiring appears while we are + * attempting to deactivate the page. 
+ */ + if (vm_page_busied(p) || vm_page_wired(p)) continue; VM_CNT_INC(v_pdpages); - vm_page_lock(p); - if (vm_page_wired(p) || - !pmap_page_exists_quick(pmap, p)) { - vm_page_unlock(p); + if (!pmap_page_exists_quick(pmap, p)) continue; - } act_delta = pmap_ts_referenced(p); + vm_page_lock(p); if ((p->aflags & PGA_REFERENCED) != 0) { if (act_delta == 0) act_delta = 1; @@ -234,7 +238,7 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, p->act_count -= min(p->act_count, ACT_DECLINE); if (!remove_mode && p->act_count == 0) { - pmap_remove_all(p); + (void)vm_page_try_remove_all(p); vm_page_deactivate(p); } } else { @@ -244,7 +248,7 @@ vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, p->act_count += ACT_ADVANCE; } } else if (vm_page_inactive(p)) - pmap_remove_all(p); + (void)vm_page_try_remove_all(p); vm_page_unlock(p); } if ((backing_object = object->backing_object) == NULL) @@ -556,9 +560,7 @@ vm_thread_swapout(struct thread *td) if (m == NULL) panic("vm_thread_swapout: kstack already missing?"); vm_page_dirty(m); - vm_page_lock(m); vm_page_unwire(m, PQ_LAUNDRY); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(ksobj); }