arm64: Support the L3 ATTR_CONTIGUOUS page size in pagesizes[]

Update pagesizes[] to include the L3 ATTR_CONTIGUOUS (L3C) page size,
which is 64KB when the base page size is 4KB and 2MB when the base page
size is 16KB.

Add support for L3C pages to shm_create_largepage().

Add support for creating L3C page mappings to pmap_enter(psind=1).

Add support for reporting L3C page mappings to mincore(2) and
procstat(8).

Update vm_fault_soft_fast() and vm_fault_populate() to handle multiple
superpage sizes.

Declare arm64 as supporting two superpage reservation sizes, and
simulate two superpage reservation sizes, updating the vm_page's psind
field to reflect the correct page size from pagesizes[].  (The next
patch in this series will replace this simulation.  This patch is
already big enough.)

Co-authored-by:	Eliot Solomon <ehs3@rice.edu>
Reviewed by:	kib
Differential Revision:	https://reviews.freebsd.org/D45766
This commit is contained in:
Alan Cox 2024-07-12 02:44:56 -05:00
parent f0d1236f0f
commit 3e00c11a4f
17 changed files with 344 additions and 97 deletions

View file

@ -218,7 +218,7 @@ is 8 bytes on all supported architectures except i386.
.Ss Page Size
.Bl -column -offset indent "Architecture" "Page Sizes"
.It Sy Architecture Ta Sy Page Sizes
.It aarch64 Ta 4K, 2M, 1G
.It aarch64 Ta 4K, 64K, 2M, 1G
.It amd64 Ta 4K, 2M, 1G
.It armv7 Ta 4K, 1M
.It i386 Ta 4K, 2M (PAE), 4M

View file

@ -1631,11 +1631,14 @@ pmap_init(void)
if (superpages_enabled) {
KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
("pmap_init: can't assign to pagesizes[1]"));
pagesizes[1] = L2_SIZE;
pagesizes[1] = L3C_SIZE;
KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
("pmap_init: can't assign to pagesizes[2]"));
pagesizes[2] = L2_SIZE;
if (L1_BLOCKS_SUPPORTED) {
KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
("pmap_init: can't assign to pagesizes[2]"));
pagesizes[2] = L1_SIZE;
KASSERT(MAXPAGESIZES > 3 && pagesizes[3] == 0,
("pmap_init: can't assign to pagesizes[3]"));
pagesizes[3] = L1_SIZE;
}
}
@ -4959,7 +4962,7 @@ static int
pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t pte, int flags,
int psind)
{
pd_entry_t *l0p, *l1p, *l2p, newpte, origpte;
pd_entry_t *l0p, *l1p, *l2p, *l3p, newpte, origpte, *tl3p;
vm_page_t mp;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@ -4973,9 +4976,11 @@ restart:
newpte = pte;
if (!pmap_bti_same(pmap, va, va + pagesizes[psind], &newpte))
return (KERN_PROTECTION_FAILURE);
if (psind == 2) {
if (psind == 3) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
KASSERT(pagesizes[psind] == L1_SIZE,
("pagesizes[%d] != L1_SIZE", psind));
l0p = pmap_l0(pmap, va);
if ((pmap_load(l0p) & ATTR_DESCR_VALID) == 0) {
mp = _pmap_alloc_l3(pmap, pmap_l0_pindex(va), NULL);
@ -5005,7 +5010,9 @@ restart:
("va %#lx changing 1G phys page l1 %#lx newpte %#lx",
va, origpte, newpte));
pmap_store(l1p, newpte);
} else /* (psind == 1) */ {
} else if (psind == 2) {
KASSERT(pagesizes[psind] == L2_SIZE,
("pagesizes[%d] != L2_SIZE", psind));
l2p = pmap_l2(pmap, va);
if (l2p == NULL) {
mp = _pmap_alloc_l3(pmap, pmap_l1_pindex(va), NULL);
@ -5034,6 +5041,40 @@ restart:
("va %#lx changing 2M phys page l2 %#lx newpte %#lx",
va, origpte, newpte));
pmap_store(l2p, newpte);
} else /* (psind == 1) */ {
KASSERT(pagesizes[psind] == L3C_SIZE,
("pagesizes[%d] != L3C_SIZE", psind));
l2p = pmap_l2(pmap, va);
if (l2p == NULL || (pmap_load(l2p) & ATTR_DESCR_VALID) == 0) {
mp = _pmap_alloc_l3(pmap, pmap_l2_pindex(va), NULL);
if (mp == NULL) {
if ((flags & PMAP_ENTER_NOSLEEP) != 0)
return (KERN_RESOURCE_SHORTAGE);
PMAP_UNLOCK(pmap);
vm_wait(NULL);
PMAP_LOCK(pmap);
goto restart;
}
mp->ref_count += L3C_ENTRIES - 1;
l3p = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mp));
l3p = &l3p[pmap_l3_index(va)];
} else {
l3p = pmap_l2_to_l3(l2p, va);
if ((pmap_load(l3p) & ATTR_DESCR_VALID) == 0) {
mp = PTE_TO_VM_PAGE(pmap_load(l2p));
mp->ref_count += L3C_ENTRIES;
}
}
for (tl3p = l3p; tl3p < &l3p[L3C_ENTRIES]; tl3p++) {
origpte = pmap_load(tl3p);
KASSERT((origpte & ATTR_DESCR_VALID) == 0 ||
((origpte & ATTR_CONTIGUOUS) != 0 &&
PTE_TO_PHYS(origpte) == PTE_TO_PHYS(newpte)),
("va %#lx changing 64K phys page l3 %#lx newpte %#lx",
va, origpte, newpte));
pmap_store(tl3p, newpte);
newpte += L3_SIZE;
}
}
dsb(ishst);
@ -5072,7 +5113,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
vm_paddr_t opa, pa;
vm_page_t mpte, om;
bool nosleep;
int lvl, rv;
int full_lvl, lvl, rv;
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
@ -5128,24 +5169,47 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((flags & PMAP_ENTER_LARGEPAGE) != 0) {
KASSERT((m->oflags & VPO_UNMANAGED) != 0,
("managed largepage va %#lx flags %#x", va, flags));
new_l3 &= ~L3_PAGE;
if (psind == 2) {
if (psind == 3) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
new_l3 &= ~L3_PAGE;
new_l3 |= L1_BLOCK;
} else /* (psind == 1) */
} else if (psind == 2) {
new_l3 &= ~L3_PAGE;
new_l3 |= L2_BLOCK;
} else /* (psind == 1) */
new_l3 |= ATTR_CONTIGUOUS;
rv = pmap_enter_largepage(pmap, va, new_l3, flags, psind);
goto out;
}
if (psind == 1) {
if (psind == 2) {
/* Assert the required virtual and physical alignment. */
KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va unaligned"));
KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
KASSERT(m->psind > 1, ("pmap_enter: m->psind < psind"));
rv = pmap_enter_l2(pmap, va, (new_l3 & ~L3_PAGE) | L2_BLOCK,
flags, m, &lock);
goto out;
}
mpte = NULL;
if (psind == 1) {
KASSERT((va & L3C_OFFSET) == 0, ("pmap_enter: va unaligned"));
KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
rv = pmap_enter_l3c(pmap, va, new_l3 | ATTR_CONTIGUOUS, flags,
m, &mpte, &lock);
#if VM_NRESERVLEVEL > 0
/*
* Attempt L2 promotion, if both the PTP and a level 1
* reservation are fully populated.
*/
if (rv == KERN_SUCCESS &&
(mpte == NULL || mpte->ref_count == NL3PG) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 1) {
pde = pmap_l2(pmap, va);
(void)pmap_promote_l2(pmap, pde, va, mpte, &lock);
}
#endif
goto out;
}
/*
* In the case that a page table page is not
@ -5365,15 +5429,14 @@ validate:
* are aligned with each other and an underlying reservation has the
* neighboring L3 pages allocated. The first condition is simply an
* optimization that recognizes some eventual promotion failures early
* at a lower run-time cost. Then, if both the page table page and
* the reservation are fully populated, attempt L2 promotion.
* at a lower run-time cost. Then, if both a level 1 reservation and
* the PTP are fully populated, attempt L2 promotion.
*/
if ((va & L3C_OFFSET) == (pa & L3C_OFFSET) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_is_populated(m, L3C_ENTRIES) &&
(full_lvl = vm_reserv_level_iffullpop(m)) >= 0 &&
pmap_promote_l3c(pmap, l3, va) &&
(mpte == NULL || mpte->ref_count == NL3PG) &&
vm_reserv_level_iffullpop(m) == 0)
full_lvl == 1 && (mpte == NULL || mpte->ref_count == NL3PG))
(void)pmap_promote_l2(pmap, pde, va, mpte, &lock);
#endif
@ -5667,6 +5730,8 @@ pmap_enter_l3c(pmap_t pmap, vm_offset_t va, pt_entry_t l3e, u_int flags,
("pmap_enter_l3c: va is not aligned"));
KASSERT(!VA_IS_CLEANMAP(va) || (l3e & ATTR_SW_MANAGED) == 0,
("pmap_enter_l3c: managed mapping within the clean submap"));
KASSERT((l3e & ATTR_CONTIGUOUS) != 0,
("pmap_enter_l3c: l3e is missing ATTR_CONTIGUOUS"));
/*
* If the L3 PTP is not resident, we attempt to create it here.
@ -5873,14 +5938,12 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end &&
m->psind == 1 && pmap_ps_enabled(pmap) &&
m->psind == 2 && pmap_ps_enabled(pmap) &&
((rv = pmap_enter_l2_rx(pmap, va, m, prot, &lock)) ==
KERN_SUCCESS || rv == KERN_NO_SPACE))
m = &m[L2_SIZE / PAGE_SIZE - 1];
else if ((va & L3C_OFFSET) == 0 && va + L3C_SIZE <= end &&
(VM_PAGE_TO_PHYS(m) & L3C_OFFSET) == 0 &&
vm_reserv_is_populated(m, L3C_ENTRIES) &&
pmap_ps_enabled(pmap) &&
m->psind >= 1 && pmap_ps_enabled(pmap) &&
((rv = pmap_enter_l3c_rx(pmap, va, m, &mpte, prot,
&lock)) == KERN_SUCCESS || rv == KERN_NO_SPACE))
m = &m[L3C_ENTRIES - 1];
@ -5932,7 +5995,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
{
pt_entry_t *l1, *l2, *l3, l3_val;
vm_paddr_t pa;
int lvl;
int full_lvl, lvl;
KASSERT(!VA_IS_CLEANMAP(va) ||
(m->oflags & VPO_UNMANAGED) != 0,
@ -6063,18 +6126,17 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
* are aligned with each other and an underlying reservation has the
* neighboring L3 pages allocated. The first condition is simply an
* optimization that recognizes some eventual promotion failures early
* at a lower run-time cost. Then, attempt L2 promotion, if both the
* PTP and the reservation are fully populated.
* at a lower run-time cost. Then, attempt L2 promotion, if both a
* level 1 reservation and the PTP are fully populated.
*/
if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
(va & L3C_OFFSET) == (pa & L3C_OFFSET) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_is_populated(m, L3C_ENTRIES) &&
(full_lvl = vm_reserv_level_iffullpop(m)) >= 0 &&
pmap_promote_l3c(pmap, l3, va) &&
(mpte == NULL || mpte->ref_count == NL3PG) &&
vm_reserv_level_iffullpop(m) == 0) {
full_lvl == 1 && (mpte == NULL || mpte->ref_count == NL3PG)) {
if (l2 == NULL)
l2 = pmap_pde(pmap, va, &lvl);
l2 = pmap_l2(pmap, va);
/*
* If promotion succeeds, then the next call to this function
@ -8566,7 +8628,7 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
{
pt_entry_t *pte, tpte;
vm_paddr_t mask, pa;
int lvl, val;
int lvl, psind, val;
bool managed;
PMAP_ASSERT_STAGE1(pmap);
@ -8578,21 +8640,22 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
switch (lvl) {
case 3:
mask = L3_OFFSET;
psind = (tpte & ATTR_CONTIGUOUS) != 0 ? 1 : 0;
break;
case 2:
mask = L2_OFFSET;
psind = 2;
break;
case 1:
mask = L1_OFFSET;
psind = 3;
break;
default:
panic("pmap_mincore: invalid level %d", lvl);
}
managed = (tpte & ATTR_SW_MANAGED) != 0;
val = MINCORE_INCORE;
if (lvl != 3)
val |= MINCORE_PSIND(3 - lvl);
val = MINCORE_INCORE | MINCORE_PSIND(psind);
if ((managed && pmap_pte_dirty(pmap, tpte)) || (!managed &&
(tpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW)))
val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
@ -9128,18 +9191,37 @@ pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
{
vm_offset_t superpage_offset;
if (size < L2_SIZE)
if (size < L3C_SIZE)
return;
if (object != NULL && (object->flags & OBJ_COLORED) != 0)
offset += ptoa(object->pg_color);
/*
* Considering the object's physical alignment, is the mapping large
* enough to encompass an L2 (2MB/32MB) superpage ...
*/
superpage_offset = offset & L2_OFFSET;
if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
(*addr & L2_OFFSET) == superpage_offset)
if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) >= L2_SIZE) {
/*
* If the virtual and physical alignments differ, then
* increase the virtual address so that the alignments match.
*/
if ((*addr & L2_OFFSET) < superpage_offset)
*addr = (*addr & ~L2_OFFSET) + superpage_offset;
else if ((*addr & L2_OFFSET) > superpage_offset)
*addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) +
superpage_offset;
return;
if ((*addr & L2_OFFSET) < superpage_offset)
*addr = (*addr & ~L2_OFFSET) + superpage_offset;
else
*addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
}
/* ... or an L3C (64KB/2MB) superpage? */
superpage_offset = offset & L3C_OFFSET;
if (size - ((L3C_SIZE - superpage_offset) & L3C_OFFSET) >= L3C_SIZE) {
if ((*addr & L3C_OFFSET) < superpage_offset)
*addr = (*addr & ~L3C_OFFSET) + superpage_offset;
else if ((*addr & L3C_OFFSET) > superpage_offset)
*addr = ((*addr + L3C_OFFSET) & ~L3C_OFFSET) +
superpage_offset;
}
}
/**

View file

@ -97,7 +97,7 @@
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define PAGE_MASK (PAGE_SIZE - 1)
#define MAXPAGESIZES 3 /* maximum number of supported page sizes */
#define MAXPAGESIZES 4 /* maximum number of supported page sizes */
#ifndef KSTACK_PAGES
#if defined(KASAN) || defined(KMSAN)

View file

@ -114,25 +114,34 @@
#endif
/*
* Enable superpage reservations: 1 level.
* Enable superpage reservations: 2 levels.
*/
#ifndef VM_NRESERVLEVEL
#define VM_NRESERVLEVEL 1
#define VM_NRESERVLEVEL 2
#endif
/*
* Level 0 reservations consist of 512 pages when PAGE_SIZE is 4KB, and
* 2048 pages when PAGE_SIZE is 16KB.
* Level 0 reservations consist of 16 pages when PAGE_SIZE is 4KB, and 128
* pages when PAGE_SIZE is 16KB. Level 1 reservations consist of 32 64KB
* pages when PAGE_SIZE is 4KB, and 16 2MB pages when PAGE_SIZE is 16KB.
*/
#ifndef VM_LEVEL_0_ORDER
#if PAGE_SIZE == PAGE_SIZE_4K
#define VM_LEVEL_0_ORDER 9
#ifndef VM_LEVEL_0_ORDER
#define VM_LEVEL_0_ORDER 4
#endif
#ifndef VM_LEVEL_1_ORDER
#define VM_LEVEL_1_ORDER 5
#endif
#elif PAGE_SIZE == PAGE_SIZE_16K
#define VM_LEVEL_0_ORDER 11
#ifndef VM_LEVEL_0_ORDER
#define VM_LEVEL_0_ORDER 7
#endif
#ifndef VM_LEVEL_1_ORDER
#define VM_LEVEL_1_ORDER 4
#endif
#else
#error Unsupported page size
#endif
#endif
/**
* Address space layout.

View file

@ -1360,8 +1360,12 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
if ((map->flags & MAP_ASLR) != 0) {
maxv1 = maxv / 2 + addr / 2;
error = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1,
(MAXPAGESIZES > 1 && pagesizes[1] != 0) ?
pagesizes[1] : pagesizes[0], &anon_loc);
#if VM_NRESERVLEVEL > 0
pagesizes[VM_NRESERVLEVEL] != 0 ?
/* Align anon_loc to the largest superpage size. */
pagesizes[VM_NRESERVLEVEL] :
#endif
pagesizes[0], &anon_loc);
if (error != 0)
goto ret;
map->anon_loc = anon_loc;

View file

@ -58,6 +58,8 @@
#include <sys/systm.h>
#include <sys/unistd.h>
#include <vm/vm_param.h>
SYSCTL_ROOT_NODE(0, sysctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"Sysctl internal magic");
SYSCTL_ROOT_NODE(CTL_KERN, kern, CTLFLAG_RW | CTLFLAG_CAPRD | CTLFLAG_MPSAFE, 0,
@ -242,7 +244,11 @@ SYSCTL_PROC(_hw, HW_USERMEM, usermem,
SYSCTL_LONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0,
"Amount of physical memory (in pages)");
u_long pagesizes[MAXPAGESIZES] = { PAGE_SIZE };
#if VM_NRESERVLEVEL > 0
_Static_assert(MAXPAGESIZES > VM_NRESERVLEVEL, "MAXPAGESIZES is too small");
#endif
u_long __read_mostly pagesizes[MAXPAGESIZES] = { PAGE_SIZE };
static int
sysctl_hw_pagesizes(SYSCTL_HANDLER_ARGS)

View file

@ -2542,6 +2542,7 @@ kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
vm_offset_t addr;
vm_paddr_t pa;
vm_pindex_t pi, pi_adv, pindex;
int incore;
*super = false;
*resident_count = 0;
@ -2577,10 +2578,15 @@ kern_proc_vmmap_resident(vm_map_t map, vm_map_entry_t entry,
}
m_adv = NULL;
if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
(addr & (pagesizes[1] - 1)) == 0 &&
(pmap_mincore(map->pmap, addr, &pa) & MINCORE_SUPER) != 0) {
(addr & (pagesizes[1] - 1)) == 0 && (incore =
pmap_mincore(map->pmap, addr, &pa) & MINCORE_SUPER) != 0) {
*super = true;
pi_adv = atop(pagesizes[1]);
/*
* The virtual page might be smaller than the physical
* page, so we use the page size reported by the pmap
* rather than m->psind.
*/
pi_adv = atop(pagesizes[incore >> MINCORE_PSIND_SHIFT]);
} else {
/*
* We do not test the found page on validity.

View file

@ -1589,9 +1589,20 @@ shm_mmap_large(struct shmfd *shmfd, vm_map_t map, vm_offset_t *addr,
if (align == 0) {
align = pagesizes[shmfd->shm_lp_psind];
} else if (align == MAP_ALIGNED_SUPER) {
if (shmfd->shm_lp_psind != 1)
/*
* MAP_ALIGNED_SUPER is only supported on superpage sizes,
* i.e., [1, VM_NRESERVLEVEL]. shmfd->shm_lp_psind < 1 is
* handled above.
*/
if (
#if VM_NRESERVLEVEL > 0
shmfd->shm_lp_psind > VM_NRESERVLEVEL
#else
shmfd->shm_lp_psind > 1
#endif
)
return (EINVAL);
align = pagesizes[1];
align = pagesizes[shmfd->shm_lp_psind];
} else {
align >>= MAP_ALIGNMENT_SHIFT;
align = 1ULL << align;

View file

@ -175,7 +175,9 @@
#define MINCORE_REFERENCED_OTHER 0x8 /* Page has been referenced */
#define MINCORE_MODIFIED_OTHER 0x10 /* Page has been modified */
#define MINCORE_SUPER 0x60 /* Page is a "super" page */
#define MINCORE_PSIND(i) (((i) << 5) & MINCORE_SUPER) /* Page size */
#define MINCORE_PSIND_SHIFT 5
#define MINCORE_PSIND(i) (((i) << MINCORE_PSIND_SHIFT) & MINCORE_SUPER)
/* Page size */
/*
* Anonymous object constant for shm_open().

View file

@ -77,6 +77,9 @@ vm_domainset_iter_init(struct vm_domainset_iter *di, struct domainset *ds,
* reservation boundary.
*/
pindex += obj->pg_color;
#if VM_NRESERVLEVEL > 1
pindex >>= VM_LEVEL_1_ORDER;
#endif
pindex >>= VM_LEVEL_0_ORDER;
} else
#endif

View file

@ -380,12 +380,10 @@ vm_fault_soft_fast(struct faultstate *fs)
psind = 0;
#if VM_NRESERVLEVEL > 0
if ((m->flags & PG_FICTITIOUS) == 0 &&
(m_super = vm_reserv_to_superpage(m)) != NULL &&
rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start &&
roundup2(vaddr + 1, pagesizes[m_super->psind]) <= fs->entry->end &&
(vaddr & (pagesizes[m_super->psind] - 1)) == (VM_PAGE_TO_PHYS(m) &
(pagesizes[m_super->psind] - 1)) &&
pmap_ps_enabled(fs->map->pmap)) {
(m_super = vm_reserv_to_superpage(m)) != NULL) {
psind = m_super->psind;
KASSERT(psind > 0,
("psind %d of m_super %p < 1", psind, m_super));
flags = PS_ALL_VALID;
if ((fs->prot & VM_PROT_WRITE) != 0) {
/*
@ -398,9 +396,23 @@ vm_fault_soft_fast(struct faultstate *fs)
if ((fs->first_object->flags & OBJ_UNMANAGED) == 0)
flags |= PS_ALL_DIRTY;
}
if (vm_page_ps_test(m_super, flags, m)) {
while (rounddown2(vaddr, pagesizes[psind]) < fs->entry->start ||
roundup2(vaddr + 1, pagesizes[psind]) > fs->entry->end ||
(vaddr & (pagesizes[psind] - 1)) !=
(VM_PAGE_TO_PHYS(m) & (pagesizes[psind] - 1)) ||
!vm_page_ps_test(m_super, psind, flags, m) ||
!pmap_ps_enabled(fs->map->pmap)) {
psind--;
if (psind == 0)
break;
m_super += rounddown2(m - m_super,
atop(pagesizes[psind]));
KASSERT(m_super->psind >= psind,
("psind %d of m_super %p < %d", m_super->psind,
m_super, psind));
}
if (psind > 0) {
m_map = m_super;
psind = m_super->psind;
vaddr = rounddown2(vaddr, pagesizes[psind]);
/* Preset the modified bit for dirty superpages. */
if ((flags & PS_ALL_DIRTY) != 0)
@ -615,10 +627,10 @@ vm_fault_populate(struct faultstate *fs)
vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset;
psind = m->psind;
if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 ||
while (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 ||
pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last ||
!pmap_ps_enabled(fs->map->pmap)))
psind = 0;
psind--;
npages = atop(pagesizes[psind]);
for (i = 0; i < npages; i++) {

View file

@ -106,7 +106,10 @@
#include <machine/cpu.h>
#if VM_NRESERVLEVEL > 0
#if VM_NRESERVLEVEL > 1
#define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_1_ORDER + VM_LEVEL_0_ORDER + \
PAGE_SHIFT)
#elif VM_NRESERVLEVEL > 0
#define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
#else
#define KVA_KSTACK_QUANTUM_SHIFT (8 + PAGE_SHIFT)

View file

@ -120,7 +120,10 @@ SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD,
#endif
"Max kernel address");
#if VM_NRESERVLEVEL > 0
#if VM_NRESERVLEVEL > 1
#define KVA_QUANTUM_SHIFT (VM_LEVEL_1_ORDER + VM_LEVEL_0_ORDER + \
PAGE_SHIFT)
#elif VM_NRESERVLEVEL > 0
#define KVA_QUANTUM_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
#else
/* On non-superpage architectures we want large import sizes. */

View file

@ -1993,8 +1993,15 @@ out:
return (result);
}
#if VM_NRESERVLEVEL == 1
static const int aslr_pages_rnd_64[2] = {0x1000, 0x10};
static const int aslr_pages_rnd_32[2] = {0x100, 0x4};
#elif VM_NRESERVLEVEL == 2
static const int aslr_pages_rnd_64[3] = {0x1000, 0x1000, 0x10};
static const int aslr_pages_rnd_32[3] = {0x100, 0x100, 0x4};
#else
#error "Unsupported VM_NRESERVLEVEL"
#endif
static int cluster_anon = 1;
SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW,
@ -2190,9 +2197,23 @@ again:
* Find space for allocation, including
* gap needed for later randomization.
*/
pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 &&
(find_space == VMFS_SUPER_SPACE || find_space ==
VMFS_OPTIMAL_SPACE) ? 1 : 0;
pidx = 0;
#if VM_NRESERVLEVEL > 0
if ((find_space == VMFS_SUPER_SPACE ||
find_space == VMFS_OPTIMAL_SPACE) &&
pagesizes[VM_NRESERVLEVEL] != 0) {
/*
* Do not pointlessly increase the space that
* is requested from vm_map_findspace().
* pmap_align_superpage() will only change a
* mapping's alignment if that mapping is at
* least a superpage in size.
*/
pidx = VM_NRESERVLEVEL;
while (pidx > 0 && length < pagesizes[pidx])
pidx--;
}
#endif
gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR &&
(max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ?
aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx];
@ -2656,6 +2677,7 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
vm_offset_t start;
vm_page_t p, p_start;
vm_pindex_t mask, psize, threshold, tmpidx;
int psind;
if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
return;
@ -2710,13 +2732,17 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
p_start = p;
}
/* Jump ahead if a superpage mapping is possible. */
if (p->psind > 0 && ((addr + ptoa(tmpidx)) &
(pagesizes[p->psind] - 1)) == 0) {
mask = atop(pagesizes[p->psind]) - 1;
if (tmpidx + mask < psize &&
vm_page_ps_test(p, PS_ALL_VALID, NULL)) {
p += mask;
threshold += mask;
for (psind = p->psind; psind > 0; psind--) {
if (((addr + ptoa(tmpidx)) &
(pagesizes[psind] - 1)) == 0) {
mask = atop(pagesizes[psind]) - 1;
if (tmpidx + mask < psize &&
vm_page_ps_test(p, psind,
PS_ALL_VALID, NULL)) {
p += mask;
threshold += mask;
break;
}
}
}
} else if (p_start != NULL) {

View file

@ -5562,7 +5562,7 @@ vm_page_is_valid(vm_page_t m, int base, int size)
* (super)page and false otherwise.
*/
bool
vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m)
vm_page_ps_test(vm_page_t m, int psind, int flags, vm_page_t skip_m)
{
vm_object_t object;
int i, npages;
@ -5571,7 +5571,9 @@ vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m)
if (skip_m != NULL && skip_m->object != object)
return (false);
VM_OBJECT_ASSERT_LOCKED(object);
npages = atop(pagesizes[m->psind]);
KASSERT(psind <= m->psind,
("psind %d > psind %d of m %p", psind, m->psind, m));
npages = atop(pagesizes[psind]);
/*
* The physically contiguous pages that make up a superpage, i.e., a

View file

@ -657,7 +657,7 @@ void vm_page_pqbatch_submit(vm_page_t m, uint8_t queue);
bool vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old,
vm_page_astate_t new);
vm_page_t vm_page_prev(vm_page_t m);
bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m);
bool vm_page_ps_test(vm_page_t m, int psind, int flags, vm_page_t skip_m);
void vm_page_putfake(vm_page_t m);
void vm_page_readahead_finish(vm_page_t m);
int vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,

View file

@ -77,6 +77,29 @@
#if VM_NRESERVLEVEL > 0
/*
* Temporarily simulate two-level reservations. Effectively, VM_LEVEL_0_* is
* level 1, and VM_SUBLEVEL_0_* is level 0.
*/
#if VM_NRESERVLEVEL == 2
#undef VM_NRESERVLEVEL
#define VM_NRESERVLEVEL 1
#if VM_LEVEL_0_ORDER == 4
#undef VM_LEVEL_0_ORDER
#define VM_LEVEL_0_ORDER (4 + VM_LEVEL_1_ORDER)
#define VM_SUBLEVEL_0_NPAGES (1 << 4)
#elif VM_LEVEL_0_ORDER == 7
#undef VM_LEVEL_0_ORDER
#define VM_LEVEL_0_ORDER (7 + VM_LEVEL_1_ORDER)
#define VM_SUBLEVEL_0_NPAGES (1 << 7)
#else
#error "Unsupported level 0 reservation size"
#endif
#define VM_LEVEL_0_PSIND 2
#else
#define VM_LEVEL_0_PSIND 1
#endif
#ifndef VM_LEVEL_0_ORDER_MAX
#define VM_LEVEL_0_ORDER_MAX VM_LEVEL_0_ORDER
#endif
@ -381,6 +404,27 @@ vm_reserv_insert(vm_reserv_t rv, vm_object_t object, vm_pindex_t pindex)
vm_reserv_object_unlock(object);
}
#ifdef VM_SUBLEVEL_0_NPAGES
/*
 * Returns true if every bit of the reservation's popmap that belongs to the
 * naturally aligned group of VM_SUBLEVEL_0_NPAGES pages containing "index"
 * is set, and false otherwise.  "index" is a page index within the
 * reservation; any index inside the group yields the same answer.
 *
 * Only group sizes of 16 and 128 pages are handled; any other value of
 * VM_SUBLEVEL_0_NPAGES is rejected at compile time.
 *
 * NOTE(review): the popmap is reinterpreted as an array of fixed-width
 * integers.  This presumably relies on the popmap's bit numbering matching
 * a little-endian layout of those integers -- confirm before reusing on
 * another platform.
 */
static inline bool
vm_reserv_is_sublevel_full(vm_reserv_t rv, int index)
{
_Static_assert(VM_SUBLEVEL_0_NPAGES == 16 ||
VM_SUBLEVEL_0_NPAGES == 128,
"vm_reserv_is_sublevel_full: unsupported VM_SUBLEVEL_0_NPAGES");
/* An equivalent bit_ntest() compiles to more instructions. */
switch (VM_SUBLEVEL_0_NPAGES) {
case 16:
/* One 16-bit word covers the whole group; all ones means full. */
return (((uint16_t *)rv->popmap)[index / 16] == UINT16_MAX);
case 128:
/*
 * Convert the page index into the index of the first of the two
 * consecutive 64-bit words that cover its 128-page group.
 */
index = rounddown2(index, 128) / 64;
return (((uint64_t *)rv->popmap)[index] == UINT64_MAX &&
((uint64_t *)rv->popmap)[index + 1] == UINT64_MAX);
default:
/* Excluded by the _Static_assert above. */
__unreachable();
}
}
#endif
/*
* Reduces the given reservation's population count. If the population count
* becomes zero, the reservation is destroyed. Additionally, moves the
@ -406,11 +450,15 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
("vm_reserv_depopulate: reserv %p's domain is corrupted %d",
rv, rv->domain));
if (rv->popcnt == VM_LEVEL_0_NPAGES) {
KASSERT(rv->pages->psind == 1,
KASSERT(rv->pages->psind == VM_LEVEL_0_PSIND,
("vm_reserv_depopulate: reserv %p is already demoted",
rv));
rv->pages->psind = 0;
rv->pages->psind = VM_LEVEL_0_PSIND - 1;
}
#ifdef VM_SUBLEVEL_0_NPAGES
if (vm_reserv_is_sublevel_full(rv, index))
rv->pages[rounddown2(index, VM_SUBLEVEL_0_NPAGES)].psind = 0;
#endif
bit_clear(rv->popmap, index);
rv->popcnt--;
if ((unsigned)(ticks - rv->lasttick) >= PARTPOPSLOP ||
@ -522,12 +570,17 @@ vm_reserv_populate(vm_reserv_t rv, int index)
index));
KASSERT(rv->popcnt < VM_LEVEL_0_NPAGES,
("vm_reserv_populate: reserv %p is already full", rv));
KASSERT(rv->pages->psind == 0,
KASSERT(rv->pages->psind >= 0 &&
rv->pages->psind < VM_LEVEL_0_PSIND,
("vm_reserv_populate: reserv %p is already promoted", rv));
KASSERT(rv->domain < vm_ndomains,
("vm_reserv_populate: reserv %p's domain is corrupted %d",
rv, rv->domain));
bit_set(rv->popmap, index);
#ifdef VM_SUBLEVEL_0_NPAGES
if (vm_reserv_is_sublevel_full(rv, index))
rv->pages[rounddown2(index, VM_SUBLEVEL_0_NPAGES)].psind = 1;
#endif
rv->popcnt++;
if ((unsigned)(ticks - rv->lasttick) < PARTPOPSLOP &&
rv->inpartpopq && rv->popcnt != VM_LEVEL_0_NPAGES)
@ -542,10 +595,10 @@ vm_reserv_populate(vm_reserv_t rv, int index)
rv->inpartpopq = TRUE;
TAILQ_INSERT_TAIL(&vm_rvd[rv->domain].partpop, rv, partpopq);
} else {
KASSERT(rv->pages->psind == 0,
KASSERT(rv->pages->psind == VM_LEVEL_0_PSIND - 1,
("vm_reserv_populate: reserv %p is already promoted",
rv));
rv->pages->psind = 1;
rv->pages->psind = VM_LEVEL_0_PSIND;
}
vm_reserv_domain_unlock(rv->domain);
}
@ -889,13 +942,18 @@ out:
static void
vm_reserv_break(vm_reserv_t rv)
{
vm_page_t m;
int hi, lo, pos;
vm_reserv_assert_locked(rv);
CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
__FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
vm_reserv_remove(rv);
rv->pages->psind = 0;
m = rv->pages;
#ifdef VM_SUBLEVEL_0_NPAGES
for (; m < rv->pages + VM_LEVEL_0_NPAGES; m += VM_SUBLEVEL_0_NPAGES)
#endif
m->psind = 0;
hi = lo = -1;
pos = 0;
for (;;) {
@ -1089,7 +1147,11 @@ vm_reserv_level(vm_page_t m)
vm_reserv_t rv;
rv = vm_reserv_from_page(m);
#ifdef VM_SUBLEVEL_0_NPAGES
return (rv->object != NULL ? 1 : -1);
#else
return (rv->object != NULL ? 0 : -1);
#endif
}
/*
@ -1102,7 +1164,15 @@ vm_reserv_level_iffullpop(vm_page_t m)
vm_reserv_t rv;
rv = vm_reserv_from_page(m);
return (rv->popcnt == VM_LEVEL_0_NPAGES ? 0 : -1);
if (rv->popcnt == VM_LEVEL_0_NPAGES) {
#ifdef VM_SUBLEVEL_0_NPAGES
return (1);
} else if (rv->pages != NULL &&
vm_reserv_is_sublevel_full(rv, m - rv->pages)) {
#endif
return (0);
}
return (-1);
}
/*
@ -1357,6 +1427,10 @@ vm_reserv_size(int level)
switch (level) {
case 0:
#ifdef VM_SUBLEVEL_0_NPAGES
return (VM_SUBLEVEL_0_NPAGES * PAGE_SIZE);
case 1:
#endif
return (VM_LEVEL_0_SIZE);
case -1:
return (PAGE_SIZE);
@ -1432,12 +1506,16 @@ vm_reserv_to_superpage(vm_page_t m)
VM_OBJECT_ASSERT_LOCKED(m->object);
rv = vm_reserv_from_page(m);
if (rv->object == m->object && rv->popcnt == VM_LEVEL_0_NPAGES)
m = rv->pages;
else
m = NULL;
return (m);
if (rv->object == m->object) {
if (rv->popcnt == VM_LEVEL_0_NPAGES)
return (rv->pages);
#ifdef VM_SUBLEVEL_0_NPAGES
if (vm_reserv_is_sublevel_full(rv, m - rv->pages))
return (rv->pages + rounddown2(m - rv->pages,
VM_SUBLEVEL_0_NPAGES));
#endif
}
return (NULL);
}
#endif /* VM_NRESERVLEVEL > 0 */