/*
* Copyright (c) 2000-2022 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
#include <mach_assert.h>
#include <vm/pmap.h>
#include <vm/vm_map_xnu.h>
#include <vm/vm_kern_xnu.h>
#include <vm/vm_page_internal.h>
#include <kern/ledger.h>
#include <kern/zalloc_internal.h>
#include <i386/pmap_internal.h>
void pmap_remove_range(
pmap_t pmap,
vm_map_offset_t va,
pt_entry_t *spte,
pt_entry_t *epte);
static void pmap_remove_range_options(
pmap_t pmap,
vm_map_offset_t va,
pt_entry_t *spte,
pt_entry_t *epte,
int options);
void pmap_reusable_range(
pmap_t pmap,
vm_map_offset_t va,
pt_entry_t *spte,
pt_entry_t *epte,
boolean_t reusable);
pt_entry_t *PTE_corrupted_ptr;
#if DEVELOPMENT || DEBUG
int pmap_inject_pte_corruption;
uint32_t pmap_update_clear_pte_count;
uint32_t pmap_update_invalid_pte_count;
#endif
/*
* The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
* on a NBPDE boundary.
*/
uint64_t
pmap_shared_region_size_min(__unused pmap_t pmap)
{
return NBPDE;
}
uint64_t
pmap_commpage_size_min(__unused pmap_t pmap)
{
return NBPDE;
}
/*
* kern_return_t pmap_nest(grand, subord, va_start, size)
*
* grand = the pmap that we will nest subord into
* subord = the pmap that goes into the grand
* va_start = start of range in pmap to be inserted
* size = Size of nest area (up to 16TB)
*
* Inserts a pmap into another. This is used to implement shared segments.
*
* Note that we depend upon higher-level VM locks to ensure that things don't change while
* we are doing this. For example, the VM should not be doing any pmap enters while it is
* nesting, nor performing two nests at once.
*/
/*
* This routine can nest subtrees either at the PDPT level (1GiB) or at the
* PDE level (2MiB). We currently disallow disparate offsets for the "subord"
* container and the "grand" parent. A minor optimization to consider for the
* future: make the "subord" truly a container rather than a full-fledged
* pagetable hierarchy which can be unnecessarily sparse (DRK).
*/
kern_return_t
pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, uint64_t size)
{
vm_map_offset_t vaddr;
pd_entry_t *pde, *npde;
unsigned int i;
uint64_t num_pde;
assert(!is_ept_pmap(grand));
assert(!is_ept_pmap(subord));
if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
(va_start & (pmap_shared_region_size_min(grand) - 1)) ||
((size >> 28) > 65536)) { /* Max size we can nest is 16TB: 65536 << 28 == 2^44 bytes */
return KERN_INVALID_VALUE;
}
if (size == 0) {
panic("pmap_nest: size is invalid - %016llX", size);
}
PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
VM_KERNEL_ADDRHIDE(va_start));
vaddr = (vm_map_offset_t)va_start;
num_pde = size >> PDESHIFT;
PMAP_LOCK_EXCLUSIVE(subord);
subord->pm_shared = TRUE;
for (i = 0; i < num_pde;) {
if (((vaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG) {
npde = pmap64_pdpt(subord, vaddr);
while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
PMAP_UNLOCK_EXCLUSIVE(subord);
pmap_expand_pdpt(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
PMAP_LOCK_EXCLUSIVE(subord);
npde = pmap64_pdpt(subord, vaddr);
}
*npde |= INTEL_PDPTE_NESTED;
vaddr += NBPDPT;
i += (uint32_t)NPDEPG;
} else {
npde = pmap_pde(subord, vaddr);
while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
PMAP_UNLOCK_EXCLUSIVE(subord);
pmap_expand(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
PMAP_LOCK_EXCLUSIVE(subord);
npde = pmap_pde(subord, vaddr);
}
vaddr += NBPDE;
i++;
}
}
PMAP_UNLOCK_EXCLUSIVE(subord);
vaddr = (vm_map_offset_t)va_start;
PMAP_LOCK_EXCLUSIVE(grand);
for (i = 0; i < num_pde;) {
pd_entry_t tpde;
if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG)) {
npde = pmap64_pdpt(subord, vaddr);
if (npde == 0) {
panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
}
tpde = *npde;
pde = pmap64_pdpt(grand, vaddr);
if (0 == pde) {
PMAP_UNLOCK_EXCLUSIVE(grand);
pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
PMAP_LOCK_EXCLUSIVE(grand);
pde = pmap64_pdpt(grand, vaddr);
}
if (pde == 0) {
panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
}
pmap_store_pte(FALSE, pde, tpde);
vaddr += NBPDPT;
i += (uint32_t) NPDEPG;
} else {
npde = pmap_pde(subord, vaddr);
if (npde == 0) {
panic("pmap_nest: no npde, subord %p vaddr 0x%llx", subord, vaddr);
}
tpde = *npde;
pde = pmap_pde(grand, vaddr);
if (0 == pde) {
PMAP_UNLOCK_EXCLUSIVE(grand);
pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
PMAP_LOCK_EXCLUSIVE(grand);
pde = pmap_pde(grand, vaddr);
}
if (pde == 0) {
panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
}
vaddr += NBPDE;
pmap_store_pte(FALSE, pde, tpde);
i++;
}
}
PMAP_UNLOCK_EXCLUSIVE(grand);
PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, KERN_SUCCESS);
return KERN_SUCCESS;
}
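/*
 * Illustrative sketch, not part of the original source: how a hypothetical
 * caller might nest a shared-region pmap into a task pmap. The helper name
 * and its parameters are assumptions for illustration; real callers live in
 * the Mach VM shared-region code.
 */
#if 0
static kern_return_t
nest_shared_region_sketch(pmap_t task_pmap, pmap_t shared_pmap,
    addr64_t va_start, uint64_t size)
{
	uint64_t gran = pmap_shared_region_size_min(task_pmap); /* NBPDE, 2MiB */

	/* pmap_nest() rejects misaligned ranges and panics on size == 0, so check first. */
	if ((va_start & (gran - 1)) || (size & (gran - 1)) || (size == 0)) {
		return KERN_INVALID_VALUE;
	}
	return pmap_nest(task_pmap, shared_pmap, va_start, size);
}
#endif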
/*
* kern_return_t pmap_unnest(grand, vaddr)
*
* grand = the pmap that we will un-nest subord from
* vaddr = start of range in pmap to be unnested
*
* Removes a pmap from another. This is used to implement shared segments.
*/
kern_return_t
pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size)
{
pd_entry_t *pde;
unsigned int i;
uint64_t num_pde;
addr64_t va_start, va_end;
uint64_t npdpt = PMAP_INVALID_PDPTNUM;
PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
(vaddr & (pmap_shared_region_size_min(grand) - 1))) {
panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...",
grand, vaddr, size);
}
assert(!is_ept_pmap(grand));
/* align everything to PDE boundaries */
va_start = vaddr & ~(NBPDE - 1);
if (os_add_overflow(vaddr, size + NBPDE - 1, &va_end)) {
panic("pmap_unnest: Overflow when calculating range end: s=0x%llx sz=0x%llx\n", vaddr, size);
}
va_end &= ~(NBPDE - 1);
size = va_end - va_start;
PMAP_LOCK_EXCLUSIVE(grand);
num_pde = size >> PDESHIFT;
vaddr = va_start;
for (i = 0; i < num_pde;) {
if (pdptnum(grand, vaddr) != npdpt) {
npdpt = pdptnum(grand, vaddr);
pde = pmap64_pdpt(grand, vaddr);
if (pde && (*pde & INTEL_PDPTE_NESTED)) {
pmap_store_pte(FALSE, pde, (pd_entry_t)0);
i += (uint32_t) NPDEPG;
vaddr += NBPDPT;
continue;
}
}
pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
if (pde == 0) {
panic("pmap_unnest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
}
pmap_store_pte(FALSE, pde, (pd_entry_t)0);
i++;
vaddr += NBPDE;
}
PMAP_UPDATE_TLBS(grand, va_start, va_end);
PMAP_UNLOCK_EXCLUSIVE(grand);
PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, KERN_SUCCESS);
return KERN_SUCCESS;
}
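/*
 * Illustrative sketch, an assumption rather than original code: undoing the
 * nesting above. The same 2MiB alignment contract applies; note that
 * pmap_unnest() panics (rather than returning an error) on a misaligned request.
 */
#if 0
static void
unnest_shared_region_sketch(pmap_t task_pmap, addr64_t va_start, uint64_t size)
{
	assert(((va_start | size) &
	    (pmap_shared_region_size_min(task_pmap) - 1)) == 0);
	(void) pmap_unnest(task_pmap, va_start, size);
}
#endif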
kern_return_t
pmap_unnest_options(
pmap_t grand,
addr64_t vaddr,
__unused uint64_t size,
__unused unsigned int options)
{
return pmap_unnest(grand, vaddr, size);
}
/* Invoked by the Mach VM to determine the platform-specific unnest region */
boolean_t
pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e)
{
pd_entry_t *pdpte;
boolean_t rval = FALSE;
PMAP_LOCK_EXCLUSIVE(p);
pdpte = pmap64_pdpt(p, *s);
if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
*s &= ~(NBPDPT - 1);
rval = TRUE;
}
pdpte = pmap64_pdpt(p, *e);
if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
*e = ((*e + NBPDPT) & ~(NBPDPT - 1));
rval = TRUE;
}
PMAP_UNLOCK_EXCLUSIVE(p);
return rval;
}
pmap_paddr_t
pmap_find_pa(pmap_t pmap, addr64_t va)
{
pt_entry_t *ptp;
pd_entry_t *pdep;
pd_entry_t pde;
pt_entry_t pte;
boolean_t is_ept, locked = FALSE;
pmap_paddr_t pa = 0;
is_ept = is_ept_pmap(pmap);
if ((pmap != kernel_pmap) && not_in_kdp) {
PMAP_LOCK_EXCLUSIVE(pmap);
locked = TRUE;
} else {
mp_disable_preemption();
}
if (os_ref_get_count(&pmap->ref_count) == 0) {
goto pfp_exit;
}
pdep = pmap_pde(pmap, va);
if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
if (pde & PTE_PS) {
pa = pte_to_pa(pde) + (va & I386_LPGMASK);
} else {
ptp = pmap_pte(pmap, va);
if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
pa = pte_to_pa(pte) + (va & PAGE_MASK);
}
}
}
pfp_exit:
if (locked) {
PMAP_UNLOCK_EXCLUSIVE(pmap);
} else {
mp_enable_preemption();
}
return pa;
}
/*
* pmap_find_phys returns the (4K) physical page number containing a
* given virtual address in a given pmap.
* Note that pmap_pte may return a PDE if this virtual address is
* mapped by a large page; this is taken into account so that the
* correct page number is returned in that case.
*/
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
ppnum_t ppn = 0;
pmap_paddr_t pa = 0;
pa = pmap_find_pa(pmap, va);
ppn = (ppnum_t) i386_btop(pa);
return ppn;
}
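/*
 * Illustrative sketch (not in the original source): recovering a full
 * physical address from pmap_find_phys() by reattaching the intra-page
 * offset that i386_btop() discards. The helper name is an assumption.
 */
#if 0
static pmap_paddr_t
va_to_pa_sketch(pmap_t pmap, addr64_t va)
{
	ppnum_t ppn = pmap_find_phys(pmap, va);

	if (ppn == 0) {
		return 0; /* no valid translation */
	}
	return (pmap_paddr_t)i386_ptob(ppn) + (va & PAGE_MASK);
}
#endif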
ppnum_t
pmap_find_phys_nofault(pmap_t pmap, addr64_t va)
{
if ((pmap == kernel_pmap) ||
((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map)))) {
return pmap_find_phys(pmap, va);
}
return 0;
}
/*
* pmap_get_prot returns the equivalent VM page protections
* set on a given address, 'va'. This function is used in the
* ml_static_verify_page_protections() routine which is used
* by the kext loading code to validate that the TEXT segment
* of a kext is mapped executable.
*/
kern_return_t
pmap_get_prot(pmap_t pmap, addr64_t va, vm_prot_t *protp)
{
pt_entry_t *ptp;
pd_entry_t *pdep;
pd_entry_t pde;
pt_entry_t pte;
boolean_t is_ept, locked = FALSE;
kern_return_t retval = KERN_FAILURE;
vm_prot_t prot = 0;
is_ept = is_ept_pmap(pmap);
if ((pmap != kernel_pmap) && not_in_kdp) {
PMAP_LOCK_EXCLUSIVE(pmap);
locked = TRUE;
} else {
mp_disable_preemption();
}
if (os_ref_get_count(&pmap->ref_count) == 0) {
goto pfp_exit;
}
pdep = pmap_pde(pmap, va);
if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
if (pde & PTE_PS) {
prot = VM_PROT_READ;
if (pde & PTE_WRITE(is_ept)) {
prot |= VM_PROT_WRITE;
}
if (PTE_IS_EXECUTABLE(is_ept, pde)) {
prot |= VM_PROT_EXECUTE;
}
retval = KERN_SUCCESS;
} else {
ptp = pmap_pte(pmap, va);
if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
prot = VM_PROT_READ;
if (pte & PTE_WRITE(is_ept)) {
prot |= VM_PROT_WRITE;
}
if (PTE_IS_EXECUTABLE(is_ept, pte)) {
prot |= VM_PROT_EXECUTE;
}
retval = KERN_SUCCESS;
}
}
}
pfp_exit:
if (locked) {
PMAP_UNLOCK_EXCLUSIVE(pmap);
} else {
mp_enable_preemption();
}
if (protp) {
*protp = prot;
}
return retval;
}
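/*
 * Illustrative sketch, assuming a hypothetical caller: the kind of check
 * ml_static_verify_page_protections() performs on a kext TEXT page, per
 * the comment above. Not part of the original source.
 */
#if 0
static boolean_t
page_is_executable_sketch(addr64_t va)
{
	vm_prot_t prot = VM_PROT_NONE;

	if (pmap_get_prot(kernel_pmap, va, &prot) != KERN_SUCCESS) {
		return FALSE; /* no valid mapping at this address */
	}
	return (prot & VM_PROT_EXECUTE) != 0;
}
#endif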
/*
* Update cache attributes for all extant managed mappings.
* Assumes PV for this page is locked, and that the page
* is managed. We assume that this physical page may be mapped in
* both EPT and normal Intel PTEs, so we convert the attributes
* to the corresponding format for each pmap.
*
* We assert that the passed set of attributes is a subset of the
* PHYS_CACHEABILITY_MASK.
*/
void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes)
{
pv_rooted_entry_t pv_h, pv_e;
pv_hashed_entry_t pvh_e, nexth;
vm_map_offset_t vaddr;
pmap_t pmap;
pt_entry_t *ptep;
boolean_t is_ept;
unsigned ept_attributes;
assert(IS_MANAGED_PAGE(pn));
assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);
/* We don't support the PAT bit for EPT PTEs */
if (attributes & INTEL_PTE_NCACHE) {
ept_attributes = INTEL_EPT_NCACHE;
} else {
ept_attributes = INTEL_EPT_WB;
}
pv_h = pai_to_pvh(pn);
/* TODO: translate the PHYS_* bits to PTE bits; while they're
* currently identical, they may not remain so.
* Potential optimizations (here and in page_protect):
* parallel shootdowns, and checking for redundant
* attribute modifications.
*/
/*
* Alter attributes on all mappings
*/
if (pv_h->pmap != PMAP_NULL) {
pv_e = pv_h;
pvh_e = (pv_hashed_entry_t)pv_e;
do {
pmap = pv_e->pmap;
vaddr = PVE_VA(pv_e);
ptep = pmap_pte(pmap, vaddr);
if (0 == ptep) {
panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
}
is_ept = is_ept_pmap(pmap);
nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
if (!is_ept) {
pmap_update_pte(is_ept, ptep, PHYS_CACHEABILITY_MASK, attributes, true);
} else {
pmap_update_pte(is_ept, ptep, INTEL_EPT_CACHE_MASK, ept_attributes, true);
}
PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
pvh_e = nexth;
} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
}
}
void
x86_filter_TLB_coherency_interrupts(boolean_t dofilter)
{
assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
if (dofilter) {
CPU_CR3_MARK_INACTIVE();
} else {
CPU_CR3_MARK_ACTIVE();
mfence();
pmap_update_interrupt();
}
}
/*
* Insert the given physical page (p) at
* the specified virtual address (v) in the
* target physical map with the protection requested.
*
* If specified, the page will be wired down, meaning
* that the related pte cannot be reclaimed.
*
* NB: This is the only routine which MAY NOT lazy-evaluate
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
*/
kern_return_t
pmap_enter(
pmap_t pmap,
vm_map_offset_t vaddr,
ppnum_t pn,
vm_prot_t prot,
vm_prot_t fault_type,
unsigned int flags,
boolean_t wired,
pmap_mapping_type_t mapping_type)
{
return pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL, mapping_type);
}
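/*
 * Illustrative sketch (assumption, not original source): entering a wired,
 * writable mapping for physical page "pn" at "vaddr". The wrapper name and
 * its parameters are hypothetical.
 */
#if 0
static kern_return_t
wire_page_sketch(pmap_t pmap, vm_map_offset_t vaddr, ppnum_t pn,
    pmap_mapping_type_t mtype)
{
	return pmap_enter(pmap, vaddr, pn,
	    VM_PROT_READ | VM_PROT_WRITE,   /* prot */
	    VM_PROT_NONE,                   /* fault_type (unused here) */
	    0,                              /* flags: default cacheability */
	    TRUE,                           /* wired: the PTE can't be reclaimed */
	    mtype);
}
#endif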
#define PTE_LOCK(EPT) INTEL_PTE_SWLOCK
static inline void PTE_LOCK_LOCK(pt_entry_t *);
static inline void PTE_LOCK_UNLOCK(pt_entry_t *);
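/*
 * PTE_LOCK_LOCK/PTE_LOCK_UNLOCK implement a tiny per-PTE spinlock on the
 * INTEL_PTE_SWLOCK software bit: spin (with PAUSE) until the bit reads
 * clear, then try to set it with a compare-and-swap, retrying on failure.
 * The acquire/release orderings pair the lock with the unlock across CPUs.
 */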
void
PTE_LOCK_LOCK(pt_entry_t *lpte)
{
pt_entry_t pte;
plretry:
while ((pte = __c11_atomic_load((_Atomic pt_entry_t *)lpte, memory_order_relaxed)) & PTE_LOCK(0)) {
__builtin_ia32_pause();
}
if (__c11_atomic_compare_exchange_strong((_Atomic pt_entry_t *)lpte, &pte, pte | PTE_LOCK(0), memory_order_acquire_smp, TRUE)) {
return;
}
goto plretry;
}
void
PTE_LOCK_UNLOCK(pt_entry_t *lpte)
{
__c11_atomic_fetch_and((_Atomic pt_entry_t *)lpte, ~PTE_LOCK(0), memory_order_release_smp);
}
kern_return_t
pmap_enter_options_addr(
pmap_t pmap,
vm_map_address_t v,
pmap_paddr_t pa,
vm_prot_t prot,
vm_prot_t fault_type,
unsigned int flags,
boolean_t wired,
unsigned int options,
__unused void *arg,
pmap_mapping_type_t mapping_type)
{
return pmap_enter_options(pmap, v, intel_btop(pa), prot, fault_type, flags, wired, options, arg, mapping_type);
}
kern_return_t
pmap_enter_options(
pmap_t pmap,
vm_map_offset_t vaddr,
ppnum_t pn,
vm_prot_t prot,
__unused vm_prot_t fault_type,
unsigned int flags,
boolean_t wired,
unsigned int options,
void *arg,
__unused pmap_mapping_type_t mapping_type)
{
pt_entry_t *pte = NULL;
pv_rooted_entry_t pv_h;
ppnum_t pai;
pv_hashed_entry_t pvh_e;
pv_hashed_entry_t pvh_new;
pt_entry_t template;
pmap_paddr_t old_pa;
pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
boolean_t need_tlbflush = FALSE;
boolean_t set_NX;
char oattr;
boolean_t old_pa_locked;
/* 2MiB mappings are confined to x86_64 by VM */
boolean_t superpage = flags & VM_MEM_SUPERPAGE;
vm_object_t delpage_pm_obj = NULL;
uint64_t delpage_pde_index = 0;
pt_entry_t old_pte;
kern_return_t kr = KERN_FAILURE;
boolean_t is_ept;
boolean_t is_altacct;
boolean_t ptelocked = FALSE;
pmap_intr_assert();
if (__improbable(pmap == PMAP_NULL)) {
return KERN_INVALID_ARGUMENT;
}
if (__improbable(pn == vm_page_guard_addr)) {
return KERN_INVALID_ARGUMENT;
}
is_ept = is_ept_pmap(pmap);
/* N.B. We can be supplied a zero page frame in the NOENTER case; it's an
* unused value in that scenario.
*/
assert(pn != vm_page_fictitious_addr);
PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(vaddr), pn,
prot);
if ((prot & VM_PROT_EXECUTE) || __improbable(is_ept && (prot & VM_PROT_UEXEC))) {
set_NX = FALSE;
} else {
set_NX = TRUE;
}
#if DEVELOPMENT || DEBUG
if (__improbable(set_NX && (!nx_enabled || !pmap->nx_enabled))) {
set_NX = FALSE;
}
if (__improbable(set_NX && (pmap == kernel_pmap) &&
((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) ||
(pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
set_NX = FALSE;
}
#endif
pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
pvh_e = PV_HASHED_ENTRY_NULL;
PMAP_LOCK_SHARED(pmap);
/*
* Expand pmap to include this pte. Assume that
* pmap is always expanded to include enough hardware
* pages to map one VM page.
*/
if (__improbable(superpage)) {
while ((pte = pmap_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
/* need room for another pde entry */
PMAP_UNLOCK_SHARED(pmap);
kr = pmap_expand_pdpt(pmap, vaddr, options);
if (kr != KERN_SUCCESS) {
goto done1;
}
PMAP_LOCK_SHARED(pmap);
}
} else {
while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
/*
* Must unlock to expand the pmap:
* we're going to grow pde-level page(s).
*/
PMAP_UNLOCK_SHARED(pmap);
kr = pmap_expand(pmap, vaddr, options);
if (kr != KERN_SUCCESS) {
goto done1;
}
PMAP_LOCK_SHARED(pmap);
}
}
if (__improbable(options & PMAP_EXPAND_OPTIONS_NOENTER)) {
PMAP_UNLOCK_SHARED(pmap);
kr = KERN_SUCCESS;
goto done1;
}
if (__improbable(superpage && *pte && !(*pte & PTE_PS))) {
/*
* There is still an empty page table mapped that
* was used for a previous base page mapping.
* Remember the PDE and the PDE index, so that we
* can free the page at the end of this function.
*/
delpage_pde_index = pdeidx(pmap, vaddr);
delpage_pm_obj = pmap->pm_obj;
pmap_store_pte(is_ept, pte, 0);
}
PTE_LOCK_LOCK(pte);
ptelocked = TRUE;
old_pa = pte_to_pa(*pte);
pai = pa_index(old_pa);
old_pa_locked = FALSE;
if (old_pa == 0 &&
PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr)) {
/*
* "pmap" should be locked at this point, so this should
* not race with another pmap_enter() or pmap_remove_range().
*/
assert(pmap != kernel_pmap);
/* one less "compressed" */
pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
PAGE_SIZE);
if (*pte & PTE_COMPRESSED_ALT) {
pmap_ledger_debit(
pmap,
task_ledgers.alternate_accounting_compressed,
PAGE_SIZE);
} else {
/* was part of the footprint */
pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
PAGE_SIZE);
}
/* marker will be cleared below */
}
/*
* If we have a previous managed page, lock the pv entry now. After
* we lock it, check whether someone beat us to the lock, and if so,
* drop the lock.
*/
if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
LOCK_PVH(pai);
old_pa_locked = TRUE;
old_pa = pte_to_pa(*pte);
if (0 == old_pa) {
UNLOCK_PVH(pai); /* another path beat us to it */
old_pa_locked = FALSE;
}
}
/*
* Special case if the incoming physical page is already mapped
* at this address.
*/
if (old_pa == pa) {
pt_entry_t old_attributes =
*pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept) | PTE_LOCK(is_ept));
/*
* May be changing its wired attribute or protection
*/
template = pa_to_pte(pa);
if (__probable(!is_ept)) {
template |= INTEL_PTE_VALID;
} else {
template |= INTEL_EPT_IPAT;
}
template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
/*
* We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
*/
if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
(flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
if (!(flags & VM_MEM_GUARDED)) {
template |= INTEL_PTE_PAT;
}
template |= INTEL_PTE_NCACHE;
}
if (pmap != kernel_pmap && !is_ept) {
template |= INTEL_PTE_USER;
}
if (prot & VM_PROT_READ) {
template |= PTE_READ(is_ept);
}
if (prot & VM_PROT_WRITE) {
template |= PTE_WRITE(is_ept);
if (is_ept && !pmap_ept_support_ad) {
template |= PTE_MOD(is_ept);
if (old_pa_locked) {
assert(IS_MANAGED_PAGE(pai));
pmap_phys_attributes[pai] |= PHYS_MODIFIED;
}
}
}
if (prot & VM_PROT_EXECUTE) {
assert(set_NX == 0);
template = pte_set_ex(template, is_ept);
}
if (__improbable(is_ept && (prot & VM_PROT_UEXEC))) {
assert(set_NX == 0);
template = pte_set_uex(template);
}
if (set_NX) {
template = pte_remove_ex(template, is_ept);
}
if (wired) {
template |= PTE_WIRED;
if (!iswired(old_attributes)) {
pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
}
} else {
if (iswired(old_attributes)) {
pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
}
}
if (superpage) { /* this path cannot be used */
template |= PTE_PS; /* to change the page size! */
}
if (old_attributes == template) {
goto dont_update_pte;
}
/* Determine delta, PV locked */
need_tlbflush =
((old_attributes ^ template) != PTE_WIRED);
/* Optimization: avoid a TLB flush when adding writability */
if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
if ((old_attributes ^ template) == PTE_WRITE(is_ept)) {
need_tlbflush = FALSE;
}
}
/* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
if (__improbable(is_ept && !pmap_ept_support_ad)) {
template |= PTE_REF(is_ept);
if (old_pa_locked) {
assert(IS_MANAGED_PAGE(pai));
pmap_phys_attributes[pai] |= PHYS_REFERENCED;
}
}
/* store modified PTE and preserve RC bits */
pt_entry_t npte, opte;
assert((*pte & PTE_LOCK(is_ept)) != 0);
do {
opte = *pte;
npte = template | (opte & (PTE_REF(is_ept) |
PTE_MOD(is_ept))) | PTE_LOCK(is_ept);
} while (!pmap_cmpx_pte(pte, opte, npte));
DTRACE_VM3(set_pte, uint64_t, vaddr, uint64_t, opte, uint64_t, npte);
dont_update_pte:
if (old_pa_locked) {
UNLOCK_PVH(pai);
old_pa_locked = FALSE;
}
goto done2;
}
/*
* Outline of code from here:
* 1) If va was mapped, update TLBs, remove the mapping
* and remove old pvlist entry.
* 2) Add pvlist entry for new mapping
* 3) Enter new mapping.
*
* If the old physical page is not managed step 1) is skipped
* (except for updating the TLBs), and the mapping is
* overwritten at step 3). If the new physical page is not
* managed, step 2) is skipped.
*/
/* TODO: add opportunistic refmod collect */
if (old_pa != (pmap_paddr_t) 0) {
boolean_t was_altacct = FALSE;
/*
* Don't do anything to pages outside valid memory here.
* Instead convince the code that enters a new mapping
* to overwrite the old one.
*/
/* invalidate the PTE */
pmap_update_pte(is_ept, pte, PTE_VALID_MASK(is_ept), 0, true);
/* propagate invalidate everywhere */
PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
/* remember reference and change */
old_pte = *pte;
oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
/* completely invalidate the PTE */
pmap_store_pte(is_ept, pte, PTE_LOCK(is_ept));
if (IS_MANAGED_PAGE(pai)) {
/*
* Remove the mapping from the pvlist for
* this physical page.
* We'll end up with either a rooted pv or a
* hashed pv
*/
pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte, &was_altacct);
}
if (IS_MANAGED_PAGE(pai)) {
pmap_assert(old_pa_locked == TRUE);
pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
if (pmap != kernel_pmap) {
/* update ledgers */
if (was_altacct) {
assert(IS_INTERNAL_PAGE(pai));
pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
} else if (IS_REUSABLE_PAGE(pai)) {
assert(!was_altacct);
assert(IS_INTERNAL_PAGE(pai));
pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
/* was already not in phys_footprint */
} else if (IS_INTERNAL_PAGE(pai)) {
assert(!was_altacct);
assert(!IS_REUSABLE_PAGE(pai));
pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
} else {
/* not an internal page */
pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
}
}
if (iswired(*pte)) {
pmap_ledger_debit(pmap, task_ledgers.wired_mem,
PAGE_SIZE);
}
if (!is_ept) {
pmap_phys_attributes[pai] |= oattr;
} else {
pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
}
} else {
/*
* old_pa is not managed.
* Do removal part of accounting.
*/
if (pmap != kernel_pmap) {
#if 00
assert(pmap->stats.device > 0);
OSAddAtomic(-1, &pmap->stats.device);
#endif
}
if (iswired(*pte)) {
pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
}
}
}
/*
* if we had a previously managed page locked, unlock it now
*/
if (old_pa_locked) {
UNLOCK_PVH(pai);
old_pa_locked = FALSE;
}
pai = pa_index(pa); /* now working with new incoming phys page */
if (IS_MANAGED_PAGE(pai)) {
/*
* Step 2) Enter the mapping in the PV list for this
* physical page.
*/
pv_h = pai_to_pvh(pai);
LOCK_PVH(pai);
if (pv_h->pmap == PMAP_NULL) {
/*
* No mappings yet, use rooted pv
*/
pv_h->va_and_flags = vaddr;
pv_h->pmap = pmap;
queue_init(&pv_h->qlink);
if (options & PMAP_OPTIONS_INTERNAL) {
pmap_phys_attributes[pai] |= PHYS_INTERNAL;
} else {
pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
}
if (options & PMAP_OPTIONS_REUSABLE) {
pmap_phys_attributes[pai] |= PHYS_REUSABLE;
} else {
pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
}
if ((options & PMAP_OPTIONS_ALT_ACCT) &&
IS_INTERNAL_PAGE(pai)) {
pv_h->va_and_flags |= PVE_IS_ALTACCT;
is_altacct = TRUE;
} else {
pv_h->va_and_flags &= ~PVE_IS_ALTACCT;
is_altacct = FALSE;
}
} else {
/*
* Add new pv_hashed_entry after header.
*/
if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
pvh_e = pvh_new;
pvh_new = PV_HASHED_ENTRY_NULL;
} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
PV_HASHED_ALLOC(&pvh_e);
if (PV_HASHED_ENTRY_NULL == pvh_e) {
/*
* The pv list is empty. If we are on
* the kernel pmap, we'll use one of
* the special private kernel pv_e's;
* otherwise, we need to unlock
* everything, zalloc a pv_e, and
* restart, bringing the pv_e in
* with us.
*/
if (kernel_pmap == pmap) {
PV_HASHED_KERN_ALLOC(&pvh_e);
} else {
UNLOCK_PVH(pai);
PTE_LOCK_UNLOCK(pte);
PMAP_UNLOCK_SHARED(pmap);
pmap_pv_throttle(pmap);
pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
goto Retry;
}
}
}
if (PV_HASHED_ENTRY_NULL == pvh_e) {
panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
}
pvh_e->va_and_flags = vaddr;
pvh_e->pmap = pmap;
pvh_e->ppn = pn;
if ((options & PMAP_OPTIONS_ALT_ACCT) &&
IS_INTERNAL_PAGE(pai)) {
pvh_e->va_and_flags |= PVE_IS_ALTACCT;
is_altacct = TRUE;
} else {
pvh_e->va_and_flags &= ~PVE_IS_ALTACCT;
is_altacct = FALSE;
}
pv_hash_add(pvh_e, pv_h);
/*
* Remember that we used the pvlist entry.
*/
pvh_e = PV_HASHED_ENTRY_NULL;
}
/*
* only count the mapping
* for 'managed memory'
*/
pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
if (pmap != kernel_pmap) {
/* update ledgers */
if (is_altacct) {
/* internal but also alternate accounting */
assert(IS_INTERNAL_PAGE(pai));
pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
/* alternate accounting, so not in footprint */
} else if (IS_REUSABLE_PAGE(pai)) {
assert(!is_altacct);
assert(IS_INTERNAL_PAGE(pai));
pmap_ledger_credit(pmap, task_ledgers.reusable, PAGE_SIZE);
/* internal but reusable: not in footprint */
} else if (IS_INTERNAL_PAGE(pai)) {
assert(!is_altacct);
assert(!IS_REUSABLE_PAGE(pai));
/* internal: add to footprint */
pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
} else {
/* not internal: not in footprint */
pmap_ledger_credit(pmap, task_ledgers.external, PAGE_SIZE);
}
}
} else if (last_managed_page == 0) {
/* Account for early mappings created before "managed pages"
* are determined. Consider consulting the available DRAM map.
*/
pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
if (pmap != kernel_pmap) {
#if 00
OSAddAtomic(+1, &pmap->stats.device);
PMAP_STATS_PEAK(pmap->stats.device);
#endif
}
}
/*
* Step 3) Enter the mapping.
*
* Build a template to speed up entering -
* only the pfn changes.
*/
template = pa_to_pte(pa);
if (!is_ept) {
template |= INTEL_PTE_VALID;
} else {
template |= INTEL_EPT_IPAT;
}
/*
* DRK: It may be worth asserting on cache attribute flags that diverge
* from the existing physical page attributes.
*/
template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
/*
* We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
*/
if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
if (!(flags & VM_MEM_GUARDED)) {
template |= INTEL_PTE_PAT;
}
template |= INTEL_PTE_NCACHE;
}
if (pmap != kernel_pmap && !is_ept) {
template |= INTEL_PTE_USER;
}
if (prot & VM_PROT_READ) {
template |= PTE_READ(is_ept);
}
if (prot & VM_PROT_WRITE) {
template |= PTE_WRITE(is_ept);
if (is_ept && !pmap_ept_support_ad) {
template |= PTE_MOD(is_ept);
if (IS_MANAGED_PAGE(pai)) {
pmap_phys_attributes[pai] |= PHYS_MODIFIED;
}
}
}
if (prot & VM_PROT_EXECUTE) {
assert(set_NX == 0);
template = pte_set_ex(template, is_ept);
}
if (__improbable(is_ept && (prot & VM_PROT_UEXEC))) {
assert(set_NX == 0);
template = pte_set_uex(template);
}
if (set_NX) {
template = pte_remove_ex(template, is_ept);
}
if (wired) {
template |= INTEL_PTE_WIRED;
pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
}
if (__improbable(superpage)) {
template |= INTEL_PTE_PS;
}
/* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
if (__improbable(is_ept && !pmap_ept_support_ad)) {
template |= PTE_REF(is_ept);
if (IS_MANAGED_PAGE(pai)) {
pmap_phys_attributes[pai] |= PHYS_REFERENCED;
}
}
template |= PTE_LOCK(is_ept);
pmap_store_pte(is_ept, pte, template);
DTRACE_VM3(set_pte, uint64_t, vaddr, uint64_t, 0, uint64_t, template);
/*
* If this was a managed page, we delayed unlocking the pv until here
* to prevent pmap_page_protect et al. from finding it until the pte
* has been stored.
*/
if (IS_MANAGED_PAGE(pai)) {
UNLOCK_PVH(pai);
}
done2:
if (need_tlbflush == TRUE) {
if (options & PMAP_OPTIONS_NOFLUSH) {
PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
} else {
PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
}
}
if (ptelocked) {
PTE_LOCK_UNLOCK(pte);
}
PMAP_UNLOCK_SHARED(pmap);
if (pvh_e != PV_HASHED_ENTRY_NULL) {
PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
}
if (pvh_new != PV_HASHED_ENTRY_NULL) {
PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
}
if (delpage_pm_obj) {
vm_page_t m;
vm_object_lock(delpage_pm_obj);
m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
if (m == VM_PAGE_NULL) {
panic("pmap_enter: pte page not in object");
}
VM_PAGE_FREE(m);
vm_object_unlock(delpage_pm_obj);
OSAddAtomic(-1, &inuse_ptepages_count);
PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
}
kr = KERN_SUCCESS;
done1:
if (__improbable((kr == KERN_SUCCESS) && (pmap == kernel_pmap) &&
zone_spans_ro_va(vaddr, vaddr + PAGE_SIZE))) {
pmap_page_protect((ppnum_t)atop_kernel(kvtophys(vaddr)), VM_PROT_READ);
}
PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
return kr;
}
/*
* Remove a range of hardware page-table entries.
* The entries given are the first (inclusive)
* and last (exclusive) entries for the VM pages.
* The virtual address is the va for the first pte.
*
* The pmap must be locked.
* If the pmap is not the kernel pmap, the range must lie
* entirely within one pte-page. This is NOT checked.
* Assumes that the pte-page exists.
*/
void
pmap_remove_range(
pmap_t pmap,
vm_map_offset_t start_vaddr,
pt_entry_t *spte,
pt_entry_t *epte)
{
pmap_remove_range_options(pmap, start_vaddr, spte, epte,
PMAP_OPTIONS_REMOVE);
}
static void
pmap_remove_range_options(
pmap_t pmap,
vm_map_offset_t start_vaddr,
pt_entry_t *spte,
pt_entry_t *epte,
int options)
{
pt_entry_t *cpte;
pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t pvh_e;
int pvh_cnt = 0;
int num_removed, num_unwired, num_found, num_invalid;
int ledgers_external, ledgers_reusable, ledgers_internal, ledgers_alt_internal;
uint64_t ledgers_compressed, ledgers_alt_compressed;
ppnum_t pai;
pmap_paddr_t pa;
vm_map_offset_t vaddr;
boolean_t is_ept = is_ept_pmap(pmap);
boolean_t was_altacct;
num_removed = 0;
num_unwired = 0;
num_found = 0;
num_invalid = 0;
ledgers_external = 0;
ledgers_reusable = 0;
ledgers_internal = 0;
ledgers_compressed = 0;
ledgers_alt_internal = 0;
ledgers_alt_compressed = 0;
/* invalidate the PTEs first to "freeze" them */
for (cpte = spte, vaddr = start_vaddr;
cpte < epte;
cpte++, vaddr += PAGE_SIZE_64) {
pt_entry_t p = *cpte;
pa = pte_to_pa(p);
if (pa == 0) {
if ((options & PMAP_OPTIONS_REMOVE) &&
(PTE_IS_COMPRESSED(p, cpte, pmap, vaddr))) {
assert(pmap != kernel_pmap);
/* one less "compressed"... */
ledgers_compressed++;
if (p & PTE_COMPRESSED_ALT) {
/* ... but it used to be "ALTACCT" */
ledgers_alt_compressed++;
}
/* clear marker(s) */
/* XXX probably does not need to be atomic! */
pmap_update_pte(is_ept, cpte, INTEL_PTE_COMPRESSED_MASK, 0, true);
}
continue;
}
num_found++;
if (iswired(p)) {
num_unwired++;
}
pai = pa_index(pa);
if (!IS_MANAGED_PAGE(pai)) {
/*
* Outside range of managed physical memory.
* Just remove the mappings.
*/
pmap_store_pte(is_ept, cpte, 0);
continue;
}
if ((p & PTE_VALID_MASK(is_ept)) == 0) {
num_invalid++;
}
/* invalidate the PTE */
pmap_update_pte(is_ept, cpte, PTE_VALID_MASK(is_ept), 0, true);
}
if (num_found == 0) {
/* nothing was changed: we're done */
goto update_counts;
}
/* propagate the invalidates to other CPUs */
PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
for (cpte = spte, vaddr = start_vaddr;
cpte < epte;
cpte++, vaddr += PAGE_SIZE_64) {
pa = pte_to_pa(*cpte);
if (pa == 0) {
check_pte_for_compressed_marker:
/*
* This PTE could have been replaced with a
* "compressed" marker after our first "freeze"
* loop above, so check again.
*/
if ((options & PMAP_OPTIONS_REMOVE) &&
(PTE_IS_COMPRESSED(*cpte, cpte, pmap, vaddr))) {
assert(pmap != kernel_pmap);
/* one less "compressed"... */
ledgers_compressed++;
if (*cpte & PTE_COMPRESSED_ALT) {
/* ... but it used to be "ALTACCT" */
ledgers_alt_compressed++;
}
pmap_store_pte(is_ept, cpte, 0);
}
continue;
}
pai = pa_index(pa);
LOCK_PVH(pai);
pa = pte_to_pa(*cpte);
if (pa == 0) {
UNLOCK_PVH(pai);
goto check_pte_for_compressed_marker;
}
/*
* Remove the mapping from the pvlist for this physical page.
*/
pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte, &was_altacct);
num_removed++;
/* update ledgers */
if (was_altacct) {
/* internal and alternate accounting */
assert(IS_INTERNAL_PAGE(pai));
ledgers_internal++;
ledgers_alt_internal++;
} else if (IS_REUSABLE_PAGE(pai)) {
/* internal but reusable */
assert(!was_altacct);
assert(IS_INTERNAL_PAGE(pai));
ledgers_reusable++;
} else if (IS_INTERNAL_PAGE(pai)) {
/* internal */
assert(!was_altacct);
assert(!IS_REUSABLE_PAGE(pai));
ledgers_internal++;
} else {
/* not internal */
ledgers_external++;
}
/*
* Get the modify and reference bits, then
* nuke the entry in the page table
*/
/* remember reference and change */
if (!is_ept) {
pmap_phys_attributes[pai] |=
*cpte & (PHYS_MODIFIED | PHYS_REFERENCED);
} else {
pmap_phys_attributes[pai] |=
ept_refmod_to_physmap((*cpte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
}
/* completely invalidate the PTE */
pmap_store_pte(is_ept, cpte, 0);
UNLOCK_PVH(pai);
if (pvh_e != PV_HASHED_ENTRY_NULL) {
pvh_e->qlink.next = (queue_entry_t) pvh_eh;
pvh_eh = pvh_e;
if (pvh_et == PV_HASHED_ENTRY_NULL) {
pvh_et = pvh_e;
}
pvh_cnt++;
}
/* We can encounter at most 'num_found' PTEs for this level.
* Fewer may be encountered if some were replaced by
* compressed markers. No new valid PTEs can be created
* since the pmap lock is held exclusively.
*/
if (num_removed == num_found) {
break;
}
} /* for loop */
if (pvh_eh != PV_HASHED_ENTRY_NULL) {
PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
}
update_counts:
/*
* Update the counts
*/
#if TESTING
if (pmap->stats.resident_count < num_removed) {
panic("pmap_remove_range: resident_count");
}
#endif
if (num_removed) {
pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
}
if (pmap != kernel_pmap) {
if (ledgers_external) {
pmap_ledger_debit(pmap,
task_ledgers.external,
machine_ptob(ledgers_external));
}
if (ledgers_reusable) {
pmap_ledger_debit(pmap,
task_ledgers.reusable,
machine_ptob(ledgers_reusable));
}
if (ledgers_internal) {
pmap_ledger_debit(pmap,
task_ledgers.internal,
machine_ptob(ledgers_internal));
}
if (ledgers_compressed) {
pmap_ledger_debit(pmap,
task_ledgers.internal_compressed,
machine_ptob(ledgers_compressed));
}
if (ledgers_alt_internal) {
pmap_ledger_debit(pmap,
task_ledgers.alternate_accounting,
machine_ptob(ledgers_alt_internal));
}
if (ledgers_alt_compressed) {
pmap_ledger_debit(pmap,
task_ledgers.alternate_accounting_compressed,
machine_ptob(ledgers_alt_compressed));
}
uint64_t net_debit = (ledgers_internal - ledgers_alt_internal) + (ledgers_compressed - ledgers_alt_compressed);
if (net_debit) {
pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(net_debit));
}
}
if (num_unwired != 0) {
pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
}
return;
}
/*
* Remove the given range of addresses
* from the specified map.
*
* It is assumed that the start and end are properly
* rounded to the hardware page size.
*/
void
pmap_remove(
pmap_t map,
addr64_t s64,
addr64_t e64)
{
pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
}
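/*
 * Illustrative sketch, not part of the original source: tearing down a
 * page-aligned kernel VA range. Start and end must be page-aligned per
 * the contract above; the helper name is an assumption.
 */
#if 0
static void
unmap_range_sketch(vm_offset_t va, vm_size_t size)
{
	assert(((va | size) & PAGE_MASK) == 0);
	pmap_remove(kernel_pmap, (addr64_t)va, (addr64_t)(va + size));
}
#endif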
#define PLCHECK_THRESHOLD (2)
void
pmap_remove_options(
pmap_t map,
addr64_t s64,
addr64_t e64,
int options)
{
pt_entry_t *pde;
pt_entry_t *spte, *epte;
addr64_t l64;
uint64_t deadline = 0;
boolean_t is_ept;
pmap_intr_assert();
if (map == PMAP_NULL || s64 == e64) {
return;
}
is_ept = is_ept_pmap(map);
PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
VM_KERNEL_ADDRHIDE(map), VM_KERNEL_ADDRHIDE(s64),
VM_KERNEL_ADDRHIDE(e64));
PMAP_LOCK_EXCLUSIVE(map);
uint32_t traverse_count = 0;
while (s64 < e64) {
pml4_entry_t *pml4e = pmap64_pml4(map, s64);
if ((pml4e == NULL) ||
((*pml4e & PTE_VALID_MASK(is_ept)) == 0)) {
if (os_add_overflow(s64, NBPML4, &s64)) {
/* wrap; clip s64 to e64 */
s64 = e64;
break;
}
s64 &= ~(PML4MASK);
continue;
}
pdpt_entry_t *pdpte = pmap64_pdpt(map, s64);
if ((pdpte == NULL) ||
((*pdpte & PTE_VALID_MASK(is_ept)) == 0)) {
if (os_add_overflow(s64, NBPDPT, &s64)) {
/* wrap; clip s64 to e64 */
s64 = e64;
break;
}
s64 &= ~(PDPTMASK);
continue;
}
if (os_add_overflow(s64, PDE_MAPPED_SIZE, &l64)) {
l64 = e64;
} else {
l64 &= ~(PDE_MAPPED_SIZE - 1);
if (l64 > e64) {
l64 = e64;
}
}
pde = pmap_pde(map, s64);
if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
if (*pde & PTE_PS) {
/*
* If we're removing a superpage, pmap_remove_range()
* must work on level 2 instead of level 1; and we're
* only passing a single level 2 entry instead of a
* level 1 range.
*/
spte = pde;
epte = spte + 1; /* excluded */
} else {
spte = pmap_pte(map, (s64 & ~(PDE_MAPPED_SIZE - 1)));
spte = &spte[ptenum(s64)];
epte = &spte[intel_btop(l64 - s64)];
}
pmap_remove_range_options(map, s64, spte, epte,
options);
}
s64 = l64;
if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
if (deadline == 0) {
deadline = rdtsc64_nofence() + max_preemption_latency_tsc;
} else {
if (rdtsc64_nofence() > deadline) {
PMAP_UNLOCK_EXCLUSIVE(map);
__builtin_ia32_pause();
PMAP_LOCK_EXCLUSIVE(map);
deadline = rdtsc64_nofence() + max_preemption_latency_tsc;
}
}
}
}
PMAP_UNLOCK_EXCLUSIVE(map);
PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}
void
pmap_page_protect(
ppnum_t pn,
vm_prot_t prot)
{
pmap_page_protect_options(pn, prot, 0, NULL);
}
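/*
 * Illustrative sketch (assumption, not original code): the two common uses
 * of pmap_page_protect(): downgrading every mapping of a physical page to
 * read-only, or removing every mapping outright.
 */
#if 0
static void
page_protect_sketch(ppnum_t pn, boolean_t remove_all)
{
	if (remove_all) {
		pmap_page_protect(pn, VM_PROT_NONE);  /* remove every mapping */
	} else {
		pmap_page_protect(pn, VM_PROT_READ);  /* write-protect every mapping */
	}
}
#endif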
/*
* Routine: pmap_page_protect_options
*
* Function:
* Lower the permission for all mappings to a given
* page.
*/
void
pmap_page_protect_options(
ppnum_t pn,
vm_prot_t prot,
unsigned int options,
void *arg)
{
pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t nexth;
int pvh_cnt = 0;
pv_rooted_entry_t pv_h;
pv_rooted_entry_t pv_e;
pv_hashed_entry_t pvh_e;
pt_entry_t *pte;
int pai;
pmap_t pmap;
boolean_t remove;
pt_entry_t new_pte_value;
boolean_t is_ept;
pmap_intr_assert();
assert(pn != vm_page_fictitious_addr);
if (pn == vm_page_guard_addr) {
return;
}
pai = ppn_to_pai(pn);
if (!IS_MANAGED_PAGE(pai)) {
/*
* Not a managed page.
*/
return;
}
PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, pn, prot);
/*
* Determine the new protection.
*/
switch (prot) {
case VM_PROT_READ:
case VM_PROT_READ | VM_PROT_EXECUTE:
remove = FALSE;
break;
case VM_PROT_ALL:
return; /* nothing to do */
default:
remove = TRUE;
break;
}
pv_h = pai_to_pvh(pai);
LOCK_PVH(pai);
/*
* Walk down PV list, if any, changing or removing all mappings.
*/
if (pv_h->pmap == PMAP_NULL) {
goto done;
}
pv_e = pv_h;
pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
do {
vm_map_offset_t vaddr;
if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
(pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
/* page was modified, so it will be compressed */
options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
options |= PMAP_OPTIONS_COMPRESSOR;
}
pmap = pv_e->pmap;
is_ept = is_ept_pmap(pmap);
vaddr = PVE_VA(pv_e);
pte = pmap_pte(pmap, vaddr);
if (0 == pte) {
panic("pmap_page_protect() "
"pmap=%p pn=0x%x vaddr=0x%llx\n",
pmap, pn, vaddr);
}
/* NULL check must precede the dereference in the assertion below */
pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
"pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
/*
* Remove the mapping if new protection is NONE
*/
if (remove) {
/* Remove per-pmap wired count */
if (iswired(*pte)) {
pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
}
if (pmap != kernel_pmap &&
(options & PMAP_OPTIONS_COMPRESSOR) &&
IS_INTERNAL_PAGE(pai)) {
assert(!PTE_IS_COMPRESSED(*pte, pte, pmap, vaddr));
/* mark this PTE as having been "compressed" */
new_pte_value = PTE_COMPRESSED;
if (IS_ALTACCT_PAGE(pai, pv_e)) {
new_pte_value |= PTE_COMPRESSED_ALT;
}
} else {
new_pte_value = 0;
}
if (options & PMAP_OPTIONS_NOREFMOD) {
pmap_store_pte(is_ept, pte, new_pte_value);
if (options & PMAP_OPTIONS_NOFLUSH) {
PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
} else {
PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
}
} else {
/*
* Remove the mapping, collecting dirty bits.
*/
pmap_update_pte(is_ept, pte, PTE_VALID_MASK(is_ept), 0, true);
PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
if (!is_ept) {
pmap_phys_attributes[pai] |=
*pte & (PHYS_MODIFIED | PHYS_REFERENCED);
} else {
pmap_phys_attributes[pai] |=
ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
}
if ((options &
PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
IS_INTERNAL_PAGE(pai) &&
(pmap_phys_attributes[pai] &
PHYS_MODIFIED)) {
/*
* Page is actually "modified" and
* will be compressed. Start
* accounting for it as "compressed".
*/
assert(!(options & PMAP_OPTIONS_COMPRESSOR));
options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
options |= PMAP_OPTIONS_COMPRESSOR;
assert(new_pte_value == 0);
if (pmap != kernel_pmap) {
new_pte_value = PTE_COMPRESSED;
if (IS_ALTACCT_PAGE(pai, pv_e)) {
new_pte_value |= PTE_COMPRESSED_ALT;
}
}
}
pmap_store_pte(is_ept, pte, new_pte_value);
}
#if TESTING
if (pmap->stats.resident_count < 1) {
panic("pmap_page_protect: resident_count");
}
#endif
pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
/*
* We only ever compress internal pages.
*/
if (options & PMAP_OPTIONS_COMPRESSOR) {
assert(IS_INTERNAL_PAGE(pai));
}
if (pmap != kernel_pmap) {
/* update ledgers */
if (IS_ALTACCT_PAGE(pai, pv_e)) {
assert(IS_INTERNAL_PAGE(pai));
pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
if (options & PMAP_OPTIONS_COMPRESSOR) {
pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
}
} else if (IS_REUSABLE_PAGE(pai)) {
assert(!IS_ALTACCT_PAGE(pai, pv_e));
assert(IS_INTERNAL_PAGE(pai));
if (options & PMAP_OPTIONS_COMPRESSOR) {
pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
/* was not in footprint, but is now */
pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
}
pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
} else if (IS_INTERNAL_PAGE(pai)) {
assert(!IS_ALTACCT_PAGE(pai, pv_e));
assert(!IS_REUSABLE_PAGE(pai));
pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
/*
* Update all stats related to physical
* footprint, which only deals with
* internal pages.
*/
if (options & PMAP_OPTIONS_COMPRESSOR) {
/*
* This removal is only being
* done so we can send this page
* to the compressor; therefore
* it mustn't affect total task
* footprint.
*/
pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
} else {
/*
* This internal page isn't
* going to the compressor,
* so adjust stats to keep
* phys_footprint up to date.
*/
pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
}
} else {
pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
}
}
/*
* Deal with the pv_rooted_entry.
*/
if (pv_e == pv_h) {
/*
* Fix up head later.
*/
pv_h->pmap = PMAP_NULL;
} else {
/*
* Delete this entry.
*/
pv_hash_remove(pvh_e);
pvh_e->qlink.next = (queue_entry_t) pvh_eh;
pvh_eh = pvh_e;
if (pvh_et == PV_HASHED_ENTRY_NULL) {
pvh_et = pvh_e;
}
pvh_cnt++;
}
} else {
/*
* Write-protect, after opportunistic refmod collect
*/
if (!is_ept) {
pmap_phys_attributes[pai] |=
*pte & (PHYS_MODIFIED | PHYS_REFERENCED);
} else {
pmap_phys_attributes[pai] |=
ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
}
pmap_update_pte(is_ept, pte, PTE_WRITE(is_ept), 0, true);
if (options & PMAP_OPTIONS_NOFLUSH) {
PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
} else {
PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
}
}
pvh_e = nexth;
} while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
/*
* If pv_head mapping was removed, fix it up.
*/
if (pv_h->pmap == PMAP_NULL) {
pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
if (pvh_e != (pv_hashed_entry_t) pv_h) {
pv_hash_remove(pvh_e);
pv_h->pmap = pvh_e->pmap;
pv_h->va_and_flags = pvh_e->va_and_flags;
pvh_e->qlink.next = (queue_entry_t) pvh_eh;
pvh_eh = pvh_e;
if (pvh_et == PV_HASHED_ENTRY_NULL) {
pvh_et = pvh_e;
}
pvh_cnt++;
}
}
if (pvh_eh != PV_HASHED_ENTRY_NULL) {
PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
}
done:
UNLOCK_PVH(pai);
PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
}
/*
* Clear specified attribute bits.
*/
void
phys_attribute_clear(
ppnum_t pn,
int bits,
unsigned int options,
void *arg)
{
pv_rooted_entry_t pv_h;
pv_hashed_entry_t pv_e;
pt_entry_t *pte = NULL;
int pai;
pmap_t pmap;
char attributes = 0;
boolean_t is_internal, is_reusable, is_altacct, is_ept;
int ept_bits_to_clear;
boolean_t ept_keep_global_mod = FALSE;
if ((bits & PHYS_MODIFIED) &&
(options & PMAP_OPTIONS_NOFLUSH) &&
arg == NULL) {
panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
"should not clear 'modified' without flushing TLBs\n",
pn, bits, options, arg);
}
/* We only support converting MOD and REF bits for EPT PTEs in this function */
assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
pmap_intr_assert();
assert(pn != vm_page_fictitious_addr);
if (pn == vm_page_guard_addr) {
return;
}
pai = ppn_to_pai(pn);
if (!IS_MANAGED_PAGE(pai)) {
/*
* Not a managed page.
*/
return;
}
PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
pv_h = pai_to_pvh(pai);
LOCK_PVH(pai);
/*
* Walk down PV list, clearing all modify or reference bits.
* We do not need any further locking here: the PVH lock taken
* above keeps the pv_list stable while we walk it.
*/
if (pv_h->pmap != PMAP_NULL) {
/*
* There are some mappings.
*/
is_internal = IS_INTERNAL_PAGE(pai);
is_reusable = IS_REUSABLE_PAGE(pai);
pv_e = (pv_hashed_entry_t)pv_h;
do {
vm_map_offset_t va;
char pte_bits;
pmap = pv_e->pmap;
is_ept = is_ept_pmap(pmap);
is_altacct = IS_ALTACCT_PAGE(pai, pv_e);
va = PVE_VA(pv_e);
pte_bits = 0;
if (bits) {
pte = pmap_pte(pmap, va);
/* grab ref/mod bits from this PTE */
pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
/* propagate to page's global attributes */
if (!is_ept) {
attributes |= pte_bits;
} else {
attributes |= ept_refmod_to_physmap(pte_bits);
if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
ept_keep_global_mod = TRUE;
}
}
/* which bits to clear for this PTE? */
if (!is_ept) {
pte_bits &= bits;
} else {
pte_bits &= ept_bits_to_clear;
}
}
if (options & PMAP_OPTIONS_CLEAR_WRITE) {
pte_bits |= PTE_WRITE(is_ept);
}
/*
* Clear modify and/or reference bits.
*/
if (pte_bits) {
pmap_update_pte(is_ept, pte, pte_bits, 0, true);
/* Ensure all processors using this translation
* invalidate this TLB entry. The invalidation
* *must* follow the PTE update, to ensure that
* the TLB shadow of the 'D' bit (in particular)
* is synchronized with the updated PTE.
*/
if (!(options & PMAP_OPTIONS_NOFLUSH)) {
/* flush TLBS now */
PMAP_UPDATE_TLBS(pmap,
va,
va + PAGE_SIZE);
} else if (arg) {
/* delayed TLB flush: add "pmap" info */
PMAP_UPDATE_TLBS_DELAYED(
pmap,
va,
va + PAGE_SIZE,
(pmap_flush_context *)arg);
} else {
/* no TLB flushing at all */
}
}
/* update pmap "reusable" stats */
if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
is_reusable &&
pmap != kernel_pmap) {
/* one less "reusable" */
pmap_ledger_debit(pmap, task_ledgers.reusable, PAGE_SIZE);
if (is_internal) {
/* one more "internal" */
if (is_altacct) {
/* no impact on ledgers */
} else {
pmap_ledger_credit(pmap,
task_ledgers.internal,
PAGE_SIZE);
pmap_ledger_credit(
pmap,
task_ledgers.phys_footprint,
PAGE_SIZE);
}
} else {
/* one more "external" */
pmap_ledger_credit(pmap, task_ledgers.external, PAGE_SIZE);
}
} else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
!is_reusable &&
pmap != kernel_pmap) {
/* one more "reusable" */
pmap_ledger_credit(pmap, task_ledgers.reusable, PAGE_SIZE);
if (is_internal) {
/* one less "internal" */
if (is_altacct) {
/* no impact on footprint */
} else {
pmap_ledger_debit(pmap,
task_ledgers.internal,
PAGE_SIZE);
pmap_ledger_debit(
pmap,
task_ledgers.phys_footprint,
PAGE_SIZE);
}
} else {
/* one less "external" */
pmap_ledger_debit(pmap, task_ledgers.external, PAGE_SIZE);
}
}
pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
} while (pv_e != (pv_hashed_entry_t)pv_h);
}
/* Opportunistic refmod collection, annulled
* if both REF and MOD are being cleared.
*/
pmap_phys_attributes[pai] |= attributes;
if (ept_keep_global_mod) {
/*
* If the hardware doesn't support AD bits for EPT PTEs and someone is
* requesting that we clear the modified bit for a phys page, we need
* to ensure that there are no EPT mappings for the page with the
* modified bit set. If there are, we cannot clear the global modified bit.
*/
bits &= ~PHYS_MODIFIED;
}
pmap_phys_attributes[pai] &= ~(bits);
/* update this page's "reusable" status */
if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
} else if (options & PMAP_OPTIONS_SET_REUSABLE) {
pmap_phys_attributes[pai] |= PHYS_REUSABLE;
}
UNLOCK_PVH(pai);
PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
}
/*
* Check specified attribute bits.
*/
int
phys_attribute_test(
ppnum_t pn,
int bits)
{
pv_rooted_entry_t pv_h;
pv_hashed_entry_t pv_e;
pt_entry_t *pte;
int pai;
pmap_t pmap;
int attributes = 0;
boolean_t is_ept;
pmap_intr_assert();
assert(pn != vm_page_fictitious_addr);
assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
if (pn == vm_page_guard_addr) {
return 0;
}
pai = ppn_to_pai(pn);
if (!IS_MANAGED_PAGE(pai)) {
/*
* Not a managed page.
*/
return 0;
}
/*
* Fast check: if the bits have already been collected,
* there's no need to take any locks. If they're not set,
* we need to recheck after taking the lock, in case they
* were pulled in while we were waiting for it.
*/
if ((pmap_phys_attributes[pai] & bits) == bits) {
return bits;
}
pv_h = pai_to_pvh(pai);
LOCK_PVH(pai);
attributes = pmap_phys_attributes[pai] & bits;
/*
* Walk down PV list, checking the mappings until we
* reach the end or we've found the desired attributes.
*/
if (attributes != bits &&
pv_h->pmap != PMAP_NULL) {
/*
* There are some mappings.
*/
pv_e = (pv_hashed_entry_t)pv_h;
do {
vm_map_offset_t va;
pmap = pv_e->pmap;
is_ept = is_ept_pmap(pmap);
va = PVE_VA(pv_e);
/*
* pick up modify and/or reference bits from mapping
*/
pte = pmap_pte(pmap, va);
if (!is_ept) {
attributes |= (int)(*pte & bits);
} else {
attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
}
pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
} while ((attributes != bits) &&
(pv_e != (pv_hashed_entry_t)pv_h));
}
pmap_phys_attributes[pai] |= attributes;
UNLOCK_PVH(pai);
return attributes;
}
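/*
 * Illustrative sketch, an assumption for clarity: testing whether any
 * mapping has dirtied a physical page.
 */
#if 0
static boolean_t
page_is_dirty_sketch(ppnum_t pn)
{
	return (phys_attribute_test(pn, PHYS_MODIFIED) & PHYS_MODIFIED) != 0;
}
#endif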
/*
* Routine: pmap_change_wiring
* Function: Change the wiring attribute for a map/virtual-address
* pair.
* In/out conditions:
* The mapping must already exist in the pmap.
*/
void
pmap_change_wiring(
pmap_t map,
vm_map_offset_t vaddr,
boolean_t wired)
{
pt_entry_t *pte;
PMAP_LOCK_SHARED(map);
if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL) {
panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
map, vaddr, wired);
}
if (wired && !iswired(*pte)) {
/*
* wiring down mapping
*/
pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
pmap_update_pte(is_ept_pmap(map), pte, 0, PTE_WIRED, false);
} else if (!wired && iswired(*pte)) {
/*
* unwiring mapping
*/
pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
pmap_update_pte(is_ept_pmap(map), pte, PTE_WIRED, 0, false);
}
PMAP_UNLOCK_SHARED(map);
}
/*
* "Backdoor" direct map routine for early mappings.
* Useful for mapping memory outside the range
* Sets A, D and NC if requested
*/
vm_offset_t
pmap_map_bd(
vm_offset_t virt,
vm_map_offset_t start_addr,
vm_map_offset_t end_addr,
vm_prot_t prot,
unsigned int flags)
{
pt_entry_t template;
pt_entry_t *ptep;
vm_offset_t base = virt;
boolean_t doflush = FALSE;
template = pa_to_pte(start_addr)
| INTEL_PTE_REF
| INTEL_PTE_MOD
| INTEL_PTE_WIRED
| INTEL_PTE_VALID;
if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
template |= INTEL_PTE_NCACHE;
if (!(flags & (VM_MEM_GUARDED))) {
template |= INTEL_PTE_PAT;
}
}
if ((prot & VM_PROT_EXECUTE) == 0) {
template |= INTEL_PTE_NX;
}
if (prot & VM_PROT_WRITE) {
template |= INTEL_PTE_WRITE;
}
vm_map_offset_t caddr = start_addr;
while (caddr < end_addr) {
ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
if (ptep == PT_ENTRY_NULL) {
panic("pmap_map_bd: Invalid kernel address");
}
if (pte_to_pa(*ptep)) {
doflush = TRUE;
}
pmap_store_pte(FALSE, ptep, template);
pte_increment_pa(template);
virt += PAGE_SIZE;
caddr += PAGE_SIZE;
}
if (doflush) {
pmap_tlbi_range(0, ~0ULL, true, 0);
PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
}
return virt;
}
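/*
 * Illustrative sketch (hypothetical caller, not original source): an early,
 * uncached, read/write mapping of a device register range, before the
 * regular kernel memory allocators are available.
 */
#if 0
static vm_offset_t
map_device_early_sketch(vm_offset_t virt, vm_map_offset_t phys_start,
    vm_map_offset_t phys_end)
{
	return pmap_map_bd(virt, phys_start, phys_end,
	    VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE);
}
#endif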
/* Create a virtual alias beginning at 'ava' of the specified kernel virtual
* range. The aliased pagetable range is expanded if
* PMAP_EXPAND_OPTIONS_ALIASMAP is specified. Performs no synchronization,
* assumes caller has stabilized the source and destination ranges. Currently
* used to populate sections of the trampoline "doublemap" at CPU startup.
*/
void
pmap_alias(
vm_offset_t ava,
vm_map_offset_t start_addr,
vm_map_offset_t end_addr,
vm_prot_t prot,
unsigned int eoptions)
{
pt_entry_t prot_template, template;
pt_entry_t *aptep, *sptep;
prot_template = INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID;
if ((prot & VM_PROT_EXECUTE) == 0) {
prot_template |= INTEL_PTE_NX;
}
if (prot & VM_PROT_WRITE) {
prot_template |= INTEL_PTE_WRITE;
}
assert(((start_addr | end_addr) & PAGE_MASK) == 0);
while (start_addr < end_addr) {
aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
if (aptep == PT_ENTRY_NULL) {
if (eoptions & PMAP_EXPAND_OPTIONS_ALIASMAP) {
pmap_expand(kernel_pmap, ava, PMAP_EXPAND_OPTIONS_ALIASMAP);
aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
} else {
panic("pmap_alias: Invalid alias address");
}
}
/* The aliased range should not have any active mappings */
assert(pte_to_pa(*aptep) == 0);
sptep = pmap_pte(kernel_pmap, start_addr);
assert(sptep != PT_ENTRY_NULL && (pte_to_pa(*sptep) != 0));
template = pa_to_pte(pte_to_pa(*sptep)) | prot_template;
pmap_store_pte(FALSE, aptep, template);
ava += PAGE_SIZE;
start_addr += PAGE_SIZE;
}
}
mach_vm_size_t
pmap_query_resident(
pmap_t pmap,
addr64_t s64,
addr64_t e64,
mach_vm_size_t *compressed_bytes_p)
{
pt_entry_t *pde;
pt_entry_t *spte, *epte;
addr64_t l64;
uint64_t deadline = 0;
mach_vm_size_t resident_bytes;
mach_vm_size_t compressed_bytes;
boolean_t is_ept;
pmap_intr_assert();
if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
if (compressed_bytes_p) {
*compressed_bytes_p = 0;
}
return 0;
}
is_ept = is_ept_pmap(pmap);
PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(s64),
VM_KERNEL_ADDRHIDE(e64));
resident_bytes = 0;
compressed_bytes = 0;
PMAP_LOCK_EXCLUSIVE(pmap);
uint32_t traverse_count = 0;
while (s64 < e64) {
if (os_add_overflow(s64, PDE_MAPPED_SIZE, &l64)) {
l64 = e64;
} else {
l64 &= ~(PDE_MAPPED_SIZE - 1);
if (l64 > e64) {
l64 = e64;
}
}
pde = pmap_pde(pmap, s64);
if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
if (*pde & PTE_PS) {
/* superpage: not supported */
} else {
spte = pmap_pte(pmap,
(s64 & ~(PDE_MAPPED_SIZE - 1)));
spte = &spte[ptenum(s64)];
epte = &spte[intel_btop(l64 - s64)];
for (; spte < epte; spte++) {
if (pte_to_pa(*spte) != 0) {
resident_bytes += PAGE_SIZE;
} else if (*spte & PTE_COMPRESSED) {
compressed_bytes += PAGE_SIZE;
}
}
}
}
s64 = l64;
if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
if (deadline == 0) {
deadline = rdtsc64() + max_preemption_latency_tsc;
} else {
if (rdtsc64() > deadline) {
PMAP_UNLOCK_EXCLUSIVE(pmap);
__builtin_ia32_pause();
PMAP_LOCK_EXCLUSIVE(pmap);
deadline = rdtsc64() + max_preemption_latency_tsc;
}
}
}
}
PMAP_UNLOCK_EXCLUSIVE(pmap);
PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
resident_bytes);
if (compressed_bytes_p) {
*compressed_bytes_p = compressed_bytes;
}
return resident_bytes;
}
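/*
 * Illustrative sketch, not part of the original source: measuring how much
 * of a user pmap's range is resident versus compressed. The helper name is
 * an assumption.
 */
#if 0
static void
query_resident_sketch(pmap_t pmap, addr64_t start, addr64_t end)
{
	mach_vm_size_t compressed = 0;
	mach_vm_size_t resident;

	/* "resident" counts bytes with valid physical backing in [start, end);
	 * "compressed" counts bytes whose contents sit in the compressor. */
	resident = pmap_query_resident(pmap, start, end, &compressed);
	(void)resident;
	(void)compressed;
}
#endif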
uint64_t pmap_query_page_info_retries;
kern_return_t
pmap_query_page_info(
pmap_t pmap,
vm_map_offset_t va,
int *disp_p)
{
int disp;
boolean_t is_ept;
pmap_paddr_t pa;
ppnum_t pai;
pd_entry_t *pde_p;
pt_entry_t *pte_p, pte;
pmap_intr_assert();
if (pmap == PMAP_NULL || pmap == kernel_pmap) {
*disp_p = 0;
return KERN_INVALID_ARGUMENT;
}
disp = 0;
is_ept = is_ept_pmap(pmap);
PMAP_LOCK_EXCLUSIVE(pmap);
pde_p = pmap_pde(pmap, va);
if (!pde_p ||
!(*pde_p & PTE_VALID_MASK(is_ept)) ||
(*pde_p & PTE_PS)) {
goto done;
}
try_again:
disp = 0;
pte_p = pmap_pte(pmap, va);
if (pte_p == PT_ENTRY_NULL) {
goto done;
}
pte = *pte_p;
pa = pte_to_pa(pte);
if (pa == 0) {
if (PTE_IS_COMPRESSED(pte, pte_p, pmap, va)) {
disp |= PMAP_QUERY_PAGE_COMPRESSED;
if (pte & PTE_COMPRESSED_ALT) {
disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
}
}
} else {
disp |= PMAP_QUERY_PAGE_PRESENT;
pai = pa_index(pa);
if (!IS_MANAGED_PAGE(pai)) {
} else if (pmap_pv_is_altacct(pmap, va, pai)) {
assert(IS_INTERNAL_PAGE(pai));
disp |= PMAP_QUERY_PAGE_INTERNAL;
disp |= PMAP_QUERY_PAGE_ALTACCT;
} else if (IS_REUSABLE_PAGE(pai)) {
disp |= PMAP_QUERY_PAGE_REUSABLE;
} else if (IS_INTERNAL_PAGE(pai)) {
disp |= PMAP_QUERY_PAGE_INTERNAL;
}
}
if (__improbable(pte_p != pmap_pte(pmap, va) || pte != *pte_p)) {
/* something changed: try again */
pmap_query_page_info_retries++;
goto try_again;
}
done:
PMAP_UNLOCK_EXCLUSIVE(pmap);
*disp_p = disp;
return KERN_SUCCESS;
}
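/*
 * Illustrative sketch (assumption, not original code): decoding the
 * disposition bits returned by pmap_query_page_info().
 */
#if 0
static void
page_disposition_sketch(pmap_t pmap, vm_map_offset_t va)
{
	int disp = 0;

	if (pmap_query_page_info(pmap, va, &disp) != KERN_SUCCESS) {
		return;
	}
	if (disp & PMAP_QUERY_PAGE_PRESENT) {
		/* a physical page is mapped at "va" */
	} else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
		/* the page's contents live in the compressor */
	}
}
#endif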
void
pmap_set_vm_map_cs_enforced(
pmap_t pmap,
bool new_value)
{
PMAP_LOCK_EXCLUSIVE(pmap);
pmap->pm_vm_map_cs_enforced = new_value;
PMAP_UNLOCK_EXCLUSIVE(pmap);
}
extern int cs_process_enforcement_enable;
bool
pmap_get_vm_map_cs_enforced(
pmap_t pmap)
{
if (cs_process_enforcement_enable) {
return true;
}
return pmap->pm_vm_map_cs_enforced;
}
void
pmap_set_jit_entitled(__unused pmap_t pmap)
{
/* The x86 pmap layer does not care if a map has a JIT entry. */
return;
}
bool
pmap_get_jit_entitled(__unused pmap_t pmap)
{
/* The x86 pmap layer does not care if a map is using JIT. */
return false;
}
void
pmap_set_tpro(__unused pmap_t pmap)
{
/* The x86 pmap layer does not care if a map is using TPRO */
return;
}
bool
pmap_get_tpro(__unused pmap_t pmap)
{
/* The x86 pmap layer does not care if a map is using TPRO */
return false;
}
bool
pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
{
/*
* The x86 pmap layer does not apply any policy to any protection
* types.
*/
return false;
}
uint64_t
pmap_release_pages_fast(void)
{
return 0;
}
void
pmap_trim(__unused pmap_t grand, __unused pmap_t subord, __unused addr64_t vstart, __unused uint64_t size)
{
return;
}
__dead2
void
pmap_ledger_verify_size(size_t size)
{
panic("%s: unsupported, "
"size=%lu",
__func__, size);
}
__dead2
ledger_t
pmap_ledger_alloc(void)
{
panic("%s: unsupported",
__func__);
}
__dead2
void
pmap_ledger_free(ledger_t ledger)
{
panic("%s: unsupported, "
"ledger=%p",
__func__, ledger);
}
kern_return_t
pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
unsigned int level_mask __unused, size_t *bytes_copied __unused)
{
return KERN_NOT_SUPPORTED;
}
void *
pmap_map_compressor_page(ppnum_t pn)
{
assertf(IS_MANAGED_PAGE(ppn_to_pai(pn)), "%s called on non-managed page 0x%08x", __func__, pn);
return PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
}
void
pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
{
}
bool
pmap_clear_refmod_range_options(
pmap_t pmap __unused,
vm_map_address_t start __unused,
vm_map_address_t end __unused,
unsigned int mask __unused,
unsigned int options __unused)
{
/*
* x86 doesn't have ranged tlbi instructions, and we already have
* the pmap_flush_context. This operation isn't implemented.
*/
return false;
}
bool
pmap_supported_feature(pmap_t pmap, pmap_feature_flags_t feat)
{
switch (feat) {
case PMAP_FEAT_UEXEC:
return pmap != NULL && is_ept_pmap(pmap);
default:
return false;
}
}