/*
* Copyright (c) 2000-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
*/
/*
* Mach Operating System
* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
* All Rights Reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
/*
*/
/*
* File: vm/vm_kern.c
* Author: Avadis Tevanian, Jr., Michael Wayne Young
* Date: 1985
*
* Kernel memory management.
*/
#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <vm/vm_kern_internal.h>
#include <vm/vm_map_internal.h>
#include <vm/vm_object_internal.h>
#include <vm/vm_page_internal.h>
#include <vm/vm_compressor_xnu.h>
#include <vm/vm_pageout_xnu.h>
#include <vm/vm_init_xnu.h>
#include <vm/vm_fault.h>
#include <vm/vm_memtag.h>
#include <kern/misc_protos.h>
#include <vm/cpm_internal.h>
#include <kern/ledger.h>
#include <kern/bits.h>
#include <kern/startup.h>
#include <string.h>
#include <libkern/OSDebug.h>
#include <libkern/crypto/sha2.h>
#include <libkern/section_keywords.h>
#include <sys/kdebug.h>
#include <sys/kdebug_triage.h>
#include <san/kasan.h>
#include <kern/kext_alloc.h>
#include <kern/backtrace.h>
#include <os/hash.h>
#include <kern/zalloc_internal.h>
#include <libkern/crypto/rand.h>
/*
* Variables exported by this module.
*/
SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_ranges[KMEM_RANGE_COUNT];
SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_large_ranges[KMEM_RANGE_COUNT];
static TUNABLE(uint32_t, kmem_ptr_ranges, "kmem_ptr_ranges",
KMEM_RANGE_ID_NUM_PTR);
#define KMEM_GOBJ_THRESHOLD (32ULL << 20)
#if DEBUG || DEVELOPMENT
#define KMEM_OUTLIER_LOG_SIZE (16ULL << 10)
#define KMEM_OUTLIER_SIZE 0
#define KMEM_OUTLIER_ALIGN 1
btlog_t kmem_outlier_log;
#endif /* DEBUG || DEVELOPMENT */
__startup_data static vm_map_size_t data_range_size;
__startup_data static vm_map_size_t ptr_range_size;
__startup_data static vm_map_size_t sprayqtn_range_size;
#pragma mark helpers
__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kma_flags_t flags)
{
return (kmem_flags_t)flags;
}
__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kmr_flags_t flags)
{
return (kmem_flags_t)flags;
}
__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kmf_flags_t flags)
{
return (kmem_flags_t)flags;
}
__abortlike
static void
__kmem_invalid_size_panic(
vm_map_t map,
vm_size_t size,
uint32_t flags)
{
panic("kmem(map=%p, flags=0x%x): invalid size %zd",
map, flags, (size_t)size);
}
__abortlike
static void
__kmem_invalid_arguments_panic(
const char *what,
vm_map_t map,
vm_address_t address,
vm_size_t size,
uint32_t flags)
{
panic("kmem_%s(map=%p, addr=%p, size=%zd, flags=0x%x): "
"invalid arguments passed",
what, map, (void *)address, (size_t)size, flags);
}
__abortlike
static void
__kmem_failed_panic(
vm_map_t map,
vm_size_t size,
uint32_t flags,
kern_return_t kr,
const char *what)
{
panic("kmem_%s(%p, %zd, 0x%x): failed with %d",
what, map, (size_t)size, flags, kr);
}
__abortlike
static void
__kmem_entry_not_found_panic(
vm_map_t map,
vm_offset_t addr)
{
panic("kmem(map=%p) no entry found at %p", map, (void *)addr);
}
static inline vm_object_t
__kmem_object(kmem_flags_t flags)
{
if (flags & KMEM_COMPRESSOR) {
if (flags & KMEM_KOBJECT) {
panic("both KMEM_KOBJECT and KMEM_COMPRESSOR specified");
}
return compressor_object;
}
if (!(flags & KMEM_KOBJECT)) {
panic("KMEM_KOBJECT or KMEM_COMPRESSOR is required");
}
return kernel_object_default;
}
static inline pmap_mapping_type_t
__kmem_mapping_type(kmem_flags_t flags)
{
if (flags & (KMEM_DATA | KMEM_COMPRESSOR)) {
return PMAP_MAPPING_TYPE_DEFAULT;
} else {
return PMAP_MAPPING_TYPE_RESTRICTED;
}
}
static inline vm_size_t
__kmem_guard_left(kmem_flags_t flags)
{
return (flags & KMEM_GUARD_FIRST) ? PAGE_SIZE : 0;
}
static inline vm_size_t
__kmem_guard_right(kmem_flags_t flags)
{
return (flags & KMEM_GUARD_LAST) ? PAGE_SIZE : 0;
}
static inline vm_size_t
__kmem_guard_size(kmem_flags_t flags)
{
return __kmem_guard_left(flags) + __kmem_guard_right(flags);
}
__pure2
static inline vm_size_t
__kmem_entry_orig_size(vm_map_entry_t entry)
{
vm_object_t object = VME_OBJECT(entry);
if (entry->vme_kernel_object) {
return entry->vme_end - entry->vme_start -
entry->vme_object_or_delta;
} else {
return object->vo_size - object->vo_size_delta;
}
}
#pragma mark kmem range methods
#if __arm64__
// <rdar://problem/48304934> arm64 doesn't use ldp when I'd expect it to
#define mach_vm_range_load(r, r_min, r_max) \
asm("ldp %[rmin], %[rmax], [%[range]]" \
: [rmin] "=r"(r_min), [rmax] "=r"(r_max) \
: [range] "r"(r), "m"((r)->min_address), "m"((r)->max_address))
#else
#define mach_vm_range_load(r, rmin, rmax) \
({ rmin = (r)->min_address; rmax = (r)->max_address; })
#endif
__abortlike
static void
__mach_vm_range_overflow(
mach_vm_offset_t addr,
mach_vm_offset_t size)
{
panic("invalid vm range: [0x%llx, 0x%llx + 0x%llx) wraps around",
addr, addr, size);
}
__abortlike
static void
__mach_vm_range_invalid(
mach_vm_offset_t min_address,
mach_vm_offset_t max_address)
{
panic("invalid vm range: [0x%llx, 0x%llx) wraps around",
min_address, max_address);
}
__header_always_inline mach_vm_size_t
mach_vm_range_size(const struct mach_vm_range *r)
{
mach_vm_offset_t rmin, rmax;
mach_vm_range_load(r, rmin, rmax);
return rmax - rmin;
}
__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_contains(const struct mach_vm_range *r, mach_vm_offset_t addr)
{
mach_vm_offset_t rmin, rmax;
#if CONFIG_KERNEL_TAGGING
if (VM_KERNEL_ADDRESS(addr)) {
addr = vm_memtag_canonicalize_address(addr);
}
#endif /* CONFIG_KERNEL_TAGGING */
/*
* The `&` is not a typo: we really expect the check to pass,
* so encourage the compiler to eagerly load and test without branches
*/
mach_vm_range_load(r, rmin, rmax);
return (addr >= rmin) & (addr < rmax);
}
__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_contains(
const struct mach_vm_range *r,
mach_vm_offset_t addr,
mach_vm_offset_t size)
{
mach_vm_offset_t rmin, rmax;
#if CONFIG_KERNEL_TAGGING
if (VM_KERNEL_ADDRESS(addr)) {
addr = vm_memtag_canonicalize_address(addr);
}
#endif /* CONFIG_KERNEL_TAGGING */
/*
* The `&` is not a typo: we really expect the check to pass,
* so encourage the compiler to eagerly load and test without branches
*/
mach_vm_range_load(r, rmin, rmax);
return (addr >= rmin) & (addr + size >= rmin) & (addr + size <= rmax);
}
__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_intersects(
const struct mach_vm_range *r1,
const struct mach_vm_range *r2)
{
mach_vm_offset_t r1_min, r1_max;
mach_vm_offset_t r2_min, r2_max;
mach_vm_range_load(r1, r1_min, r1_max);
r2_min = r2->min_address;
r2_max = r2->max_address;
if (r1_min > r1_max) {
__mach_vm_range_invalid(r1_min, r1_max);
}
if (r2_min > r2_max) {
__mach_vm_range_invalid(r2_min, r2_max);
}
return r1_max > r2_min && r1_min < r2_max;
}
__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_intersects(
const struct mach_vm_range *r1,
mach_vm_offset_t addr,
mach_vm_offset_t size)
{
struct mach_vm_range r2;
addr = VM_KERNEL_STRIP_UPTR(addr);
r2.min_address = addr;
if (os_add_overflow(addr, size, &r2.max_address)) {
__mach_vm_range_overflow(addr, size);
}
return mach_vm_range_intersects(r1, &r2);
}
bool
kmem_range_id_contains(
kmem_range_id_t range_id,
vm_map_offset_t addr,
vm_map_size_t size)
{
return mach_vm_range_contains(&kmem_ranges[range_id], addr, size);
}
__abortlike
static void
kmem_range_invalid_panic(
kmem_range_id_t range_id,
vm_map_offset_t addr,
vm_map_size_t size)
{
const struct mach_vm_range *r = &kmem_ranges[range_id];
mach_vm_offset_t rmin, rmax;
mach_vm_range_load(r, rmin, rmax);
if (addr + size < rmin) {
panic("addr %p + size %llu overflows %p", (void *)addr, size,
(void *)(addr + size));
}
panic("addr %p + size %llu doesnt fit in one range (id: %u min: %p max: %p)",
(void *)addr, size, range_id, (void *)rmin, (void *)rmax);
}
/*
* Return whether the entire allocation is contained in the given range
*/
static bool
kmem_range_contains_fully(
kmem_range_id_t range_id,
vm_map_offset_t addr,
vm_map_size_t size)
{
const struct mach_vm_range *r = &kmem_ranges[range_id];
mach_vm_offset_t rmin, rmax;
bool result = false;
if (VM_KERNEL_ADDRESS(addr)) {
addr = vm_memtag_canonicalize_address(addr);
}
/*
* The `&` is not a typo: we really expect the check to pass,
* so encourage the compiler to eagerly load and test without branches
*/
mach_vm_range_load(r, rmin, rmax);
result = (addr >= rmin) & (addr < rmax);
if (__improbable(result
&& ((addr + size < rmin) || (addr + size > rmax)))) {
kmem_range_invalid_panic(range_id, addr, size);
}
return result;
}
vm_map_size_t
kmem_range_id_size(kmem_range_id_t range_id)
{
return mach_vm_range_size(&kmem_ranges[range_id]);
}
kmem_range_id_t
kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size)
{
kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
for (; range_id < KMEM_RANGE_COUNT; range_id++) {
if (kmem_range_contains_fully(range_id, addr, size)) {
return range_id;
}
}
return KMEM_RANGE_ID_NONE;
}
bool
kmem_is_ptr_range(vm_map_range_id_t range_id)
{
return (range_id >= KMEM_RANGE_ID_FIRST) &&
(range_id <= KMEM_RANGE_ID_NUM_PTR);
}
__abortlike
static void
kmem_range_invalid_for_overwrite(vm_map_offset_t addr)
{
panic("Can't overwrite mappings (addr: %p) in kmem ptr ranges",
(void *)addr);
}
mach_vm_range_t
kmem_validate_range_for_overwrite(
vm_map_offset_t addr,
vm_map_size_t size)
{
vm_map_range_id_t range_id = kmem_addr_get_range(addr, size);
if (kmem_is_ptr_range(range_id)) {
kmem_range_invalid_for_overwrite(addr);
}
return &kmem_ranges[range_id];
}
#pragma mark entry parameters
__abortlike
static void
__kmem_entry_validate_panic(
vm_map_t map,
vm_map_entry_t entry,
vm_offset_t addr,
vm_size_t size,
uint32_t flags,
kmem_guard_t guard)
{
const char *what = "???";
if (entry->vme_atomic != guard.kmg_atomic) {
what = "atomicity";
} else if (entry->is_sub_map != guard.kmg_submap) {
what = "objectness";
} else if (addr != entry->vme_start) {
what = "left bound";
} else if ((flags & KMF_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
what = "right bound";
} else if (guard.kmg_context != entry->vme_context) {
what = "guard";
}
panic("kmem(map=%p, addr=%p, size=%zd, flags=0x%x): "
"entry:%p %s mismatch guard(0x%08x)",
map, (void *)addr, size, flags, entry,
what, guard.kmg_context);
}
static bool
__kmem_entry_validate_guard(
vm_map_entry_t entry,
vm_offset_t addr,
vm_size_t size,
kmem_flags_t flags,
kmem_guard_t guard)
{
if (entry->vme_atomic != guard.kmg_atomic) {
return false;
}
if (!guard.kmg_atomic) {
return true;
}
if (entry->is_sub_map != guard.kmg_submap) {
return false;
}
if (addr != entry->vme_start) {
return false;
}
if ((flags & KMEM_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
return false;
}
if (!guard.kmg_submap && guard.kmg_context != entry->vme_context) {
return false;
}
return true;
}
void
kmem_entry_validate_guard(
vm_map_t map,
vm_map_entry_t entry,
vm_offset_t addr,
vm_size_t size,
kmem_guard_t guard)
{
if (!__kmem_entry_validate_guard(entry, addr, size, KMEM_NONE, guard)) {
__kmem_entry_validate_panic(map, entry, addr, size, KMEM_NONE, guard);
}
}
__abortlike
static void
__kmem_entry_validate_object_panic(
vm_map_t map,
vm_map_entry_t entry,
kmem_flags_t flags)
{
const char *what;
const char *verb;
if (entry->is_sub_map) {
panic("kmem(map=%p) entry %p is a submap", map, entry);
}
if (flags & KMEM_KOBJECT) {
what = "kernel";
verb = "isn't";
} else if (flags & KMEM_COMPRESSOR) {
what = "compressor";
verb = "isn't";
} else if (entry->vme_kernel_object) {
what = "kernel";
verb = "is unexpectedly";
} else {
what = "compressor";
verb = "is unexpectedly";
}
panic("kmem(map=%p, flags=0x%x): entry %p %s for the %s object",
map, flags, entry, verb, what);
}
static bool
__kmem_entry_validate_object(
vm_map_entry_t entry,
kmem_flags_t flags)
{
if (entry->is_sub_map) {
return false;
}
if ((bool)(flags & KMEM_KOBJECT) != entry->vme_kernel_object) {
return false;
}
return (bool)(flags & KMEM_COMPRESSOR) ==
(VME_OBJECT(entry) == compressor_object);
}
vm_size_t
kmem_size_guard(
vm_map_t map,
vm_offset_t addr,
kmem_guard_t guard)
{
kmem_flags_t flags = KMEM_GUESS_SIZE;
vm_map_entry_t entry;
vm_size_t size;
vm_map_lock_read(map);
#if KASAN_CLASSIC
addr -= PAGE_SIZE;
#endif /* KASAN_CLASSIC */
addr = vm_memtag_canonicalize_address(addr);
if (!vm_map_lookup_entry(map, addr, &entry)) {
__kmem_entry_not_found_panic(map, addr);
}
if (!__kmem_entry_validate_guard(entry, addr, 0, flags, guard)) {
__kmem_entry_validate_panic(map, entry, addr, 0, flags, guard);
}
size = __kmem_entry_orig_size(entry);
vm_map_unlock_read(map);
return size;
}
static inline uint16_t
kmem_hash_backtrace(
void *fp)
{
uint64_t bt_count;
uintptr_t bt[8] = {};
struct backtrace_control ctl = {
.btc_frame_addr = (uintptr_t)fp,
};
bt_count = backtrace(bt, sizeof(bt) / sizeof(bt[0]), &ctl, NULL);
return (uint16_t) os_hash_jenkins(bt, bt_count * sizeof(bt[0]));
}
static_assert(KMEM_RANGE_ID_DATA - 1 <= KMEM_RANGE_MASK,
"Insufficient bits to represent ptr ranges");
kmem_range_id_t
kmem_adjust_range_id(
uint32_t hash)
{
return (kmem_range_id_t) (KMEM_RANGE_ID_PTR_0 +
(hash & KMEM_RANGE_MASK) % kmem_ptr_ranges);
}
static bool
kmem_use_sprayqtn(
kma_flags_t kma_flags,
vm_map_size_t map_size,
vm_offset_t mask)
{
/*
* Pointer allocations that exceed the guard-object threshold, or that
* request leading guard pages with a non-standard alignment, are
* redirected to the sprayqtn range.
*/
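/*
* For example, a 64MB pointer allocation exceeds KMEM_GOBJ_THRESHOLD
* (32MB) and lands in the sprayqtn range even without KMA_SPRAYQTN,
* and so does a KMA_GUARD_FIRST request whose alignment mask is
* larger than PAGE_MASK.
*/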
#if DEBUG || DEVELOPMENT
btref_get_flags_t flags = (kma_flags & KMA_NOPAGEWAIT) ?
BTREF_GET_NOWAIT : 0;
if ((kma_flags & KMA_SPRAYQTN) == 0) {
if (map_size > KMEM_GOBJ_THRESHOLD) {
btlog_record(kmem_outlier_log, (void *)map_size, KMEM_OUTLIER_SIZE,
btref_get(__builtin_frame_address(0), flags));
} else if ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK)) {
btlog_record(kmem_outlier_log, (void *)mask, KMEM_OUTLIER_ALIGN,
btref_get(__builtin_frame_address(0), flags));
}
}
#endif /* DEBUG || DEVELOPMENT */
return (kma_flags & KMA_SPRAYQTN) ||
(map_size > KMEM_GOBJ_THRESHOLD) ||
((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK));
}
static void
kmem_apply_security_policy(
vm_map_t map,
kma_flags_t kma_flags,
kmem_guard_t guard,
vm_map_size_t map_size,
vm_offset_t mask,
vm_map_kernel_flags_t *vmk_flags,
bool assert_dir __unused)
{
kmem_range_id_t range_id;
bool from_right;
uint16_t type_hash = guard.kmg_type_hash;
if (startup_phase < STARTUP_SUB_KMEM || map != kernel_map) {
return;
}
/*
* A non-zero type-hash must be passed by krealloc_type
*/
#if (DEBUG || DEVELOPMENT)
if (assert_dir && !(kma_flags & KMA_DATA)) {
assert(type_hash != 0);
}
#endif
if (kma_flags & KMA_DATA) {
range_id = KMEM_RANGE_ID_DATA;
/*
* As an optimization in KMA_DATA to avoid fragmentation,
* allocate static carveouts at the end of the DATA range.
*/
from_right = (bool)(kma_flags & KMA_PERMANENT);
} else if (kmem_use_sprayqtn(kma_flags, map_size, mask)) {
range_id = KMEM_RANGE_ID_SPRAYQTN;
from_right = (bool)(kma_flags & KMA_PERMANENT);
} else if (type_hash) {
range_id = (kmem_range_id_t)(type_hash & KMEM_RANGE_MASK);
from_right = type_hash & KMEM_DIRECTION_MASK;
} else {
/*
* Range id needs to correspond to one of the PTR ranges
*/
type_hash = (uint16_t) kmem_hash_backtrace(__builtin_frame_address(0));
range_id = kmem_adjust_range_id(type_hash);
from_right = type_hash & KMEM_DIRECTION_MASK;
}
vmk_flags->vmkf_range_id = range_id;
vmk_flags->vmkf_last_free = from_right;
}
#pragma mark allocation
static kmem_return_t
kmem_alloc_guard_internal(
vm_map_t map,
vm_size_t size,
vm_offset_t mask,
kma_flags_t flags,
kmem_guard_t guard,
kern_return_t (^alloc_pages)(vm_size_t, kma_flags_t, vm_page_t *))
{
vm_object_t object;
vm_offset_t delta = 0;
vm_map_entry_t entry = NULL;
vm_map_offset_t map_addr, fill_start;
vm_map_size_t map_size, fill_size;
vm_page_t guard_left = VM_PAGE_NULL;
vm_page_t guard_right = VM_PAGE_NULL;
vm_page_t wired_page_list = VM_PAGE_NULL;
vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
bool skip_guards;
kmem_return_t kmr = { };
assert(kernel_map && map->pmap == kernel_pmap);
#if DEBUG || DEVELOPMENT
VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
size, 0, 0, 0);
#endif
if (size == 0 ||
(size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) ||
(size < __kmem_guard_size(ANYF(flags)))) {
__kmem_invalid_size_panic(map, size, flags);
}
/*
* Limit the size of a single extent of wired memory, to try and
* limit the damage to the system if too many pages get wired down.
* The limit is 2GB for machines with up to 128GB of physical memory,
* and scales with installed memory beyond that.
*
* Note: kmem_alloc_contig_guard() is immune to this check.
*/
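/*
* Concretely: with sane_size <= 128GB the cap below is 2GB; on a
* 256GB machine, sane_size / 64 = 4GB, so the cap scales to 4GB.
*/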
if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
alloc_pages == NULL &&
size > MAX(1ULL << 31, sane_size / 64))) {
kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
goto out_error;
}
/*
* Guard pages:
*
* Guard pages are implemented as fictitious pages.
*
* However, some maps, and some objects are known
* to manage their memory explicitly, and do not need
* those to be materialized, which saves memory.
*
* By placing guard pages on either end of a stack,
* they can help detect cases where a thread walks
* off either end of its stack.
*
* They are allocated and set up here and attempts
* to access those pages are trapped in vm_fault_page().
*
* The map_size we were passed may include extra space for
* guard pages. fill_size represents the actual size to populate.
* Similarly, fill_start indicates where the actual pages
* will begin in the range.
*/
map_size = round_page(size);
fill_start = 0;
fill_size = map_size - __kmem_guard_size(ANYF(flags));
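/*
* For example, a 3-page request with KMA_GUARD_FIRST | KMA_GUARD_LAST
* gives map_size = 3 pages and fill_size = 1 page, and fill_start
* becomes PAGE_SIZE below: [guard][populated page][guard].
*/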
#if KASAN_CLASSIC
if (flags & KMA_KASAN_GUARD) {
assert((flags & (KMA_GUARD_FIRST | KMA_GUARD_LAST)) == 0);
flags |= KMA_GUARD_FIRST | KMA_GUARD_LAST;
delta = ptoa(2);
map_size += delta;
}
#else
(void)delta;
#endif /* KASAN_CLASSIC */
skip_guards = (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) ||
map->never_faults;
if (flags & KMA_GUARD_FIRST) {
vmk_flags.vmkf_guard_before = true;
fill_start += PAGE_SIZE;
}
if ((flags & KMA_GUARD_FIRST) && !skip_guards) {
guard_left = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
if (__improbable(guard_left == VM_PAGE_NULL)) {
kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
goto out_error;
}
}
if ((flags & KMA_GUARD_LAST) && !skip_guards) {
guard_right = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
if (__improbable(guard_right == VM_PAGE_NULL)) {
kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
goto out_error;
}
}
if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
if (alloc_pages) {
kmr.kmr_return = alloc_pages(fill_size, flags,
&wired_page_list);
} else {
kmr.kmr_return = vm_page_alloc_list(atop(fill_size), flags,
&wired_page_list);
}
if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
goto out_error;
}
}
/*
* Allocate a new object (if necessary). We must do this before
* locking the map, or risk deadlock with the default pager.
*/
if (flags & KMA_KOBJECT) {
{
object = kernel_object_default;
}
vm_object_reference(object);
} else if (flags & KMA_COMPRESSOR) {
object = compressor_object;
vm_object_reference(object);
} else {
object = vm_object_allocate(map_size);
vm_object_lock(object);
vm_object_set_size(object, map_size, size);
/* stabilize the object to prevent shadowing */
object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
vm_object_unlock(object);
}
if (flags & KMA_LAST_FREE) {
vmk_flags.vmkf_last_free = true;
}
if (flags & KMA_PERMANENT) {
vmk_flags.vmf_permanent = true;
}
kmem_apply_security_policy(map, flags, guard, map_size, mask, &vmk_flags,
false);
kmr.kmr_return = vm_map_find_space(map, 0, map_size, mask,
vmk_flags, &entry);
if (__improbable(KERN_SUCCESS != kmr.kmr_return)) {
vm_object_deallocate(object);
goto out_error;
}
map_addr = entry->vme_start;
VME_OBJECT_SET(entry, object, guard.kmg_atomic, guard.kmg_context);
VME_ALIAS_SET(entry, guard.kmg_tag);
if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
VME_OFFSET_SET(entry, map_addr);
}
#if KASAN
if ((flags & KMA_KOBJECT) && guard.kmg_atomic) {
entry->vme_object_or_delta = (-size & PAGE_MASK) + delta;
}
#endif /* KASAN */
if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
entry->wired_count = 1;
vme_btref_consider_and_set(entry, __builtin_frame_address(0));
}
if (guard_left || guard_right || wired_page_list) {
vm_object_offset_t offset = 0ull;
vm_object_lock(object);
vm_map_unlock(map);
if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
offset = map_addr;
}
if (guard_left) {
vm_page_insert(guard_left, object, offset);
guard_left->vmp_busy = FALSE;
guard_left = VM_PAGE_NULL;
}
if (guard_right) {
vm_page_insert(guard_right, object,
offset + fill_start + fill_size);
guard_right->vmp_busy = FALSE;
guard_right = VM_PAGE_NULL;
}
if (wired_page_list) {
kernel_memory_populate_object_and_unlock(object,
map_addr + fill_start, offset + fill_start, fill_size,
wired_page_list, flags, guard.kmg_tag, VM_PROT_DEFAULT,
__kmem_mapping_type(ANYF(flags)));
} else {
vm_object_unlock(object);
}
} else {
vm_map_unlock(map);
}
/*
* now that the pages are wired, we no longer have to fear coalescing
*/
if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
vm_map_simplify(map, map_addr);
}
#if DEBUG || DEVELOPMENT
VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
atop(fill_size), 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
kmr.kmr_address = CAST_DOWN(vm_offset_t, map_addr);
#if KASAN
if (flags & (KMA_KASAN_GUARD | KMA_PAGEABLE)) {
/*
* We need to allow the range for pageable memory,
* or faulting will not be allowed.
*/
kasan_notify_address(map_addr, map_size);
}
#endif /* KASAN */
#if KASAN_CLASSIC
if (flags & KMA_KASAN_GUARD) {
kmr.kmr_address += PAGE_SIZE;
kasan_alloc_large(kmr.kmr_address, size);
}
#endif /* KASAN_CLASSIC */
#if CONFIG_KERNEL_TAGGING
if (!(flags & KMA_VAONLY) && (flags & KMA_TAG)) {
kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, size);
vm_memtag_set_tag((vm_offset_t)kmr.kmr_address, size);
#if KASAN_TBI
kasan_tbi_retag_unused_space((vm_offset_t)kmr.kmr_address, map_size, size);
#endif /* KASAN_TBI */
}
#endif /* CONFIG_KERNEL_TAGGING */
return kmr;
out_error:
if (flags & KMA_NOFAIL) {
__kmem_failed_panic(map, size, flags, kmr.kmr_return, "alloc");
}
if (guard_left) {
guard_left->vmp_snext = wired_page_list;
wired_page_list = guard_left;
}
if (guard_right) {
guard_right->vmp_snext = wired_page_list;
wired_page_list = guard_right;
}
if (wired_page_list) {
vm_page_free_list(wired_page_list, FALSE);
}
#if DEBUG || DEVELOPMENT
VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
0, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
return kmr;
}
kmem_return_t
kmem_alloc_guard(
vm_map_t map,
vm_size_t size,
vm_offset_t mask,
kma_flags_t flags,
kmem_guard_t guard)
{
return kmem_alloc_guard_internal(map, size, mask, flags, guard, NULL);
}
kmem_return_t
kmem_alloc_contig_guard(
vm_map_t map,
vm_size_t size,
vm_offset_t mask,
ppnum_t max_pnum,
ppnum_t pnum_mask,
kma_flags_t flags,
kmem_guard_t guard)
{
__auto_type alloc_pages = ^(vm_size_t fill_size, kma_flags_t kma_flags, vm_page_t *pages) {
return cpm_allocate(fill_size, pages, max_pnum, pnum_mask, FALSE, kma_flags);
};
return kmem_alloc_guard_internal(map, size, mask, flags, guard, alloc_pages);
}
kmem_return_t
kmem_suballoc(
vm_map_t parent,
mach_vm_offset_t *addr,
vm_size_t size,
vm_map_create_options_t vmc_options,
int vm_flags,
kms_flags_t flags,
vm_tag_t tag)
{
vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
vm_map_offset_t map_addr = 0;
kmem_return_t kmr = { };
vm_map_t map;
assert(page_aligned(size));
assert(parent->pmap == kernel_pmap);
vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags, tag);
if (parent == kernel_map) {
assert(vmk_flags.vmf_overwrite || (flags & KMS_DATA));
}
if (vmk_flags.vmf_fixed) {
map_addr = trunc_page(*addr);
}
pmap_reference(vm_map_pmap(parent));
map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);
/*
* 1. vm_map_enter() will consume one ref on success.
*
* 2. make the entry atomic as kernel submaps should never be split.
*
* 3. instruct vm_map_enter() that it is a fresh submap
* that needs to be taught its bounds as it is inserted.
*/
vm_map_reference(map);
vmk_flags.vmkf_submap = true;
if ((flags & KMS_DATA) == 0) {
/* FIXME: IOKit submaps get fragmented and can't be atomic */
vmk_flags.vmkf_submap_atomic = true;
}
vmk_flags.vmkf_submap_adjust = true;
if (flags & KMS_LAST_FREE) {
vmk_flags.vmkf_last_free = true;
}
if (flags & KMS_PERMANENT) {
vmk_flags.vmf_permanent = true;
}
if (flags & KMS_DATA) {
vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
}
kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
vmk_flags, (vm_object_t)map, 0, FALSE,
VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
if (kmr.kmr_return != KERN_SUCCESS) {
if (flags & KMS_NOFAIL) {
panic("kmem_suballoc(map=%p, size=%zd) failed with %d",
parent, size, kmr.kmr_return);
}
assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
vm_map_deallocate(map);
vm_map_deallocate(map); /* also removes ref to pmap */
return kmr;
}
/*
* For kmem_suballocs that register a claim and are assigned a range, ensure
* that the exact same range is returned.
*/
if (*addr != 0 && parent == kernel_map &&
startup_phase > STARTUP_SUB_KMEM) {
assert(CAST_DOWN(vm_offset_t, map_addr) == *addr);
} else {
*addr = map_addr;
}
kmr.kmr_submap = map;
return kmr;
}
/*
* kmem_alloc:
*
* Allocate wired-down memory in the kernel's address map
* or a submap. The memory is not zero-filled.
*/
__exported kern_return_t
kmem_alloc_external(
vm_map_t map,
vm_offset_t *addrp,
vm_size_t size);
kern_return_t
kmem_alloc_external(
vm_map_t map,
vm_offset_t *addrp,
vm_size_t size)
{
if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
return kmem_alloc(map, addrp, size, KMA_NONE, vm_tag_bt());
}
/* Maintain ABI compatibility: invalid sizes used to be allowed */
return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
}
/*
* kmem_alloc_kobject:
*
* Allocate wired-down memory in the kernel's address map
* or a submap. The memory is not zero-filled.
*
* The memory is allocated in the kernel_object.
* It may not be copied with vm_map_copy, and
* it may not be reallocated with kmem_realloc.
*/
__exported kern_return_t
kmem_alloc_kobject_external(
vm_map_t map,
vm_offset_t *addrp,
vm_size_t size);
kern_return_t
kmem_alloc_kobject_external(
vm_map_t map,
vm_offset_t *addrp,
vm_size_t size)
{
if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
return kmem_alloc(map, addrp, size, KMA_KOBJECT, vm_tag_bt());
}
/* Maintain ABI compatibility: invalid sizes used to be allowed */
return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
}
/*
* kmem_alloc_pageable:
*
* Allocate pageable memory in the kernel's address map.
*/
__exported kern_return_t
kmem_alloc_pageable_external(
vm_map_t map,
vm_offset_t *addrp,
vm_size_t size);
kern_return_t
kmem_alloc_pageable_external(
vm_map_t map,
vm_offset_t *addrp,
vm_size_t size)
{
if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
return kmem_alloc(map, addrp, size, KMA_PAGEABLE | KMA_DATA, vm_tag_bt());
}
/* Maintain ABI compatibility: invalid sizes used to be allowed */
return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
}
static inline kern_return_t
mach_vm_allocate_kernel_sanitize(
vm_map_t map,
mach_vm_offset_ut addr_u,
mach_vm_size_ut size_u,
vm_map_kernel_flags_t vmk_flags,
vm_map_offset_t *map_addr,
vm_map_size_t *map_size)
{
kern_return_t result;
vm_map_offset_t map_end;
if (vmk_flags.vmf_fixed) {
result = vm_sanitize_addr_size(addr_u, size_u,
VM_SANITIZE_CALLER_VM_ALLOCATE_FIXED,
map,
VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS | VM_SANITIZE_FLAGS_REALIGN_START,
map_addr, &map_end, map_size);
if (__improbable(result != KERN_SUCCESS)) {
return result;
}
} else {
*map_addr = 0;
result = vm_sanitize_size(0, size_u,
VM_SANITIZE_CALLER_VM_ALLOCATE_ANYWHERE, map,
VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS,
map_size);
if (__improbable(result != KERN_SUCCESS)) {
return result;
}
}
return KERN_SUCCESS;
}
kern_return_t
mach_vm_allocate_kernel(
vm_map_t map,
mach_vm_offset_ut *addr_u,
mach_vm_size_ut size_u,
vm_map_kernel_flags_t vmk_flags)
{
vm_map_offset_t map_addr;
vm_map_size_t map_size;
kern_return_t result;
if (map == VM_MAP_NULL) {
ktriage_record(thread_tid(current_thread()),
KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
KDBG_TRIAGE_RESERVED,
KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADMAP_ERROR),
KERN_INVALID_ARGUMENT /* arg */);
return KERN_INVALID_ARGUMENT;
}
if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
VM_FLAGS_USER_ALLOCATE)) {
return KERN_INVALID_ARGUMENT;
}
result = mach_vm_allocate_kernel_sanitize(map,
*addr_u,
size_u,
vmk_flags,
&map_addr,
&map_size);
if (__improbable(result != KERN_SUCCESS)) {
result = vm_sanitize_get_kr(result);
if (result == KERN_SUCCESS) {
*addr_u = vm_sanitize_wrap_addr(0);
} else {
ktriage_record(thread_tid(current_thread()),
KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
KDBG_TRIAGE_RESERVED,
KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADSIZE_ERROR),
KERN_INVALID_ARGUMENT /* arg */);
}
return result;
}
vm_map_kernel_flags_update_range_id(&vmk_flags, map, map_size);
result = vm_map_enter(
map,
&map_addr,
map_size,
(vm_map_offset_t)0,
vmk_flags,
VM_OBJECT_NULL,
(vm_object_offset_t)0,
FALSE,
VM_PROT_DEFAULT,
VM_PROT_ALL,
VM_INHERIT_DEFAULT);
if (result == KERN_SUCCESS) {
#if KASAN
if (map->pmap == kernel_pmap) {
kasan_notify_address(map_addr, map_size);
}
#endif
*addr_u = vm_sanitize_wrap_addr(map_addr);
} else {
ktriage_record(thread_tid(current_thread()),
KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
KDBG_TRIAGE_RESERVED,
KDBG_TRIAGE_VM_ALLOCATE_KERNEL_VMMAPENTER_ERROR),
result /* arg */);
}
return result;
}
#pragma mark population
static void
kernel_memory_populate_pmap_enter(
vm_object_t object,
vm_address_t addr,
vm_object_offset_t offset,
vm_page_t mem,
vm_prot_t prot,
int pe_flags,
pmap_mapping_type_t mapping_type)
{
kern_return_t pe_result;
int pe_options;
if (VMP_ERROR_GET(mem)) {
panic("VM page %p should not have an error", mem);
}
pe_options = PMAP_OPTIONS_NOWAIT;
if (object->internal) {
pe_options |= PMAP_OPTIONS_INTERNAL;
}
if (mem->vmp_reusable || object->all_reusable) {
pe_options |= PMAP_OPTIONS_REUSABLE;
}
pe_result = pmap_enter_options(kernel_pmap, addr + offset,
VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
if (pe_result == KERN_RESOURCE_SHORTAGE) {
vm_object_unlock(object);
pe_options &= ~PMAP_OPTIONS_NOWAIT;
pe_result = pmap_enter_options(kernel_pmap, addr + offset,
VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);
vm_object_lock(object);
}
assert(pe_result == KERN_SUCCESS);
}
void
kernel_memory_populate_object_and_unlock(
vm_object_t object, /* must be locked */
vm_address_t addr,
vm_offset_t offset,
vm_size_t size,
vm_page_t page_list,
kma_flags_t flags,
vm_tag_t tag,
vm_prot_t prot,
pmap_mapping_type_t mapping_type)
{
vm_page_t mem;
int pe_flags;
bool gobbled_list = page_list && page_list->vmp_gobbled;
assert(((flags & KMA_KOBJECT) != 0) == (is_kernel_object(object) != 0));
assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object);
if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
assert3u(offset, ==, addr);
} else {
/*
* kernel_memory_populate_pmap_enter() might drop the object
* lock, and the caller might not own a reference anymore
* and rely on holding the vm object lock for liveness.
*/
vm_object_reference_locked(object);
}
if (flags & KMA_KSTACK) {
pe_flags = VM_MEM_STACK;
} else {
pe_flags = 0;
}
for (vm_object_offset_t pg_offset = 0;
pg_offset < size;
pg_offset += PAGE_SIZE_64) {
if (page_list == NULL) {
panic("%s: page_list too short", __func__);
}
mem = page_list;
page_list = mem->vmp_snext;
mem->vmp_snext = NULL;
assert(mem->vmp_wire_count == 0);
assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
assert(!mem->vmp_fictitious && !mem->vmp_private);
if (flags & KMA_COMPRESSOR) {
mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
/*
* Background processes doing I/O accounting can call
* into the NVMe driver to do work that results in an
* allocation here, so we want to make sure that pages
* used by the compressor, regardless of process context,
* are never on the special Q.
*/
mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;
vm_page_insert(mem, object, offset + pg_offset);
} else {
mem->vmp_q_state = VM_PAGE_IS_WIRED;
mem->vmp_wire_count = 1;
vm_page_insert_wired(mem, object, offset + pg_offset, tag);
}
mem->vmp_gobbled = false;
mem->vmp_busy = false;
mem->vmp_pmapped = true;
mem->vmp_wpmapped = true;
/*
* Manual PMAP_ENTER_OPTIONS() with shortcuts
* for the kernel and compressor objects.
*/
kernel_memory_populate_pmap_enter(object, addr, pg_offset,
mem, prot, pe_flags, mapping_type);
if (flags & KMA_NOENCRYPT) {
pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
}
}
if (page_list) {
panic("%s: page_list too long", __func__);
}
vm_object_unlock(object);
if ((flags & (KMA_KOBJECT | KMA_COMPRESSOR)) == 0) {
vm_object_deallocate(object);
}
/*
* Update the accounting:
* - the compressor "wired" pages don't really count as wired
* - kmem_alloc_contig_guard() gives gobbled pages,
* which already count as wired but need to be ungobbled.
*/
if (gobbled_list) {
vm_page_lockspin_queues();
if (flags & KMA_COMPRESSOR) {
vm_page_wire_count -= atop(size);
}
vm_page_gobble_count -= atop(size);
vm_page_unlock_queues();
} else if ((flags & KMA_COMPRESSOR) == 0) {
vm_page_lockspin_queues();
vm_page_wire_count += atop(size);
vm_page_unlock_queues();
}
if (flags & KMA_KOBJECT) {
/* vm_page_insert_wired() handles regular objects already */
vm_tag_update_size(tag, size, NULL);
}
#if KASAN
if (flags & KMA_COMPRESSOR) {
kasan_notify_address_nopoison(addr, size);
} else {
kasan_notify_address(addr, size);
}
#endif /* KASAN */
}
kern_return_t
kernel_memory_populate(
vm_offset_t addr,
vm_size_t size,
kma_flags_t flags,
vm_tag_t tag)
{
kern_return_t kr = KERN_SUCCESS;
vm_page_t page_list = NULL;
vm_size_t page_count = atop_64(size);
vm_object_t object = __kmem_object(ANYF(flags));
#if DEBUG || DEVELOPMENT
VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
size, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
kr = vm_page_alloc_list(page_count, flags, &page_list);
if (kr == KERN_SUCCESS) {
vm_object_lock(object);
kernel_memory_populate_object_and_unlock(object, addr,
addr, size, page_list, flags, tag, VM_PROT_DEFAULT,
__kmem_mapping_type(ANYF(flags)));
}
#if DEBUG || DEVELOPMENT
VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
page_count, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
return kr;
}
void
kernel_memory_depopulate(
vm_offset_t addr,
vm_size_t size,
kma_flags_t flags,
vm_tag_t tag)
{
vm_object_t object = __kmem_object(ANYF(flags));
vm_object_offset_t offset = addr;
vm_page_t mem;
vm_page_t local_freeq = NULL;
unsigned int pages_unwired = 0;
vm_object_lock(object);
pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);
for (vm_object_offset_t pg_offset = 0;
pg_offset < size;
pg_offset += PAGE_SIZE_64) {
mem = vm_page_lookup(object, offset + pg_offset);
assert(mem);
if (flags & KMA_COMPRESSOR) {
assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
} else {
assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
pages_unwired++;
}
mem->vmp_busy = TRUE;
assert(mem->vmp_tabled);
vm_page_remove(mem, TRUE);
assert(mem->vmp_busy);
assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);
mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
mem->vmp_snext = local_freeq;
local_freeq = mem;
}
vm_object_unlock(object);
vm_page_free_list(local_freeq, TRUE);
if (!(flags & KMA_COMPRESSOR)) {
vm_page_lockspin_queues();
vm_page_wire_count -= pages_unwired;
vm_page_unlock_queues();
}
if (flags & KMA_KOBJECT) {
/* vm_page_remove() handles regular objects already */
vm_tag_update_size(tag, -ptoa_64(pages_unwired), NULL);
}
}
#pragma mark reallocation
__abortlike
static void
__kmem_realloc_invalid_object_size_panic(
vm_map_t map,
vm_address_t address,
vm_size_t size,
vm_map_entry_t entry)
{
vm_object_t object = VME_OBJECT(entry);
vm_size_t objsize = __kmem_entry_orig_size(entry);
panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
"object %p has unexpected size %ld",
map, (void *)address, (size_t)size, entry, object, objsize);
}
__abortlike
static void
__kmem_realloc_invalid_pager_panic(
vm_map_t map,
vm_address_t address,
vm_size_t size,
vm_map_entry_t entry)
{
vm_object_t object = VME_OBJECT(entry);
memory_object_t pager = object->pager;
bool pager_created = object->pager_created;
bool pager_initialized = object->pager_initialized;
bool pager_ready = object->pager_ready;
panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
"object %p has unexpected pager %p (%d,%d,%d)",
map, (void *)address, (size_t)size, entry, object,
pager, pager_created, pager_initialized, pager_ready);
}
static kmem_return_t
kmem_realloc_shrink_guard(
vm_map_t map,
vm_offset_t req_oldaddr,
vm_size_t req_oldsize,
vm_size_t req_newsize,
kmr_flags_t flags,
kmem_guard_t guard,
vm_map_entry_t entry)
{
vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
vm_object_t object;
vm_offset_t delta = 0;
kmem_return_t kmr;
bool was_atomic;
vm_size_t oldsize = round_page(req_oldsize);
vm_size_t newsize = round_page(req_newsize);
vm_address_t oldaddr = req_oldaddr;
#if KASAN_CLASSIC
if (flags & KMR_KASAN_GUARD) {
assert((flags & (KMR_GUARD_FIRST | KMR_GUARD_LAST)) == 0);
flags |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
oldaddr -= PAGE_SIZE;
delta = ptoa(2);
oldsize += delta;
newsize += delta;
}
#endif /* KASAN_CLASSIC */
if (flags & KMR_TAG) {
oldaddr = vm_memtag_canonicalize_address(req_oldaddr);
}
vm_map_lock_assert_exclusive(map);
if ((flags & KMR_KOBJECT) == 0) {
object = VME_OBJECT(entry);
vm_object_reference(object);
}
/*
* Shrinking an atomic entry starts with splitting it,
* and removing the second half.
*/
was_atomic = entry->vme_atomic;
entry->vme_atomic = false;
vm_map_clip_end(map, entry, entry->vme_start + newsize);
entry->vme_atomic = was_atomic;
#if KASAN
if (entry->vme_kernel_object && was_atomic) {
entry->vme_object_or_delta = (-req_newsize & PAGE_MASK) + delta;
}
#if KASAN_CLASSIC
if (flags & KMR_KASAN_GUARD) {
kasan_poison_range(oldaddr + newsize, oldsize - newsize,
ASAN_VALID);
}
#endif
#if KASAN_TBI
if (flags & KMR_TAG) {
kasan_tbi_mark_free_space(req_oldaddr + newsize, oldsize - newsize);
}
#endif /* KASAN_TBI */
#endif /* KASAN */
(void)vm_map_remove_and_unlock(map,
oldaddr + newsize, oldaddr + oldsize,
vmr_flags, KMEM_GUARD_NONE);
/*
* Lastly, if there are guard pages, deal with them.
*
* The kernel object just needs to depopulate,
* regular objects require freeing the last page
* and replacing it with a guard.
*/
if (flags & KMR_KOBJECT) {
if (flags & KMR_GUARD_LAST) {
kernel_memory_depopulate(oldaddr + newsize - PAGE_SIZE,
PAGE_SIZE, KMA_KOBJECT, guard.kmg_tag);
}
} else {
vm_page_t guard_right = VM_PAGE_NULL;
vm_offset_t remove_start = newsize;
if (flags & KMR_GUARD_LAST) {
if (!map->never_faults) {
guard_right = vm_page_grab_guard(true);
}
remove_start -= PAGE_SIZE;
}
vm_object_lock(object);
if (object->vo_size != oldsize) {
__kmem_realloc_invalid_object_size_panic(map,
req_oldaddr, req_oldsize + delta, entry);
}
vm_object_set_size(object, newsize, req_newsize);
vm_object_page_remove(object, remove_start, oldsize);
if (guard_right) {
vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
guard_right->vmp_busy = false;
}
vm_object_unlock(object);
vm_object_deallocate(object);
}
kmr.kmr_address = req_oldaddr;
kmr.kmr_return = 0;
#if KASAN_CLASSIC
if (flags & KMA_KASAN_GUARD) {
kasan_alloc_large(kmr.kmr_address, req_newsize);
}
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
vm_memtag_set_tag(kmr.kmr_address, req_newsize);
kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
}
#endif /* KASAN_TBI */
return kmr;
}
kmem_return_t
kmem_realloc_guard(
vm_map_t map,
vm_offset_t req_oldaddr,
vm_size_t req_oldsize,
vm_size_t req_newsize,
kmr_flags_t flags,
kmem_guard_t guard)
{
vm_object_t object;
vm_size_t oldsize;
vm_size_t newsize;
vm_offset_t delta = 0;
vm_map_offset_t oldaddr;
vm_map_offset_t newaddr;
vm_object_offset_t newoffs;
vm_map_entry_t oldentry;
vm_map_entry_t newentry;
vm_page_t page_list = NULL;
bool needs_wakeup = false;
kmem_return_t kmr = { };
unsigned int last_timestamp;
vm_map_kernel_flags_t vmk_flags = {
.vmkf_last_free = (bool)(flags & KMR_LAST_FREE),
};
assert(KMEM_REALLOC_FLAGS_VALID(flags));
if (!guard.kmg_atomic && (flags & (KMR_DATA | KMR_KOBJECT)) != KMR_DATA) {
__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
req_oldsize, flags);
}
if (req_oldaddr == 0ul) {
return kmem_alloc_guard(map, req_newsize, 0, (kma_flags_t)flags, guard);
}
if (req_newsize == 0ul) {
kmem_free_guard(map, req_oldaddr, req_oldsize,
(kmf_flags_t)flags, guard);
return kmr;
}
if (req_newsize >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) {
__kmem_invalid_size_panic(map, req_newsize, flags);
}
if (req_newsize < __kmem_guard_size(ANYF(flags))) {
__kmem_invalid_size_panic(map, req_newsize, flags);
}
oldsize = round_page(req_oldsize);
newsize = round_page(req_newsize);
oldaddr = req_oldaddr;
#if KASAN_CLASSIC
if (flags & KMR_KASAN_GUARD) {
flags |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
oldaddr -= PAGE_SIZE;
delta = ptoa(2);
oldsize += delta;
newsize += delta;
}
#endif /* KASAN_CLASSIC */
#if CONFIG_KERNEL_TAGGING
if (flags & KMR_TAG) {
vm_memtag_verify_tag(req_oldaddr);
oldaddr = vm_memtag_canonicalize_address(req_oldaddr);
}
#endif /* CONFIG_KERNEL_TAGGING */
#if !KASAN
/*
* If not on a KASAN variant and no difference in requested size,
* just return.
*
* Otherwise we want to validate the size and re-tag for KASAN_TBI.
*/
if (oldsize == newsize) {
kmr.kmr_address = req_oldaddr;
return kmr;
}
#endif /* !KASAN */
/*
* If we're growing the allocation,
* then reserve the pages we'll need,
* and find a spot for its new place.
*/
if (oldsize < newsize) {
#if DEBUG || DEVELOPMENT
VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
DBG_VM_KERN_REQUEST, DBG_FUNC_START,
newsize - oldsize, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
kmr.kmr_return = vm_page_alloc_list(atop(newsize - oldsize),
(kma_flags_t)flags, &page_list);
if (kmr.kmr_return == KERN_SUCCESS) {
kmem_apply_security_policy(map, (kma_flags_t)flags, guard,
newsize, 0, &vmk_flags, true);
kmr.kmr_return = vm_map_find_space(map, 0, newsize, 0,
vmk_flags, &newentry);
}
if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
if (flags & KMR_REALLOCF) {
kmem_free_guard(map, req_oldaddr, req_oldsize,
KMF_NONE, guard);
}
if (page_list) {
vm_page_free_list(page_list, FALSE);
}
#if DEBUG || DEVELOPMENT
VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
DBG_VM_KERN_REQUEST, DBG_FUNC_END,
0, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
return kmr;
}
/* map is locked */
} else {
vm_map_lock(map);
}
/*
* Locate the entry:
* - wait for it to quiesce,
* - validate its guard,
* - learn its correct tag.
*/
again:
if (!vm_map_lookup_entry(map, oldaddr, &oldentry)) {
__kmem_entry_not_found_panic(map, req_oldaddr);
}
if ((flags & KMR_KOBJECT) && oldentry->in_transition) {
oldentry->needs_wakeup = true;
vm_map_entry_wait(map, THREAD_UNINT);
goto again;
}
kmem_entry_validate_guard(map, oldentry, oldaddr, oldsize, guard);
if (!__kmem_entry_validate_object(oldentry, ANYF(flags))) {
__kmem_entry_validate_object_panic(map, oldentry, ANYF(flags));
}
/*
* TODO: We should validate for non-atomic entries that the range
* we are acting on is what we expect here.
*/
#if KASAN
if (__kmem_entry_orig_size(oldentry) != req_oldsize) {
__kmem_realloc_invalid_object_size_panic(map,
req_oldaddr, req_oldsize + delta, oldentry);
}
if (oldsize == newsize) {
kmr.kmr_address = req_oldaddr;
if (oldentry->vme_kernel_object) {
oldentry->vme_object_or_delta = delta +
(-req_newsize & PAGE_MASK);
} else {
object = VME_OBJECT(oldentry);
vm_object_lock(object);
vm_object_set_size(object, newsize, req_newsize);
vm_object_unlock(object);
}
vm_map_unlock(map);
#if KASAN_CLASSIC
if (flags & KMA_KASAN_GUARD) {
kasan_alloc_large(kmr.kmr_address, req_newsize);
}
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
vm_memtag_set_tag(kmr.kmr_address, req_newsize);
kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
}
#endif /* KASAN_TBI */
return kmr;
}
#endif /* KASAN */
guard.kmg_tag = VME_ALIAS(oldentry);
if (newsize < oldsize) {
return kmem_realloc_shrink_guard(map, req_oldaddr,
req_oldsize, req_newsize, flags, guard, oldentry);
}
/*
* We are growing the entry.
*
* For regular objects, we use the object `vo_size` update
* as a guarantee that no two kmem_realloc() calls can happen
* concurrently (it is performed before the map is unlocked).
*
* For the kernel object, prevent the entry from being
* reallocated or changed by marking it "in_transition".
*/
object = VME_OBJECT(oldentry);
vm_object_lock(object);
vm_object_reference_locked(object);
newaddr = newentry->vme_start;
newoffs = oldsize;
VME_OBJECT_SET(newentry, object, guard.kmg_atomic, guard.kmg_context);
VME_ALIAS_SET(newentry, guard.kmg_tag);
if (flags & KMR_KOBJECT) {
oldentry->in_transition = true;
VME_OFFSET_SET(newentry, newaddr);
newentry->wired_count = 1;
vme_btref_consider_and_set(newentry, __builtin_frame_address(0));
newoffs = newaddr + oldsize;
#if KASAN
newentry->vme_object_or_delta = delta +
(-req_newsize & PAGE_MASK);
#endif /* KASAN */
} else {
if (object->pager_created || object->pager) {
/*
* We can't "realloc/grow" the pager, so pageable
* allocations should not go through this path.
*/
__kmem_realloc_invalid_pager_panic(map,
req_oldaddr, req_oldsize + delta, oldentry);
}
if (object->vo_size != oldsize) {
__kmem_realloc_invalid_object_size_panic(map,
req_oldaddr, req_oldsize + delta, oldentry);
}
vm_object_set_size(object, newsize, req_newsize);
}
last_timestamp = map->timestamp;
vm_map_unlock(map);
/*
* Now proceed with the population of pages.
*
* Kernel objects can use the kmem population helpers.
*
* Regular objects will insert pages manually,
* then wire the memory into the new range.
*/
vm_size_t guard_right_size = __kmem_guard_right(ANYF(flags));
if (flags & KMR_KOBJECT) {
pmap_mapping_type_t mapping_type = __kmem_mapping_type(ANYF(flags));
pmap_protect(kernel_pmap,
oldaddr, oldaddr + oldsize - guard_right_size,
VM_PROT_NONE);
for (vm_object_offset_t offset = 0;
offset < oldsize - guard_right_size;
offset += PAGE_SIZE_64) {
vm_page_t mem;
mem = vm_page_lookup(object, oldaddr + offset);
if (mem == VM_PAGE_NULL) {
continue;
}
pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
mem->vmp_busy = true;
vm_page_remove(mem, true);
vm_page_insert_wired(mem, object, newaddr + offset,
guard.kmg_tag);
mem->vmp_busy = false;
kernel_memory_populate_pmap_enter(object, newaddr,
offset, mem, VM_PROT_DEFAULT, 0, mapping_type);
}
kernel_memory_populate_object_and_unlock(object,
newaddr + oldsize - guard_right_size,
newoffs - guard_right_size,
newsize - oldsize,
page_list, (kma_flags_t)flags,
guard.kmg_tag, VM_PROT_DEFAULT, mapping_type);
} else {
vm_page_t guard_right = VM_PAGE_NULL;
/*
* Note: we are borrowing the new entry reference
* on the object for the duration of this code,
* which works because we keep the object locked
* throughout.
*/
if ((flags & KMR_GUARD_LAST) && !map->never_faults) {
guard_right = vm_page_lookup(object, oldsize - PAGE_SIZE);
assert(guard_right->vmp_fictitious);
guard_right->vmp_busy = true;
vm_page_remove(guard_right, true);
}
if (flags & KMR_FREEOLD) {
/*
* Freeing the old mapping will make
* the old pages become pageable until
* the new mapping makes them wired again.
* Let's take an extra "wire_count" to
* prevent any accidental "page out".
* We'll have to undo that after wiring
* the new mapping.
*/
vm_object_reference_locked(object); /* keep object alive */
for (vm_object_offset_t offset = 0;
offset < oldsize - guard_right_size;
offset += PAGE_SIZE_64) {
vm_page_t mem;
mem = vm_page_lookup(object, offset);
assert(mem != VM_PAGE_NULL);
assertf(!VM_PAGE_PAGEABLE(mem),
"mem %p qstate %d",
mem, mem->vmp_q_state);
if (VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr) {
/* guard pages are not wired */
} else {
assertf(VM_PAGE_WIRED(mem),
"mem %p qstate %d wirecount %d",
mem,
mem->vmp_q_state,
mem->vmp_wire_count);
assertf(mem->vmp_wire_count >= 1,
"mem %p wirecount %d",
mem, mem->vmp_wire_count);
mem->vmp_wire_count++;
}
}
}
for (vm_object_offset_t offset = oldsize - guard_right_size;
offset < newsize - guard_right_size;
offset += PAGE_SIZE_64) {
vm_page_t mem = page_list;
page_list = mem->vmp_snext;
mem->vmp_snext = VM_PAGE_NULL;
assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
assert(!VM_PAGE_PAGEABLE(mem));
vm_page_insert(mem, object, offset);
mem->vmp_busy = false;
}
if (guard_right) {
vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
guard_right->vmp_busy = false;
}
vm_object_unlock(object);
}
/*
* Mark the entry as idle again,
* and honor KMR_FREEOLD if needed.
*/
vm_map_lock(map);
if (last_timestamp + 1 != map->timestamp &&
!vm_map_lookup_entry(map, oldaddr, &oldentry)) {
__kmem_entry_not_found_panic(map, req_oldaddr);
}
if (flags & KMR_KOBJECT) {
assert(oldentry->in_transition);
oldentry->in_transition = false;
if (oldentry->needs_wakeup) {
needs_wakeup = true;
oldentry->needs_wakeup = false;
}
}
if (flags & KMR_FREEOLD) {
vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
#if KASAN_CLASSIC
if (flags & KMR_KASAN_GUARD) {
kasan_poison_range(oldaddr, oldsize, ASAN_VALID);
}
#endif
#if KASAN_TBI
if (flags & KMR_TAG) {
kasan_tbi_mark_free_space(req_oldaddr, oldsize);
}
#endif /* KASAN_TBI */
if (flags & KMR_GUARD_LAST) {
vmr_flags |= VM_MAP_REMOVE_NOKUNWIRE_LAST;
}
(void)vm_map_remove_and_unlock(map,
oldaddr, oldaddr + oldsize,
vmr_flags, guard);
} else {
vm_map_unlock(map);
}
if ((flags & KMR_KOBJECT) == 0) {
kern_return_t kr;
/*
* This must happen _after_ we do the KMR_FREEOLD,
* because wiring the pages will call into the pmap,
* and if the pages are typed XNU_KERNEL_RESTRICTED,
* this would cause a second mapping of the page and panic.
*/
kr = vm_map_wire_kernel(map,
vm_sanitize_wrap_addr(newaddr),
vm_sanitize_wrap_addr(newaddr + newsize),
vm_sanitize_wrap_prot(VM_PROT_DEFAULT),
guard.kmg_tag, FALSE);
assert(kr == KERN_SUCCESS);
if (flags & KMR_FREEOLD) {
/*
* Undo the extra "wiring" we made above
* and release the extra reference we took
* on the object.
*/
vm_object_lock(object);
for (vm_object_offset_t offset = 0;
offset < oldsize - guard_right_size;
offset += PAGE_SIZE_64) {
vm_page_t mem;
mem = vm_page_lookup(object, offset);
assert(mem != VM_PAGE_NULL);
assertf(!VM_PAGE_PAGEABLE(mem),
"mem %p qstate %d",
mem, mem->vmp_q_state);
if (VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr) {
/* guard pages are not wired */
} else {
assertf(VM_PAGE_WIRED(mem),
"mem %p qstate %d wirecount %d",
mem,
mem->vmp_q_state,
mem->vmp_wire_count);
assertf(mem->vmp_wire_count >= 2,
"mem %p wirecount %d",
mem, mem->vmp_wire_count);
mem->vmp_wire_count--;
assert(VM_PAGE_WIRED(mem));
assert(mem->vmp_wire_count >= 1);
}
}
vm_object_unlock(object);
vm_object_deallocate(object); /* release extra ref */
}
}
if (needs_wakeup) {
vm_map_entry_wakeup(map);
}
#if DEBUG || DEVELOPMENT
VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
atop(newsize - oldsize), 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
kmr.kmr_address = newaddr;
#if KASAN
kasan_notify_address(kmr.kmr_address, newsize);
#endif /* KASAN */
#if KASAN_CLASSIC
if (flags & KMR_KASAN_GUARD) {
kmr.kmr_address += PAGE_SIZE;
kasan_alloc_large(kmr.kmr_address, req_newsize);
}
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
if (flags & KMR_TAG) {
kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
vm_memtag_set_tag(kmr.kmr_address, req_newsize);
kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
}
#endif /* KASAN_TBI */
return kmr;
}
#pragma mark map/remap/wire
kern_return_t
mach_vm_map_kernel(
vm_map_t target_map,
mach_vm_offset_ut *address,
mach_vm_size_ut initial_size,
mach_vm_offset_ut mask,
vm_map_kernel_flags_t vmk_flags,
ipc_port_t port,
memory_object_offset_ut offset,
boolean_t copy,
vm_prot_ut cur_protection,
vm_prot_ut max_protection,
vm_inherit_ut inheritance)
{
/* range_id is set by vm_map_enter_mem_object */
return vm_map_enter_mem_object(target_map,
address,
initial_size,
mask,
vmk_flags,
port,
offset,
copy,
cur_protection,
max_protection,
inheritance,
NULL,
0);
}
kern_return_t
mach_vm_remap_new_kernel(
vm_map_t target_map,
mach_vm_offset_ut *address,
mach_vm_size_ut size,
mach_vm_offset_ut mask,
vm_map_kernel_flags_t vmk_flags,
vm_map_t src_map,
mach_vm_offset_ut memory_address,
boolean_t copy,
vm_prot_ut *cur_protection, /* IN/OUT */
vm_prot_ut *max_protection, /* IN/OUT */
vm_inherit_ut inheritance)
{
if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
VM_FLAGS_USER_REMAP)) {
return KERN_INVALID_ARGUMENT;
}
vmk_flags.vmf_return_data_addr = true;
/* range_id is set by vm_map_remap */
return vm_map_remap(target_map,
address,
size,
mask,
vmk_flags,
src_map,
memory_address,
copy,
cur_protection,
max_protection,
inheritance);
}
#pragma mark free
#if KASAN
__abortlike
static void
__kmem_free_invalid_object_size_panic(
vm_map_t map,
vm_address_t address,
vm_size_t size,
vm_map_entry_t entry)
{
vm_object_t object = VME_OBJECT(entry);
vm_size_t objsize = __kmem_entry_orig_size(entry);
panic("kmem_free(map=%p, addr=%p, size=%zd, entry=%p): "
"object %p has unexpected size %ld",
map, (void *)address, (size_t)size, entry, object, objsize);
}
#endif /* KASAN */
vm_size_t
kmem_free_guard(
vm_map_t map,
vm_offset_t req_addr,
vm_size_t req_size,
kmf_flags_t flags,
kmem_guard_t guard)
{
vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;
vm_address_t addr = req_addr;
vm_offset_t delta = 0;
vm_size_t size;
#if KASAN
vm_map_entry_t entry;
#endif /* KASAN */
assert(map->pmap == kernel_pmap);
#if KASAN_CLASSIC
if (flags & KMF_KASAN_GUARD) {
addr -= PAGE_SIZE;
delta = ptoa(2);
}
#endif /* KASAN_CLASSIC */
#if CONFIG_KERNEL_TAGGING
if (flags & KMF_TAG) {
vm_memtag_verify_tag(req_addr);
addr = vm_memtag_canonicalize_address(req_addr);
}
#endif /* CONFIG_KERNEL_TAGGING */
if (flags & KMF_GUESS_SIZE) {
vmr_flags |= VM_MAP_REMOVE_GUESS_SIZE;
size = PAGE_SIZE;
} else if (req_size == 0) {
__kmem_invalid_size_panic(map, req_size, flags);
} else {
size = round_page(req_size) + delta;
}
vm_map_lock(map);
#if KASAN
if (!vm_map_lookup_entry(map, addr, &entry)) {
__kmem_entry_not_found_panic(map, req_addr);
}
if (flags & KMF_GUESS_SIZE) {
vmr_flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
req_size = __kmem_entry_orig_size(entry);
size = round_page(req_size + delta);
} else if (guard.kmg_atomic && entry->vme_kernel_object &&
__kmem_entry_orig_size(entry) != req_size) {
/*
* We can't make a strict check for regular
* VM objects because it could be:
*
* - the kmem_free_guard() of a kmem_realloc_guard() without
* KMR_FREEOLD, in which case the object size won't match.
*
* - a submap, in which case there is no "orig size".
*/
__kmem_free_invalid_object_size_panic(map,
req_addr, req_size + delta, entry);
}
#endif /* KASAN */
#if KASAN_CLASSIC
if (flags & KMF_KASAN_GUARD) {
kasan_poison_range(addr, size, ASAN_VALID);
}
#endif
#if KASAN_TBI
if (flags & KMF_TAG) {
kasan_tbi_mark_free_space(req_addr, size);
}
#endif /* KASAN_TBI */
/*
* vm_map_remove_and_unlock is called with VM_MAP_REMOVE_KUNWIRE, which
* unwires the kernel mapping. The page won't be mapped any longer, so no
* extra step is required for memory tagging to "clear" it -- the page will
* be laundered later, when it is reused.
*/
return vm_map_remove_and_unlock(map, addr, addr + size,
vmr_flags, guard).kmr_size - delta;
}
__exported void
kmem_free_external(
vm_map_t map,
vm_offset_t addr,
vm_size_t size);
void
kmem_free_external(
vm_map_t map,
vm_offset_t addr,
vm_size_t size)
{
if (size) {
kmem_free(map, trunc_page(addr), size);
#if MACH_ASSERT
} else {
printf("kmem_free(map=%p, addr=%p) called with size=0, lr: %p\n",
map, (void *)addr, __builtin_return_address(0));
#endif
}
}
#pragma mark kmem metadata
/*
* Guard objects for kmem pointer allocation:
*
* Guard objects introduce sizeclass slabs to kmem pointer allocations, which
* are allocated in chunks of n * sizeclass bytes. When an allocation of a
* specific sizeclass is requested, a random slot from [0, n) is returned.
* Allocations keep being served from that chunk until only m slots remain.
* Those remaining m slots are referred to as guard objects; they are never
* allocated, and the chunk is then considered full. When an allocation is
* freed back to the chunk, one slot beyond the m guards becomes available
* again for the next allocation of that sizeclass.
*
* Guard objects are intended to make exploitation of use-after-frees harder,
* as freed allocations can no longer be reliably reallocated. They also make
* exploitation of OOBs harder, as overflowing out of an allocation is no
* longer safe even with sufficient spraying.
*/
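/*
* Illustrative example (the numbers assume a 16KB-page configuration, where
* KMEM_CHUNK_SIZE_MIN == 256KB): a chunk of the 16KB sizeclass holds
* n == 16 slots, and with KMEM_NUM_GUARDS == 2 at most 14 of them are ever
* handed out; the remaining 2 stay unallocated as guards until a free makes
* an extra slot eligible again.
*/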
#define KMEM_META_PRIMARY UINT8_MAX
#define KMEM_META_START (UINT8_MAX - 1)
#define KMEM_META_FREE (UINT8_MAX - 2)
#if __ARM_16K_PG__
#define KMEM_MIN_SIZE PAGE_SIZE
#define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 16)
#else /* __ARM_16K_PG__ */
/*
* PAGE_SIZE isn't a compile-time constant on some arm64 devices. Those
* devices use a 4k page size when their RAM is <= 1GB and 16k otherwise.
* Therefore, populate sizeclasses starting at 4k for those devices.
*/
#define KMEM_MIN_SIZE (4 * 1024)
#define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 32)
#endif /* __ARM_16K_PG__ */
#define KMEM_MAX_SIZE (32ULL << 20)
#define KMEM_START_IDX (kmem_log2down(KMEM_MIN_SIZE))
#define KMEM_LAST_IDX (kmem_log2down(KMEM_MAX_SIZE))
#define KMEM_NUM_SIZECLASS (KMEM_LAST_IDX - KMEM_START_IDX + 1)
#define KMEM_FRONTS (KMEM_RANGE_ID_NUM_PTR * 2)
#define KMEM_NUM_GUARDS 2
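/*
* Sketch of the resulting ladder (assuming a 16KB-page configuration, where
* KMEM_MIN_SIZE == 16KB): KMEM_START_IDX == 14 and KMEM_LAST_IDX == 25, so
* KMEM_NUM_SIZECLASS == 12 power-of-two classes covering 16KB through 32MB.
* On the 4KB KMEM_MIN_SIZE configuration the ladder instead starts at 4KB
* and has 14 classes.
*/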
struct kmem_page_meta {
union {
/*
* On primary allocated chunk with KMEM_META_PRIMARY marker
*/
uint32_t km_bitmap;
/*
* On start and end of free chunk with KMEM_META_FREE marker
*/
uint32_t km_free_chunks;
};
/*
* KMEM_META_PRIMARY: Start meta of allocated chunk
* KMEM_META_FREE : Start and end meta of free chunk
* KMEM_META_START : Meta region start and end
*/
uint8_t km_page_marker;
uint8_t km_sizeclass;
union {
/*
* On primary allocated chunk with KMEM_META_PRIMARY marker
*/
uint16_t km_chunk_len;
/*
* On secondary allocated chunks
*/
uint16_t km_page_idx;
};
LIST_ENTRY(kmem_page_meta) km_link;
} kmem_page_meta_t;
typedef LIST_HEAD(kmem_list_head, kmem_page_meta) kmem_list_head_t;
struct kmem_sizeclass {
vm_map_size_t ks_size;
uint32_t ks_num_chunk;
uint32_t ks_num_elem;
crypto_random_ctx_t __zpercpu ks_rng_ctx;
kmem_list_head_t ks_allfree_head[KMEM_FRONTS];
kmem_list_head_t ks_partial_head[KMEM_FRONTS];
kmem_list_head_t ks_full_head[KMEM_FRONTS];
};
static struct kmem_sizeclass kmem_size_array[KMEM_NUM_SIZECLASS];
/*
* Locks to synchronize metadata population
*/
static LCK_GRP_DECLARE(kmem_locks_grp, "kmem_locks");
static LCK_MTX_DECLARE(kmem_meta_region_lck, &kmem_locks_grp);
#define kmem_meta_lock() lck_mtx_lock(&kmem_meta_region_lck)
#define kmem_meta_unlock() lck_mtx_unlock(&kmem_meta_region_lck)
static SECURITY_READ_ONLY_LATE(struct mach_vm_range)
kmem_meta_range[KMEM_RANGE_ID_NUM_PTR + 1];
static SECURITY_READ_ONLY_LATE(struct kmem_page_meta *)
kmem_meta_base[KMEM_RANGE_ID_NUM_PTR + 1];
/*
* Keeps track of metadata high water mark for each front
*/
static struct kmem_page_meta *kmem_meta_hwm[KMEM_FRONTS];
static SECURITY_READ_ONLY_LATE(vm_map_t)
kmem_meta_map[KMEM_RANGE_ID_NUM_PTR + 1];
static vm_map_size_t kmem_meta_size;
static uint32_t
kmem_get_front(
kmem_range_id_t range_id,
bool from_right)
{
assert((range_id >= KMEM_RANGE_ID_FIRST) &&
(range_id <= KMEM_RANGE_ID_NUM_PTR));
return (range_id - KMEM_RANGE_ID_FIRST) * 2 + from_right;
}
static inline uint32_t
kmem_slot_idx_to_bit(
uint32_t slot_idx,
uint32_t size_idx __unused)
{
assert(slot_idx < kmem_size_array[size_idx].ks_num_elem);
return 1ull << slot_idx;
}
static uint32_t
kmem_get_idx_from_size(vm_map_size_t size)
{
assert(size >= KMEM_MIN_SIZE && size <= KMEM_MAX_SIZE);
return kmem_log2down(size - 1) - KMEM_START_IDX + 1;
}
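/*
* The "size - 1" form rounds a request up to the next power-of-two class.
* Worked example (assuming KMEM_MIN_SIZE == 16KB, i.e. KMEM_START_IDX == 14):
* a 16KB request maps to idx 0 (the 16KB class), while a 20KB request maps
* to idx 1 (the 32KB class), since kmem_log2down(20KB - 1) == 14.
*/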
__abortlike
static void
kmem_invalid_size_idx(uint32_t idx)
{
panic("Invalid sizeclass idx %u", idx);
}
static vm_map_size_t
kmem_get_size_from_idx(uint32_t idx)
{
if (__improbable(idx >= KMEM_NUM_SIZECLASS)) {
kmem_invalid_size_idx(idx);
}
return 1ul << (idx + KMEM_START_IDX);
}
static inline uint16_t
kmem_get_page_idx(struct kmem_page_meta *meta)
{
uint8_t page_marker = meta->km_page_marker;
return (page_marker == KMEM_META_PRIMARY) ? 0 : meta->km_page_idx;
}
__abortlike
static void
kmem_invalid_chunk_len(struct kmem_page_meta *meta)
{
panic("Reading free chunks for meta %p where marker != KMEM_META_PRIMARY",
meta);
}
static inline uint16_t
kmem_get_chunk_len(struct kmem_page_meta *meta)
{
if (__improbable(meta->km_page_marker != KMEM_META_PRIMARY)) {
kmem_invalid_chunk_len(meta);
}
return meta->km_chunk_len;
}
__abortlike
static void
kmem_invalid_free_chunk_len(struct kmem_page_meta *meta)
{
panic("Reading free chunks for meta %p where marker != KMEM_META_FREE",
meta);
}
static inline uint32_t
kmem_get_free_chunk_len(struct kmem_page_meta *meta)
{
if (__improbable(meta->km_page_marker != KMEM_META_FREE)) {
kmem_invalid_free_chunk_len(meta);
}
return meta->km_free_chunks;
}
/*
* Return the metadata corresponding to the specified address
*/
static struct kmem_page_meta *
kmem_addr_to_meta(
vm_map_offset_t addr,
vm_map_range_id_t range_id,
vm_map_offset_t *range_start,
uint64_t *meta_idx)
{
struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
*range_start = kmem_ranges[range_id].min_address;
*meta_idx = (addr - *range_start) / KMEM_CHUNK_SIZE_MIN;
return &meta_base[*meta_idx];
}
/*
* Return the metadata start of the chunk that the address belongs to
*/
static struct kmem_page_meta *
kmem_addr_to_meta_start(
vm_address_t addr,
vm_map_range_id_t range_id,
vm_map_offset_t *chunk_start)
{
vm_map_offset_t range_start;
uint64_t meta_idx;
struct kmem_page_meta *meta;
meta = kmem_addr_to_meta(addr, range_id, &range_start, &meta_idx);
meta_idx -= kmem_get_page_idx(meta);
meta -= kmem_get_page_idx(meta);
assert(meta->km_page_marker == KMEM_META_PRIMARY);
*chunk_start = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN);
return meta;
}
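/*
* Illustrative walk of the address <-> metadata mapping: the metadata array
* is indexed by (addr - range_start) / KMEM_CHUNK_SIZE_MIN, so an address
* 5 chunks into the range lands on meta_base[5]. If that entry is a
* secondary metadata with km_page_idx == 2, the chunk's primary metadata is
* meta_base[3] and the chunk itself starts 3 chunks into the range.
*/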
__startup_func
static void
kmem_init_meta_front(
struct kmem_page_meta *meta,
kmem_range_id_t range_id,
bool from_right)
{
kernel_memory_populate(trunc_page((vm_map_offset_t) meta), PAGE_SIZE,
KMA_KOBJECT | KMA_ZERO | KMA_NOFAIL, VM_KERN_MEMORY_OSFMK);
meta->km_page_marker = KMEM_META_START;
if (!from_right) {
meta++;
kmem_meta_base[range_id] = meta;
}
kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta;
}
__startup_func
static void
kmem_metadata_init(void)
{
for (kmem_range_id_t i = KMEM_RANGE_ID_FIRST; i <= kmem_ptr_ranges; i++) {
vm_map_offset_t addr = kmem_meta_range[i].min_address;
struct kmem_page_meta *meta;
uint64_t meta_idx;
vm_map_will_allocate_early_map(&kmem_meta_map[i]);
kmem_meta_map[i] = kmem_suballoc(kernel_map, &addr, kmem_meta_size,
VM_MAP_CREATE_NEVER_FAULTS | VM_MAP_CREATE_DISABLE_HOLELIST,
VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, KMS_PERMANENT | KMS_NOFAIL,
VM_KERN_MEMORY_OSFMK).kmr_submap;
kmem_meta_range[i].min_address = addr;
kmem_meta_range[i].max_address = addr + kmem_meta_size;
meta = (struct kmem_page_meta *) kmem_meta_range[i].min_address;
kmem_init_meta_front(meta, i, 0);
meta = kmem_addr_to_meta(kmem_ranges[i].max_address, i, &addr,
&meta_idx);
kmem_init_meta_front(meta, i, 1);
}
}
__startup_func
static void
kmem_init_front_head(
struct kmem_sizeclass *ks,
uint32_t front)
{
LIST_INIT(&ks->ks_allfree_head[front]);
LIST_INIT(&ks->ks_partial_head[front]);
LIST_INIT(&ks->ks_full_head[front]);
}
__startup_func
static void
kmem_sizeclass_init(void)
{
for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
struct kmem_sizeclass *ks = &kmem_size_array[i];
kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;
ks->ks_size = kmem_get_size_from_idx(i);
ks->ks_num_chunk = roundup(8 * ks->ks_size, KMEM_CHUNK_SIZE_MIN) /
KMEM_CHUNK_SIZE_MIN;
ks->ks_num_elem = (ks->ks_num_chunk * KMEM_CHUNK_SIZE_MIN) / ks->ks_size;
assert(ks->ks_num_elem <=
(sizeof(((struct kmem_page_meta *)0)->km_bitmap) * 8));
for (; range_id <= KMEM_RANGE_ID_NUM_PTR; range_id++) {
kmem_init_front_head(ks, kmem_get_front(range_id, 0));
kmem_init_front_head(ks, kmem_get_front(range_id, 1));
}
}
}
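/*
* Worked example for one class (assuming a 16KB-page configuration, so
* KMEM_CHUNK_SIZE_MIN == 256KB): for ks_size == 1MB, ks_num_chunk ==
* roundup(8MB, 256KB) / 256KB == 32 chunks, and ks_num_elem ==
* (32 * 256KB) / 1MB == 8 slots, comfortably within the 32-bit km_bitmap.
*/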
/*
* This is done during EARLY_BOOT as it needs the corecrypto module to be
* set up.
*/
__startup_func
static void
kmem_crypto_init(void)
{
vm_size_t ctx_size = crypto_random_kmem_ctx_size();
for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
struct kmem_sizeclass *ks = &kmem_size_array[i];
ks->ks_rng_ctx = zalloc_percpu_permanent(ctx_size, ZALIGN_PTR);
zpercpu_foreach(ctx, ks->ks_rng_ctx) {
crypto_random_kmem_init(ctx);
}
}
}
STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, kmem_crypto_init);
__abortlike
static void
kmem_validate_slot_panic(
vm_map_offset_t addr,
struct kmem_page_meta *meta,
uint32_t slot_idx,
uint32_t size_idx)
{
if (meta->km_page_marker != KMEM_META_PRIMARY) {
panic("Metadata (%p) for addr (%p) not primary", meta, (void *)addr);
}
if (meta->km_sizeclass != size_idx) {
panic("Metadata's (%p) sizeclass (%u != %u) changed during deletion",
meta, meta->km_sizeclass, size_idx);
}
panic("Double free detected: Slot (%u) in meta (%p) for addr %p marked free",
slot_idx, meta, (void *)addr);
}
__abortlike
static void
kmem_invalid_slot_for_addr(
mach_vm_range_t slot,
vm_map_offset_t start,
vm_map_offset_t end)
{
panic("Invalid kmem ptr slot [%p:%p] for allocation [%p:%p]",
(void *)slot->min_address, (void *)slot->max_address,
(void *)start, (void *)end);
}
void
kmem_validate_slot(
vm_map_offset_t addr,
struct kmem_page_meta *meta,
uint32_t size_idx,
uint32_t slot_idx)
{
if ((meta->km_page_marker != KMEM_META_PRIMARY) ||
(meta->km_sizeclass != size_idx) ||
((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) != 0)) {
kmem_validate_slot_panic(addr, meta, size_idx, slot_idx);
}
}
static void
kmem_validate_slot_initial(
mach_vm_range_t slot,
vm_map_offset_t start,
vm_map_offset_t end,
struct kmem_page_meta *meta,
uint32_t size_idx,
uint32_t slot_idx)
{
if ((slot->min_address == 0) || (slot->max_address == 0) ||
(start < slot->min_address) || (start >= slot->max_address) ||
(end > slot->max_address)) {
kmem_invalid_slot_for_addr(slot, start, end);
}
kmem_validate_slot(start, meta, size_idx, slot_idx);
}
uint32_t
kmem_addr_get_slot_idx(
vm_map_offset_t start,
vm_map_offset_t end,
vm_map_range_id_t range_id,
struct kmem_page_meta **meta,
uint32_t *size_idx,
mach_vm_range_t slot)
{
vm_map_offset_t chunk_start;
vm_map_size_t slot_size;
uint32_t slot_idx;
*meta = kmem_addr_to_meta_start(start, range_id, &chunk_start);
*size_idx = (*meta)->km_sizeclass;
slot_size = kmem_get_size_from_idx(*size_idx);
slot_idx = (start - chunk_start) / slot_size;
slot->min_address = chunk_start + slot_idx * slot_size;
slot->max_address = slot->min_address + slot_size;
kmem_validate_slot_initial(slot, start, end, *meta, *size_idx, slot_idx);
return slot_idx;
}
static bool
kmem_populate_needed(vm_offset_t from, vm_offset_t to)
{
#if KASAN
#pragma unused(from, to)
return true;
#else
vm_offset_t page_addr = trunc_page(from);
for (; page_addr < to; page_addr += PAGE_SIZE) {
/*
* This can race with another thread doing a populate on the same metadata
* page, where we see an updated pmap but unmapped KASan shadow, causing a
* fault in the shadow when we first access the metadata page. Avoid this
* by always synchronizing on the kmem_meta_lock with KASan.
*/
if (!pmap_find_phys(kernel_pmap, page_addr)) {
return true;
}
}
return false;
#endif /* !KASAN */
}
static void
kmem_populate_meta_locked(vm_offset_t from, vm_offset_t to)
{
vm_offset_t page_addr = trunc_page(from);
vm_map_unlock(kernel_map);
for (; page_addr < to; page_addr += PAGE_SIZE) {
for (;;) {
kern_return_t ret = KERN_SUCCESS;
/*
* All updates to kmem metadata are done under the kmem_meta_lock
*/
kmem_meta_lock();
if (0 == pmap_find_phys(kernel_pmap, page_addr)) {
ret = kernel_memory_populate(page_addr,
PAGE_SIZE, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO,
VM_KERN_MEMORY_OSFMK);
}
kmem_meta_unlock();
if (ret == KERN_SUCCESS) {
break;
}
/*
* We can't pass KMA_NOPAGEWAIT under a global lock as it leads
* to bad system deadlocks, so if the allocation failed,
* we need to do the VM_PAGE_WAIT() outside of the lock.
*/
VM_PAGE_WAIT();
}
}
vm_map_lock(kernel_map);
}
__abortlike
static void
kmem_invalid_meta_panic(
struct kmem_page_meta *meta,
uint32_t slot_idx,
struct kmem_sizeclass sizeclass)
{
uint32_t size_idx = kmem_get_idx_from_size(sizeclass.ks_size);
if (slot_idx >= sizeclass.ks_num_elem) {
panic("Invalid slot idx %u [0:%u] for meta %p", slot_idx,
sizeclass.ks_num_elem, meta);
}
if (meta->km_sizeclass != size_idx) {
panic("Invalid size_idx (%u != %u) in meta %p", size_idx,
meta->km_sizeclass, meta);
}
panic("page_marker %u not primary in meta %p", meta->km_page_marker, meta);
}
__abortlike
static void
kmem_slot_has_entry_panic(
vm_map_entry_t entry,
vm_map_offset_t addr)
{
panic("Entry (%p) already exists for addr (%p) being returned",
entry, (void *)addr);
}
__abortlike
static void
kmem_slot_not_found(
struct kmem_page_meta *meta,
uint32_t slot_idx)
{
panic("%uth free slot not found for meta %p bitmap %u", slot_idx, meta,
meta->km_bitmap);
}
/*
* Returns a 16-bit random number between 0 and
* upper_limit (inclusive)
*/
__startup_func
uint16_t
kmem_get_random16(
uint16_t upper_limit)
{
static uint64_t random_entropy;
assert(upper_limit < UINT16_MAX);
if (random_entropy == 0) {
random_entropy = early_random();
}
uint32_t result = random_entropy & UINT32_MAX;
random_entropy >>= 32;
return (uint16_t)(result % (upper_limit + 1));
}
static uint32_t
kmem_get_nth_free_slot(
struct kmem_page_meta *meta,
uint32_t n,
uint32_t bitmap)
{
uint32_t zeros_seen = 0, ones_seen = 0;
while (bitmap) {
uint32_t count = __builtin_ctz(bitmap);
zeros_seen += count;
bitmap >>= count;
if (__probable(~bitmap)) {
count = __builtin_ctz(~bitmap);
} else {
count = 32;
}
if (count + ones_seen > n) {
return zeros_seen + n;
}
ones_seen += count;
bitmap >>= count;
}
kmem_slot_not_found(meta, n);
}
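/*
* kmem_get_nth_free_slot() returns the bit position of the n-th set (free)
* bit. Illustrative trace: for bitmap == 0b10110 and n == 2, the walk skips
* one zero, then two ones, then one more zero, and finds the third set bit
* at bit position 4, which is returned.
*/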
static uint32_t
kmem_get_next_slot(
struct kmem_page_meta *meta,
struct kmem_sizeclass sizeclass,
uint32_t bitmap)
{
uint32_t num_slots = __builtin_popcount(bitmap);
uint64_t slot_idx = 0;
assert(num_slots > 0);
if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
/*
* Use early random prior to early boot as the ks_rng_ctx requires
* the corecrypto module to be setup before it is initialized and
* used.
*
* num_slots can't be 0 as we take this path when we have more than
* one slot left.
*/
slot_idx = kmem_get_random16((uint16_t)num_slots - 1);
} else {
crypto_random_uniform(zpercpu_get(sizeclass.ks_rng_ctx), num_slots,
&slot_idx);
}
return kmem_get_nth_free_slot(meta, slot_idx, bitmap);
}
/*
* Returns an unallocated slot from the given metadata
*/
static vm_map_offset_t
kmem_get_addr_from_meta(
struct kmem_page_meta *meta,
vm_map_range_id_t range_id,
struct kmem_sizeclass sizeclass,
vm_map_entry_t *entry)
{
vm_map_offset_t addr;
vm_map_size_t size = sizeclass.ks_size;
uint32_t size_idx = kmem_get_idx_from_size(size);
uint64_t meta_idx = meta - kmem_meta_base[range_id];
mach_vm_offset_t range_start = kmem_ranges[range_id].min_address;
uint32_t slot_bit;
uint32_t slot_idx = kmem_get_next_slot(meta, sizeclass, meta->km_bitmap);
if ((slot_idx >= sizeclass.ks_num_elem) ||
(meta->km_sizeclass != size_idx) ||
(meta->km_page_marker != KMEM_META_PRIMARY)) {
kmem_invalid_meta_panic(meta, slot_idx, sizeclass);
}
slot_bit = kmem_slot_idx_to_bit(slot_idx, size_idx);
meta->km_bitmap &= ~slot_bit;
addr = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN) + (slot_idx * size);
assert(kmem_range_contains_fully(range_id, addr, size));
if (vm_map_lookup_entry(kernel_map, addr, entry)) {
kmem_slot_has_entry_panic(*entry, addr);
}
if ((*entry != vm_map_to_entry(kernel_map)) &&
((*entry)->vme_next != vm_map_to_entry(kernel_map)) &&
((*entry)->vme_next->vme_start < (addr + size))) {
kmem_slot_has_entry_panic(*entry, addr);
}
return addr;
}
__abortlike
static void
kmem_range_out_of_va(
kmem_range_id_t range_id,
uint32_t num_chunks)
{
panic("No more VA to allocate %u chunks in range %u", num_chunks, range_id);
}
static void
kmem_init_allocated_chunk(
struct kmem_page_meta *meta,
struct kmem_sizeclass sizeclass,
uint32_t size_idx)
{
uint32_t meta_num = sizeclass.ks_num_chunk;
uint32_t num_elem = sizeclass.ks_num_elem;
meta->km_bitmap = (1ull << num_elem) - 1;
meta->km_chunk_len = (uint16_t)meta_num;
assert(LIST_NEXT(meta, km_link) == NULL);
assert(meta->km_link.le_prev == NULL);
meta->km_sizeclass = (uint8_t)size_idx;
meta->km_page_marker = KMEM_META_PRIMARY;
meta++;
for (uint32_t i = 1; i < meta_num; i++) {
meta->km_page_idx = (uint16_t)i;
meta->km_sizeclass = (uint8_t)size_idx;
meta->km_page_marker = 0;
meta->km_bitmap = 0;
meta++;
}
}
static uint32_t
kmem_get_additional_meta(
struct kmem_page_meta *meta,
uint32_t meta_req,
bool from_right,
struct kmem_page_meta **adj_free_meta)
{
struct kmem_page_meta *meta_prev = from_right ? meta : (meta - 1);
if (meta_prev->km_page_marker == KMEM_META_FREE) {
uint32_t chunk_len = kmem_get_free_chunk_len(meta_prev);
*adj_free_meta = from_right ? meta_prev : (meta_prev - chunk_len + 1);
meta_req -= chunk_len;
} else {
*adj_free_meta = NULL;
}
return meta_req;
}
static struct kmem_page_meta *
kmem_get_new_chunk(
vm_map_range_id_t range_id,
bool from_right,
uint32_t size_idx)
{
struct kmem_sizeclass sizeclass = kmem_size_array[size_idx];
struct kmem_page_meta *start, *end, *meta_update;
struct kmem_page_meta *adj_free_meta = NULL;
uint32_t meta_req = sizeclass.ks_num_chunk;
for (;;) {
struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
struct kmem_page_meta *meta;
vm_offset_t start_addr, end_addr;
uint32_t meta_num;
meta = from_right ? metab : metaf;
meta_num = kmem_get_additional_meta(meta, meta_req, from_right,
&adj_free_meta);
if (metaf + meta_num >= metab) {
kmem_range_out_of_va(range_id, meta_num);
}
start = from_right ? (metab - meta_num) : metaf;
end = from_right ? metab : (metaf + meta_num);
start_addr = (vm_offset_t)start;
end_addr = (vm_offset_t)end;
/*
* If the new high watermark stays on the same page, there is no need
* to populate it, and therefore no need to drop the lock.
*/
if (!page_aligned(from_right ? end_addr : start_addr) &&
trunc_page(start_addr) == trunc_page(end_addr - 1)) {
break;
}
if (!kmem_populate_needed(start_addr, end_addr)) {
break;
}
kmem_populate_meta_locked(start_addr, end_addr);
/*
* Since we dropped the lock, reassess whether the conditions still hold:
* - the HWM we are changing must not have moved
* - the other HWM must not intersect with ours
* - in case of coalescing, the adjacent free meta must still
* be free and of the same size.
*
* If we failed to grow (any of the above changed), return NULL so the
* caller can reevaluate whether the freelists have entries now.
*/
metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
if (meta != (from_right ? metab : metaf)) {
return NULL;
}
if (metaf + meta_num >= metab) {
kmem_range_out_of_va(range_id, meta_num);
}
if (adj_free_meta) {
if (adj_free_meta->km_page_marker != KMEM_META_FREE ||
kmem_get_free_chunk_len(adj_free_meta) !=
meta_req - meta_num) {
return NULL;
}
}
break;
}
/*
* If there is an adjacent free chunk, remove it from the free list
*/
if (adj_free_meta) {
LIST_REMOVE(adj_free_meta, km_link);
LIST_NEXT(adj_free_meta, km_link) = NULL;
adj_free_meta->km_link.le_prev = NULL;
}
/*
* Update hwm
*/
meta_update = from_right ? start : end;
kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta_update;
/*
* Initialize metadata
*/
start = from_right ? start : (end - meta_req);
kmem_init_allocated_chunk(start, sizeclass, size_idx);
return start;
}
static void
kmem_requeue_meta(
struct kmem_page_meta *meta,
struct kmem_list_head *head)
{
LIST_REMOVE(meta, km_link);
LIST_INSERT_HEAD(head, meta, km_link);
}
/*
* Return corresponding sizeclass to stash free chunks in
*/
__abortlike
static void
kmem_invalid_chunk_num(uint32_t chunks)
{
panic("Invalid number of chunks %u\n", chunks);
}
static uint32_t
kmem_get_size_idx_for_chunks(uint32_t chunks)
{
for (uint32_t i = KMEM_NUM_SIZECLASS - 1; i > 0; i--) {
if (chunks >= kmem_size_array[i].ks_num_chunk) {
return i;
}
}
kmem_invalid_chunk_num(chunks);
}
static void
kmem_clear_meta_range(struct kmem_page_meta *meta, uint32_t count)
{
bzero(meta, count * sizeof(struct kmem_page_meta));
}
static void
kmem_check_meta_range_is_clear(struct kmem_page_meta *meta, uint32_t count)
{
#if MACH_ASSERT
size_t size = count * sizeof(struct kmem_page_meta);
assert(memcmp_zero_ptr_aligned(meta, size) == 0);
#else
#pragma unused(meta, count)
#endif
}
/*!
* @function kmem_init_free_chunk()
*
* @discussion
* This function prepares a range of chunks to be put on a free list.
* The first and last metadata might be dirty, but the "inner" ones
* must be zero filled by the caller prior to calling this function.
*/
static void
kmem_init_free_chunk(
struct kmem_page_meta *meta,
uint32_t num_chunks,
uint32_t front)
{
struct kmem_sizeclass *sizeclass;
uint32_t size_idx = kmem_get_size_idx_for_chunks(num_chunks);
if (num_chunks > 2) {
kmem_check_meta_range_is_clear(meta + 1, num_chunks - 2);
}
meta[0] = (struct kmem_page_meta){
.km_free_chunks = num_chunks,
.km_page_marker = KMEM_META_FREE,
.km_sizeclass = (uint8_t)size_idx,
};
if (num_chunks > 1) {
meta[num_chunks - 1] = (struct kmem_page_meta){
.km_free_chunks = num_chunks,
.km_page_marker = KMEM_META_FREE,
.km_sizeclass = (uint8_t)size_idx,
};
}
sizeclass = &kmem_size_array[size_idx];
LIST_INSERT_HEAD(&sizeclass->ks_allfree_head[front], meta, km_link);
}
static struct kmem_page_meta *
kmem_get_free_chunk_from_list(
struct kmem_sizeclass *org_sizeclass,
uint32_t size_idx,
uint32_t front)
{
struct kmem_sizeclass *sizeclass;
uint32_t num_chunks = org_sizeclass->ks_num_chunk;
struct kmem_page_meta *meta;
uint32_t idx = size_idx;
while (idx < KMEM_NUM_SIZECLASS) {
sizeclass = &kmem_size_array[idx];
meta = LIST_FIRST(&sizeclass->ks_allfree_head[front]);
if (meta) {
break;
}
idx++;
}
/*
* Trim if the free chunk is larger than needed
*/
if (meta) {
uint32_t num_chunks_free = kmem_get_free_chunk_len(meta);
assert(meta->km_page_marker == KMEM_META_FREE);
LIST_REMOVE(meta, km_link);
LIST_NEXT(meta, km_link) = NULL;
meta->km_link.le_prev = NULL;
if (num_chunks_free > num_chunks) {
num_chunks_free -= num_chunks;
kmem_init_free_chunk(meta + num_chunks, num_chunks_free, front);
}
kmem_init_allocated_chunk(meta, *org_sizeclass, size_idx);
}
return meta;
}
kern_return_t
kmem_locate_space(
vm_map_size_t size,
vm_map_range_id_t range_id,
bool from_right,
vm_map_offset_t *start_inout,
vm_map_entry_t *entry_out)
{
vm_map_entry_t entry;
uint32_t size_idx = kmem_get_idx_from_size(size);
uint32_t front = kmem_get_front(range_id, from_right);
struct kmem_sizeclass *sizeclass = &kmem_size_array[size_idx];
struct kmem_page_meta *meta;
assert(size <= sizeclass->ks_size);
again:
if ((meta = LIST_FIRST(&sizeclass->ks_partial_head[front])) != NULL) {
*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
/*
* Requeue to full if necessary
*/
assert(meta->km_page_marker == KMEM_META_PRIMARY);
if (__builtin_popcount(meta->km_bitmap) == KMEM_NUM_GUARDS) {
kmem_requeue_meta(meta, &sizeclass->ks_full_head[front]);
}
} else if ((meta = kmem_get_free_chunk_from_list(sizeclass, size_idx,
front)) != NULL) {
*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
/*
* Queue to partial
*/
assert(meta->km_page_marker == KMEM_META_PRIMARY);
assert(__builtin_popcount(meta->km_bitmap) > KMEM_NUM_GUARDS);
LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
} else {
meta = kmem_get_new_chunk(range_id, from_right, size_idx);
if (meta == NULL) {
goto again;
}
*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
assert(meta->km_page_marker == KMEM_META_PRIMARY);
LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
}
if (entry_out) {
*entry_out = entry;
}
return KERN_SUCCESS;
}
/*
* Determine whether the given metadata was allocated from the right
*/
static bool
kmem_meta_is_from_right(
kmem_range_id_t range_id,
struct kmem_page_meta *meta)
{
struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
__assert_only struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
struct kmem_page_meta *meta_end;
meta_end = (struct kmem_page_meta *)kmem_meta_range[range_id].max_address;
if ((meta >= meta_base) && (meta < metaf)) {
return false;
}
assert(meta >= metab && meta < meta_end);
return true;
}
static void
kmem_free_chunk(
kmem_range_id_t range_id,
struct kmem_page_meta *meta,
bool from_right)
{
struct kmem_page_meta *meta_coalesce = meta - 1;
struct kmem_page_meta *meta_start = meta;
uint32_t num_chunks = kmem_get_chunk_len(meta);
uint32_t add_chunks;
struct kmem_page_meta *meta_end = meta + num_chunks;
struct kmem_page_meta *meta_hwm_l, *meta_hwm_r;
uint32_t front = kmem_get_front(range_id, from_right);
meta_hwm_l = kmem_meta_hwm[kmem_get_front(range_id, 0)];
meta_hwm_r = kmem_meta_hwm[kmem_get_front(range_id, 1)];
LIST_REMOVE(meta, km_link);
kmem_clear_meta_range(meta, num_chunks);
/*
* Coalesce left
*/
if (((from_right && (meta_coalesce >= meta_hwm_r)) || !from_right) &&
(meta_coalesce->km_page_marker == KMEM_META_FREE)) {
meta_start = meta_coalesce - kmem_get_free_chunk_len(meta_coalesce) + 1;
add_chunks = kmem_get_free_chunk_len(meta_start);
num_chunks += add_chunks;
LIST_REMOVE(meta_start, km_link);
kmem_clear_meta_range(meta_start + add_chunks - 1, 1);
}
/*
* Coalesce right
*/
if (((!from_right && (meta_end < meta_hwm_l)) || from_right) &&
(meta_end->km_page_marker == KMEM_META_FREE)) {
add_chunks = kmem_get_free_chunk_len(meta_end);
LIST_REMOVE(meta_end, km_link);
kmem_clear_meta_range(meta_end, 1);
meta_end = meta_end + add_chunks;
num_chunks += add_chunks;
}
kmem_init_free_chunk(meta_start, num_chunks, front);
}
static void
kmem_free_slot(
kmem_range_id_t range_id,
mach_vm_range_t slot)
{
struct kmem_page_meta *meta;
vm_map_offset_t chunk_start;
uint32_t size_idx, chunk_elem, slot_idx, num_elem;
struct kmem_sizeclass *sizeclass;
vm_map_size_t slot_size;
meta = kmem_addr_to_meta_start(slot->min_address, range_id, &chunk_start);
size_idx = meta->km_sizeclass;
slot_size = kmem_get_size_from_idx(size_idx);
slot_idx = (slot->min_address - chunk_start) / slot_size;
assert((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) == 0);
meta->km_bitmap |= kmem_slot_idx_to_bit(slot_idx, size_idx);
sizeclass = &kmem_size_array[size_idx];
chunk_elem = sizeclass->ks_num_elem;
num_elem = __builtin_popcount(meta->km_bitmap);
if (num_elem == chunk_elem) {
/*
* If the entire chunk is empty, add it to the all-free list
*/
bool from_right = kmem_meta_is_from_right(range_id, meta);
kmem_free_chunk(range_id, meta, from_right);
} else if (num_elem == KMEM_NUM_GUARDS + 1) {
/*
* If we freed into a full chunk, move it to the partial list
*/
uint32_t front = kmem_get_front(range_id,
kmem_meta_is_from_right(range_id, meta));
kmem_requeue_meta(meta, &sizeclass->ks_partial_head[front]);
}
}
void
kmem_free_space(
vm_map_offset_t start,
vm_map_offset_t end,
vm_map_range_id_t range_id,
mach_vm_range_t slot)
{
bool entry_present = false;
vm_map_entry_t prev_entry;
vm_map_entry_t next_entry;
if ((slot->min_address == start) && (slot->max_address == end)) {
/*
* Entire slot is being freed at once
*/
return kmem_free_slot(range_id, slot);
}
entry_present = vm_map_lookup_entry(kernel_map, start, &prev_entry);
assert(!entry_present);
next_entry = prev_entry->vme_next;
if (((prev_entry == vm_map_to_entry(kernel_map) ||
prev_entry->vme_end <= slot->min_address)) &&
(next_entry == vm_map_to_entry(kernel_map) ||
(next_entry->vme_start >= slot->max_address))) {
/*
* Free entire slot
*/
kmem_free_slot(range_id, slot);
}
}
#pragma mark kmem init
/*
* The default percentage of memory that can be mlocked is scaled based on the total
* amount of memory in the system. These percentages are calculated
* offline and stored in this table. We index this table by
* log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
* [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
*
* Note that these values were picked for mac.
* If we ever have very large memory config arm devices, we may want to revisit
* since the kernel overhead is smaller there due to the larger page size.
*/
/* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
#define VM_USER_WIREABLE_MIN_CONFIG 32
#if CONFIG_JETSAM
/* Systems with jetsam can wire a bit more because the system can relieve wired
* pressure.
*/
static vm_map_size_t wire_limit_percents[] =
{ 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
#else
static vm_map_size_t wire_limit_percents[] =
{ 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
#endif /* CONFIG_JETSAM */
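/*
* Worked example (illustrative, taking bit_floor() to yield floor(log2) as
* described above): on a device managing 16GB of RAM, bit_floor() gives 34,
* so the table index is 34 - 32 == 2, selecting 80% with CONFIG_JETSAM or
* 76% without, i.e. roughly 12.8GB or 12.2GB of user-wireable memory before
* the VM_NOT_USER_WIREABLE_MAX cap is applied.
*/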
/*
* Sets the default global user wire limit which limits the amount of
* memory that can be locked via mlock(), based on the above algorithm.
* This can be overridden via a sysctl.
*/
static void
kmem_set_user_wire_limits(void)
{
uint64_t available_mem_log;
uint64_t max_wire_percent;
size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
sizeof(vm_map_size_t);
vm_map_size_t limit;
uint64_t config_memsize = max_mem;
#if defined(XNU_TARGET_OS_OSX)
config_memsize = max_mem_actual;
#endif /* defined(XNU_TARGET_OS_OSX) */
available_mem_log = bit_floor(config_memsize);
if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
available_mem_log = 0;
} else {
available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
}
if (available_mem_log >= wire_limit_percents_length) {
available_mem_log = wire_limit_percents_length - 1;
}
max_wire_percent = wire_limit_percents[available_mem_log];
limit = config_memsize * max_wire_percent / 100;
/* Cap the number of non-lockable bytes at VM_NOT_USER_WIREABLE_MAX */
if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
}
vm_global_user_wire_limit = limit;
/* the default per task limit is the same as the global limit */
vm_per_task_user_wire_limit = limit;
vm_add_wire_count_over_global_limit = 0;
vm_add_wire_count_over_user_limit = 0;
}
#define KMEM_MAX_CLAIMS 50
__startup_data
struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {};
__startup_data
uint32_t kmem_claim_count = 0;
__startup_func
void
kmem_range_startup_init(
struct kmem_range_startup_spec *sp)
{
assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT);
if (sp->kc_calculate_sz) {
sp->kc_size = (sp->kc_calculate_sz)();
}
if (sp->kc_size) {
kmem_claims[kmem_claim_count] = *sp;
kmem_claim_count++;
}
}
static vm_offset_t
kmem_fuzz_start(void)
{
vm_offset_t kmapoff_kaddr = 0;
uint32_t kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt);
kmem_alloc(kernel_map, &kmapoff_kaddr, kmapoff_size,
KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
VM_KERN_MEMORY_OSFMK);
return kmapoff_kaddr + kmapoff_size;
}
/*
* Generate a randomly shuffled array of indices from 0 to count - 1
*/
__startup_func
void
kmem_shuffle(
uint16_t *shuffle_buf,
uint16_t count)
{
for (uint16_t i = 0; i < count; i++) {
uint16_t j = kmem_get_random16(i);
if (j != i) {
shuffle_buf[i] = shuffle_buf[j];
}
shuffle_buf[j] = i;
}
}
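/*
* This is the "inside-out" variant of the Fisher-Yates shuffle: element i is
* placed at a random position j <= i, and the previous occupant of j moves
* to i. Illustrative trace for count == 3: i == 0 always writes buf[0] = 0;
* i == 1 may swap 0 and 1; i == 2 may land 2 in any of the three slots,
* giving (assuming an unbiased kmem_get_random16()) each permutation equal
* probability.
*/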
__startup_func
static void
kmem_shuffle_claims(void)
{
uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {};
uint16_t limit = (uint16_t)kmem_claim_count;
kmem_shuffle(&shuffle_buf[0], limit);
for (uint16_t i = 0; i < limit; i++) {
struct kmem_range_startup_spec tmp = kmem_claims[i];
kmem_claims[i] = kmem_claims[shuffle_buf[i]];
kmem_claims[shuffle_buf[i]] = tmp;
}
}
__startup_func
static void
kmem_readjust_ranges(
uint32_t cur_idx)
{
assert(cur_idx != 0);
uint32_t j = cur_idx - 1, random;
struct kmem_range_startup_spec sp = kmem_claims[cur_idx];
struct mach_vm_range *sp_range = sp.kc_range;
/*
* Find max index where restriction is met
*/
for (; j > 0; j--) {
struct kmem_range_startup_spec spj = kmem_claims[j];
vm_map_offset_t max_start = spj.kc_range->min_address;
if (spj.kc_flags & KC_NO_MOVE) {
panic("kmem_range_init: Can't scramble with multiple constraints");
}
if (max_start <= sp_range->min_address) {
break;
}
}
/*
* Pick a random index from 0 to max index and shift claims to the right
* to make room for restricted claim
*/
random = kmem_get_random16((uint16_t)j);
assert(random <= j);
sp_range->min_address = kmem_claims[random].kc_range->min_address;
sp_range->max_address = sp_range->min_address + sp.kc_size;
for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) {
struct kmem_range_startup_spec spj = kmem_claims[j];
struct mach_vm_range *range = spj.kc_range;
range->min_address += sp.kc_size;
range->max_address += sp.kc_size;
kmem_claims[j + 1] = spj;
}
sp.kc_flags = KC_NO_MOVE;
kmem_claims[random] = sp;
}
__startup_func
static vm_map_size_t
kmem_add_ptr_claims(void)
{
uint64_t kmem_meta_num, kmem_ptr_chunks;
vm_map_size_t org_ptr_range_size = ptr_range_size;
ptr_range_size -= PAGE_SIZE;
ptr_range_size *= KMEM_CHUNK_SIZE_MIN;
ptr_range_size /= (KMEM_CHUNK_SIZE_MIN + sizeof(struct kmem_page_meta));
kmem_ptr_chunks = ptr_range_size / KMEM_CHUNK_SIZE_MIN;
ptr_range_size = kmem_ptr_chunks * KMEM_CHUNK_SIZE_MIN;
kmem_meta_num = kmem_ptr_chunks + 2;
kmem_meta_size = round_page(kmem_meta_num * sizeof(struct kmem_page_meta));
assert(kmem_meta_size + ptr_range_size <= org_ptr_range_size);
/*
* Add claims for kmem's ranges
*/
for (uint32_t i = 0; i < kmem_ptr_ranges; i++) {
struct kmem_range_startup_spec kmem_spec = {
.kc_name = "kmem_ptr_range",
.kc_range = &kmem_ranges[KMEM_RANGE_ID_PTR_0 + i],
.kc_size = ptr_range_size,
.kc_flags = KC_NO_ENTRY,
};
kmem_claims[kmem_claim_count++] = kmem_spec;
struct kmem_range_startup_spec kmem_meta_spec = {
.kc_name = "kmem_ptr_range_meta",
.kc_range = &kmem_meta_range[KMEM_RANGE_ID_PTR_0 + i],
.kc_size = kmem_meta_size,
.kc_flags = KC_NONE,
};
kmem_claims[kmem_claim_count++] = kmem_meta_spec;
}
return (org_ptr_range_size - ptr_range_size - kmem_meta_size) *
kmem_ptr_ranges;
}
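/*
* A sketch of the arithmetic above: it approximately solves
* ptr_range_size + (ptr_range_size / KMEM_CHUNK_SIZE_MIN) *
* sizeof(struct kmem_page_meta) <= org_ptr_range_size, with one page of
* slack and two extra metadata entries for the KMEM_META_START sentinels,
* then rounds the pointer range down to a whole number of chunks. The
* unused remainder is returned so the caller can fold it back into the
* data range.
*/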
__startup_func
static void
kmem_add_extra_claims(void)
{
vm_map_size_t largest_free_size = 0, total_claims = 0;
vm_map_sizes(kernel_map, NULL, NULL, &largest_free_size);
largest_free_size = trunc_page(largest_free_size);
/*
* KASan and configs without *TRR need to have just one ptr range due to
* resource constraints.
*/
#if !ZSECURITY_CONFIG(KERNEL_PTR_SPLIT)
kmem_ptr_ranges = 1;
#endif
/*
* Determine size of data and pointer kmem_ranges
*/
for (uint32_t i = 0; i < kmem_claim_count; i++) {
total_claims += kmem_claims[i].kc_size;
}
assert((total_claims & PAGE_MASK) == 0);
largest_free_size -= total_claims;
/*
* Use half the total available VA for all pointer allocations (this
* includes the kmem_sprayqtn range). Given that we have 4 such ranges in
* total, divide the available VA by 8.
*/
ptr_range_size = largest_free_size / ((kmem_ptr_ranges + 1) * 2);
sprayqtn_range_size = ptr_range_size;
if (sprayqtn_range_size > (sane_size / 2)) {
sprayqtn_range_size = sane_size / 2;
}
ptr_range_size = round_page(ptr_range_size);
sprayqtn_range_size = round_page(sprayqtn_range_size);
data_range_size = largest_free_size
- (ptr_range_size * kmem_ptr_ranges)
- sprayqtn_range_size;
/*
* Add claims for kmem's ranges
*/
data_range_size += kmem_add_ptr_claims();
assert(data_range_size + sprayqtn_range_size +
((ptr_range_size + kmem_meta_size) * kmem_ptr_ranges) <=
largest_free_size);
struct kmem_range_startup_spec kmem_spec_sprayqtn = {
.kc_name = "kmem_sprayqtn_range",
.kc_range = &kmem_ranges[KMEM_RANGE_ID_SPRAYQTN],
.kc_size = sprayqtn_range_size,
.kc_flags = KC_NO_ENTRY,
};
kmem_claims[kmem_claim_count++] = kmem_spec_sprayqtn;
struct kmem_range_startup_spec kmem_spec_data = {
.kc_name = "kmem_data_range",
.kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA],
.kc_size = data_range_size,
.kc_flags = KC_NO_ENTRY,
};
kmem_claims[kmem_claim_count++] = kmem_spec_data;
}
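/*
* Illustrative sizing (the numbers are an assumption): with 96GB of free
* kernel VA left after the registered claims and kmem_ptr_ranges == 3, each
* pointer range and the sprayqtn range get 96GB / 8 == 12GB of VA (sprayqtn
* further capped at sane_size / 2), and the data range receives everything
* that remains, including the VA trimmed off the pointer ranges for their
* metadata.
*/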
__startup_func
static void
kmem_scramble_ranges(void)
{
vm_map_offset_t start = 0;
/*
* Initialize the KMEM_RANGE_ID_NONE range to use the entire map so that
* the vm can find the requested ranges.
*/
kmem_ranges[KMEM_RANGE_ID_NONE].min_address = MAX(kernel_map->min_offset,
VM_MAP_PAGE_SIZE(kernel_map));
kmem_ranges[KMEM_RANGE_ID_NONE].max_address = kernel_map->max_offset;
/*
* Allocate the g_kext_map prior to randomizing the remaining submaps, as
* this map is 2G in size and starts at the end of kernel_text on x86; it
* could otherwise overflow into the heap.
*/
kext_alloc_init();
/*
* Eat a random amount of kernel_map to fuzz subsequent heap, zone and
* stack addresses. (With a 4K page and 9 bits of randomness, this
* eats about 2M of VA from the map)
*
* Note that we always need to slide by at least one page because the VM
* pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
* do not admit this address to be part of any zone submap.
*/
start = kmem_fuzz_start();
/*
* Add claims for ptr and data kmem_ranges
*/
kmem_add_extra_claims();
/*
* Shuffle registered claims
*/
assert(kmem_claim_count < UINT16_MAX);
kmem_shuffle_claims();
/*
* Apply restrictions and determine range for each claim
*/
for (uint32_t i = 0; i < kmem_claim_count; i++) {
vm_map_offset_t end = 0;
struct kmem_range_startup_spec sp = kmem_claims[i];
struct mach_vm_range *sp_range = sp.kc_range;
if (vm_map_locate_space_anywhere(kernel_map, sp.kc_size, 0,
VM_MAP_KERNEL_FLAGS_ANYWHERE(), &start, NULL) != KERN_SUCCESS) {
panic("kmem_range_init: vm_map_locate_space failing for claim %s",
sp.kc_name);
}
end = start + sp.kc_size;
/*
* Re-adjust ranges if restriction not met
*/
if (sp_range->min_address && start > sp_range->min_address) {
kmem_readjust_ranges(i);
} else {
sp_range->min_address = start;
sp_range->max_address = end;
}
start = end;
}
/*
* We have settled on the ranges, now create temporary entries for the
* claims
*/
for (uint32_t i = 0; i < kmem_claim_count; i++) {
struct kmem_range_startup_spec sp = kmem_claims[i];
vm_map_entry_t entry = NULL;
if (sp.kc_flags & KC_NO_ENTRY) {
continue;
}
if (vm_map_find_space(kernel_map, sp.kc_range->min_address, sp.kc_size, 0,
VM_MAP_KERNEL_FLAGS_ANYWHERE(), &entry) != KERN_SUCCESS) {
panic("kmem_range_init: vm_map_find_space failing for claim %s",
sp.kc_name);
}
vm_object_reference(kernel_object_default);
VME_OBJECT_SET(entry, kernel_object_default, false, 0);
VME_OFFSET_SET(entry, entry->vme_start);
vm_map_unlock(kernel_map);
}
/*
* Now that we are done assigning all the ranges, reset
* kmem_ranges[KMEM_RANGE_ID_NONE]
*/
kmem_ranges[KMEM_RANGE_ID_NONE] = (struct mach_vm_range) {};
#if DEBUG || DEVELOPMENT
for (uint32_t i = 0; i < kmem_claim_count; i++) {
struct kmem_range_startup_spec sp = kmem_claims[i];
printf("%-24s: %p - %p (%u%c)\n", sp.kc_name,
(void *)sp.kc_range->min_address,
(void *)sp.kc_range->max_address,
mach_vm_size_pretty(sp.kc_size),
mach_vm_size_unit(sp.kc_size));
}
#endif /* DEBUG || DEVELOPMENT */
}
__startup_func
static void
kmem_range_init(void)
{
vm_size_t range_adjustment;
kmem_scramble_ranges();
range_adjustment = sprayqtn_range_size >> 3;
kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address =
kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address + range_adjustment;
kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address =
kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address;
range_adjustment = data_range_size >> 3;
kmem_large_ranges[KMEM_RANGE_ID_DATA].min_address =
kmem_ranges[KMEM_RANGE_ID_DATA].min_address + range_adjustment;
kmem_large_ranges[KMEM_RANGE_ID_DATA].max_address =
kmem_ranges[KMEM_RANGE_ID_DATA].max_address;
pmap_init();
kmem_metadata_init();
kmem_sizeclass_init();
#if DEBUG || DEVELOPMENT
for (kmem_range_id_t i = 1; i < KMEM_RANGE_COUNT; i++) {
vm_size_t range_size = mach_vm_range_size(&kmem_large_ranges[i]);
printf("kmem_large_ranges[%d] : %p - %p (%u%c)\n", i,
(void *)kmem_large_ranges[i].min_address,
(void *)kmem_large_ranges[i].max_address,
mach_vm_size_pretty(range_size),
mach_vm_size_unit(range_size));
}
#endif
}
STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init);
#if DEBUG || DEVELOPMENT
__startup_func
static void
kmem_log_init(void)
{
/*
* The log can only be created after the kmem subsystem is initialized, as
* btlog creation uses kmem.
*/
kmem_outlier_log = btlog_create(BTLOG_LOG, KMEM_OUTLIER_LOG_SIZE, 0);
}
STARTUP(ZALLOC, STARTUP_RANK_FIRST, kmem_log_init);
kmem_gobj_stats
kmem_get_gobj_stats(void)
{
kmem_gobj_stats stats = {};
vm_map_lock(kernel_map);
for (uint8_t i = 0; i < kmem_ptr_ranges; i++) {
kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST + i;
struct mach_vm_range range = kmem_ranges[range_id];
struct kmem_page_meta *meta = kmem_meta_hwm[kmem_get_front(range_id, 0)];
struct kmem_page_meta *meta_end;
uint64_t meta_idx = meta - kmem_meta_base[range_id];
vm_map_size_t used = 0, va = 0, meta_sz = 0, pte_sz = 0;
vm_map_offset_t addr;
vm_map_entry_t entry;
/*
* Left front
*/
va = (meta_idx * KMEM_CHUNK_SIZE_MIN);
meta_sz = round_page(meta_idx * sizeof(struct kmem_page_meta));
/*
* Right front
*/
meta = kmem_meta_hwm[kmem_get_front(range_id, 1)];
meta_end = kmem_addr_to_meta(range.max_address, range_id, &addr,
&meta_idx);
meta_idx = meta_end - meta;
meta_sz += round_page(meta_idx * sizeof(struct kmem_page_meta));
va += (meta_idx * KMEM_CHUNK_SIZE_MIN);
/*
* Compute VA allocated in entire range
*/
if (vm_map_lookup_entry(kernel_map, range.min_address, &entry) == false) {
entry = entry->vme_next;
}
while (entry != vm_map_to_entry(kernel_map) &&
entry->vme_start < range.max_address) {
used += (entry->vme_end - entry->vme_start);
entry = entry->vme_next;
}
pte_sz = round_page(atop(va - used) * 8);
stats.total_used += used;
stats.total_va += va;
stats.pte_sz += pte_sz;
stats.meta_sz += meta_sz;
}
vm_map_unlock(kernel_map);
return stats;
}
#endif /* DEBUG || DEVELOPMENT */
/*
* kmem_init:
*
* Initialize the kernel's virtual memory map, taking
* into account all memory allocated up to this time.
*/
__startup_func
void
kmem_init(
vm_offset_t start,
vm_offset_t end)
{
vm_map_offset_t map_start;
vm_map_offset_t map_end;
map_start = vm_map_trunc_page(start,
VM_MAP_PAGE_MASK(kernel_map));
map_end = vm_map_round_page(end,
VM_MAP_PAGE_MASK(kernel_map));
vm_map_will_allocate_early_map(&kernel_map);
#if defined(__arm64__)
kernel_map = vm_map_create_options(pmap_kernel(),
VM_MIN_KERNEL_AND_KEXT_ADDRESS,
VM_MAX_KERNEL_ADDRESS,
VM_MAP_CREATE_DEFAULT);
/*
* Reserve virtual memory allocated up to this time.
*/
{
unsigned int region_select = 0;
vm_map_offset_t region_start;
vm_map_size_t region_size;
vm_map_offset_t map_addr;
kern_return_t kr;
while (pmap_virtual_region(region_select, &region_start, &region_size)) {
map_addr = region_start;
kr = vm_map_enter(kernel_map, &map_addr,
vm_map_round_page(region_size,
VM_MAP_PAGE_MASK(kernel_map)),
(vm_map_offset_t) 0,
VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(.vmkf_no_pmap_check = true),
VM_OBJECT_NULL,
(vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
VM_INHERIT_DEFAULT);
if (kr != KERN_SUCCESS) {
panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
(uint64_t) start, (uint64_t) end, (uint64_t) region_start,
(uint64_t) region_size, kr);
}
region_select++;
}
}
#else
kernel_map = vm_map_create_options(pmap_kernel(),
VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
VM_MAP_CREATE_DEFAULT);
/*
* Reserve virtual memory allocated up to this time.
*/
if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
vm_map_offset_t map_addr;
kern_return_t kr;
map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
kr = vm_map_enter(kernel_map,
&map_addr,
(vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
(vm_map_offset_t) 0,
VM_MAP_KERNEL_FLAGS_FIXED(.vmkf_no_pmap_check = true),
VM_OBJECT_NULL,
(vm_object_offset_t) 0, FALSE,
VM_PROT_NONE, VM_PROT_NONE,
VM_INHERIT_DEFAULT);
if (kr != KERN_SUCCESS) {
panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
(uint64_t) start, (uint64_t) end,
(uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
(uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
kr);
}
}
#endif
kmem_set_user_wire_limits();
}
#pragma mark map copyio
/*
* Note: semantic types aren't used as `copyio` already validates.
*/
kern_return_t
copyinmap(
vm_map_t map,
vm_map_offset_t fromaddr,
void *todata,
vm_size_t length)
{
kern_return_t kr = KERN_SUCCESS;
vm_map_t oldmap;
if (vm_map_pmap(map) == pmap_kernel()) {
/* assume a correct copy */
memcpy(todata, CAST_DOWN(void *, fromaddr), length);
} else if (current_map() == map) {
if (copyin(fromaddr, todata, length) != 0) {
kr = KERN_INVALID_ADDRESS;
}
} else {
vm_map_reference(map);
oldmap = vm_map_switch(map);
if (copyin(fromaddr, todata, length) != 0) {
kr = KERN_INVALID_ADDRESS;
}
vm_map_switch(oldmap);
vm_map_deallocate(map);
}
return kr;
}
kern_return_t
copyoutmap(
vm_map_t map,
void *fromdata,
vm_map_address_t toaddr,
vm_size_t length)
{
kern_return_t kr = KERN_SUCCESS;
vm_map_t oldmap;
if (vm_map_pmap(map) == pmap_kernel()) {
/* assume a correct copy */
memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
} else if (current_map() == map) {
if (copyout(fromdata, toaddr, length) != 0) {
ktriage_record(thread_tid(current_thread()),
KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
KDBG_TRIAGE_RESERVED,
KDBG_TRIAGE_VM_COPYOUTMAP_SAMEMAP_ERROR),
KERN_INVALID_ADDRESS /* arg */);
kr = KERN_INVALID_ADDRESS;
}
} else {
vm_map_reference(map);
oldmap = vm_map_switch(map);
if (copyout(fromdata, toaddr, length) != 0) {
ktriage_record(thread_tid(current_thread()),
KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
KDBG_TRIAGE_RESERVED,
KDBG_TRIAGE_VM_COPYOUTMAP_DIFFERENTMAP_ERROR),
KERN_INVALID_ADDRESS /* arg */);
kr = KERN_INVALID_ADDRESS;
}
vm_map_switch(oldmap);
vm_map_deallocate(map);
}
return kr;
}
kern_return_t
copyoutmap_atomic32(
vm_map_t map,
uint32_t value,
vm_map_address_t toaddr)
{
kern_return_t kr = KERN_SUCCESS;
vm_map_t oldmap;
if (vm_map_pmap(map) == pmap_kernel()) {
/* assume a correct toaddr */
*(uint32_t *)toaddr = value;
} else if (current_map() == map) {
if (copyout_atomic32(value, toaddr) != 0) {
kr = KERN_INVALID_ADDRESS;
}
} else {
vm_map_reference(map);
oldmap = vm_map_switch(map);
if (copyout_atomic32(value, toaddr) != 0) {
kr = KERN_INVALID_ADDRESS;
}
vm_map_switch(oldmap);
vm_map_deallocate(map);
}
return kr;
}
kern_return_t
copyoutmap_atomic64(
vm_map_t map,
uint64_t value,
vm_map_address_t toaddr)
{
kern_return_t kr = KERN_SUCCESS;
vm_map_t oldmap;
if (vm_map_pmap(map) == pmap_kernel()) {
/* assume a correct toaddr */
*(uint64_t *)toaddr = value;
} else if (current_map() == map) {
if (copyout_atomic64(value, toaddr) != 0) {
kr = KERN_INVALID_ADDRESS;
}
} else {
vm_map_reference(map);
oldmap = vm_map_switch(map);
if (copyout_atomic64(value, toaddr) != 0) {
kr = KERN_INVALID_ADDRESS;
}
vm_map_switch(oldmap);
vm_map_deallocate(map);
}
return kr;
}
#pragma mark pointer obfuscation / packing
/*
*
* The following two functions are to be used when exposing kernel
* addresses to userspace via any of the various debug or info
* facilities that exist. These are basically the same as VM_KERNEL_ADDRPERM()
* and VM_KERNEL_UNSLIDE_OR_PERM() except they use a different random seed and
* are exported to KEXTs.
*
* NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
*/
vm_offset_t
vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt)
{
assert(salt != 0);
if (addr == 0) {
return 0ul;
}
if (VM_KERNEL_IS_SLID(addr)) {
return VM_KERNEL_UNSLIDE(addr);
}
vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
SHA256_CTX sha_ctx;
SHA256_Init(&sha_ctx);
SHA256_Update(&sha_ctx, &salt, sizeof(salt));
SHA256_Update(&sha_ctx, &addr, sizeof(addr));
SHA256_Final(sha_digest, &sha_ctx);
return sha_digest[0];
}
__exported vm_offset_t
vm_kernel_addrhash_external(vm_offset_t addr);
vm_offset_t
vm_kernel_addrhash_external(vm_offset_t addr)
{
return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
}
void
vm_kernel_addrhide(
vm_offset_t addr,
vm_offset_t *hide_addr)
{
*hide_addr = VM_KERNEL_ADDRHIDE(addr);
}
void
vm_kernel_addrperm_external(
vm_offset_t addr,
vm_offset_t *perm_addr)
{
if (VM_KERNEL_IS_SLID(addr)) {
*perm_addr = VM_KERNEL_UNSLIDE(addr);
} else if (VM_KERNEL_ADDRESS(addr)) {
*perm_addr = addr + vm_kernel_addrperm_ext;
} else {
*perm_addr = addr;
}
}
void
vm_kernel_unslide_or_perm_external(
vm_offset_t addr,
vm_offset_t *up_addr)
{
vm_kernel_addrperm_external(addr, up_addr);
}
void
vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
{
if (ptr & ((1ul << params.vmpp_shift) - 1)) {
panic("pointer %p can't be packed: low %d bits aren't 0",
(void *)ptr, params.vmpp_shift);
} else if (ptr <= params.vmpp_base) {
panic("pointer %p can't be packed: below base %p",
(void *)ptr, (void *)params.vmpp_base);
} else {
panic("pointer %p can't be packed: maximum encodable pointer is %p",
(void *)ptr, (void *)vm_packing_max_packable(params));
}
}
void
vm_packing_verify_range(
const char *subsystem,
vm_offset_t min_address,
vm_offset_t max_address,
vm_packing_params_t params)
{
if (min_address > max_address) {
panic("%s: %s range invalid min:%p > max:%p",
__func__, subsystem, (void *)min_address, (void *)max_address);
}
if (!params.vmpp_base_relative) {
return;
}
if (min_address <= params.vmpp_base) {
panic("%s: %s range invalid min:%p <= base:%p",
__func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
}
if (max_address > vm_packing_max_packable(params)) {
panic("%s: %s range invalid max:%p >= max packable:%p",
__func__, subsystem, (void *)max_address,
(void *)vm_packing_max_packable(params));
}
}
#pragma mark tests
#if MACH_ASSERT
#include <sys/errno.h>
static void
kmem_test_for_entry(
vm_map_t map,
vm_offset_t addr,
void (^block)(vm_map_entry_t))
{
vm_map_entry_t entry;
vm_map_lock(map);
block(vm_map_lookup_entry(map, addr, &entry) ? entry : NULL);
vm_map_unlock(map);
}
#define kmem_test_assert_map(map, pg, entries) ({ \
assert3u((map)->size, ==, ptoa(pg)); \
assert3u((map)->hdr.nentries, ==, entries); \
})
static bool
can_write_at(vm_offset_t offs, uint32_t page)
{
static const int zero;
return verify_write(&zero, (void *)(offs + ptoa(page) + 128), 1) == 0;
}
#define assert_writeable(offs, page) \
assertf(can_write_at(offs, page), \
"can write at %p + ptoa(%d)", (void *)offs, page)
#define assert_faults(offs, page) \
assertf(!can_write_at(offs, page), \
"can write at %p + ptoa(%d)", (void *)offs, page)
#define peek(offs, page) \
(*(uint32_t *)((offs) + ptoa(page)))
#define poke(offs, page, v) \
(*(uint32_t *)((offs) + ptoa(page)) = (v))
__attribute__((noinline))
static void
kmem_alloc_basic_test(vm_map_t map)
{
kmem_guard_t guard = {
.kmg_tag = VM_KERN_MEMORY_DIAG,
};
vm_offset_t addr;
/*
* Test wired basics:
* - KMA_KOBJECT
* - KMA_GUARD_FIRST, KMA_GUARD_LAST
* - allocation alignment
*/
addr = kmem_alloc_guard(map, ptoa(10), ptoa(2) - 1,
KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard).kmr_address;
assertf(addr != 0ull, "kma(%p, 10p, 0, KO | GF | GL)", map);
assert3u((addr + PAGE_SIZE) % ptoa(2), ==, 0);
kmem_test_assert_map(map, 10, 1);
kmem_test_for_entry(map, addr, ^(__assert_only vm_map_entry_t e){
assertf(e, "unable to find address %p in map %p", (void *)addr, map);
assert(e->vme_kernel_object);
assert(!e->vme_atomic);
assert3u(e->vme_start, <=, addr);
assert3u(addr + ptoa(10), <=, e->vme_end);
});
assert_faults(addr, 0);
for (int i = 1; i < 9; i++) {
assert_writeable(addr, i);
}
assert_faults(addr, 9);
kmem_free(map, addr, ptoa(10));
kmem_test_assert_map(map, 0, 0);
/*
* Test pageable basics.
*/
addr = kmem_alloc_guard(map, ptoa(10), 0,
KMA_PAGEABLE, guard).kmr_address;
assertf(addr != 0ull, "kma(%p, 10p, 0, KO | PG)", map);
kmem_test_assert_map(map, 10, 1);
for (int i = 0; i < 9; i++) {
assert_faults(addr, i);
poke(addr, i, 42);
assert_writeable(addr, i);
}
kmem_free(map, addr, ptoa(10));
kmem_test_assert_map(map, 0, 0);
}
__attribute__((noinline))
static void
kmem_realloc_basic_test(vm_map_t map, kmr_flags_t kind)
{
kmem_guard_t guard = {
.kmg_atomic = !(kind & KMR_DATA),
.kmg_tag = VM_KERN_MEMORY_DIAG,
.kmg_context = 0xefface,
};
vm_offset_t addr, newaddr;
const int N = 10;
/*
* This isn't something kmem_realloc_guard() _needs_ to do,
* we could conceive an implementation where it grows in place
* if there's space after it.
*
* However, this is what the implementation does today.
*/
bool realloc_growth_changes_address = true;
bool GL = (kind & KMR_GUARD_LAST);
/*
* Initial N page allocation
*/
addr = kmem_alloc_guard(map, ptoa(N), 0,
(kind & (KMA_KOBJECT | KMA_GUARD_LAST | KMA_DATA)) | KMA_ZERO,
guard).kmr_address;
assert3u(addr, !=, 0);
kmem_test_assert_map(map, N, 1);
for (int pg = 0; pg < N - GL; pg++) {
poke(addr, pg, 42 + pg);
}
for (int pg = N - GL; pg < N; pg++) {
assert_faults(addr, pg);
}
/*
* Grow to N + 3 pages
*/
newaddr = kmem_realloc_guard(map, addr, ptoa(N), ptoa(N + 3),
kind | KMR_ZERO, guard).kmr_address;
assert3u(newaddr, !=, 0);
if (realloc_growth_changes_address) {
assert3u(addr, !=, newaddr);
}
if ((kind & KMR_FREEOLD) || (addr == newaddr)) {
kmem_test_assert_map(map, N + 3, 1);
} else {
kmem_test_assert_map(map, 2 * N + 3, 2);
}
for (int pg = 0; pg < N - GL; pg++) {
assert3u(peek(newaddr, pg), ==, 42 + pg);
}
if ((kind & KMR_FREEOLD) == 0) {
for (int pg = 0; pg < N - GL; pg++) {
assert3u(peek(addr, pg), ==, 42 + pg);
}
/* check for true sharing */
poke(addr + 16, 0, 1234);
assert3u(peek(newaddr + 16, 0), ==, 1234);
kmem_free_guard(map, addr, ptoa(N), KMF_NONE, guard);
kmem_test_assert_map(map, N + 3, 1);
}
if (addr != newaddr) {
for (int pg = 0; pg < N - GL; pg++) {
assert_faults(addr, pg);
}
}
for (int pg = N - GL; pg < N + 3 - GL; pg++) {
assert3u(peek(newaddr, pg), ==, 0);
}
for (int pg = N + 3 - GL; pg < N + 3; pg++) {
assert_faults(newaddr, pg);
}
addr = newaddr;
/*
* Shrink to N - 2 pages
*/
newaddr = kmem_realloc_guard(map, addr, ptoa(N + 3), ptoa(N - 2),
kind | KMR_ZERO, guard).kmr_address;
assert3u(map->size, ==, ptoa(N - 2));
assert3u(newaddr, ==, addr);
kmem_test_assert_map(map, N - 2, 1);
for (int pg = 0; pg < N - 2 - GL; pg++) {
assert3u(peek(addr, pg), ==, 42 + pg);
}
for (int pg = N - 2 - GL; pg < N + 3; pg++) {
assert_faults(addr, pg);
}
kmem_free_guard(map, addr, ptoa(N - 2), KMF_NONE, guard);
kmem_test_assert_map(map, 0, 0);
}
static int
kmem_basic_test(__unused int64_t in, int64_t *out)
{
mach_vm_offset_t addr;
vm_map_t map;
printf("%s: test running\n", __func__);
map = kmem_suballoc(kernel_map, &addr, 64U << 20,
VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
KMS_NOFAIL | KMS_DATA, VM_KERN_MEMORY_DIAG).kmr_submap;
printf("%s: kmem_alloc ...\n", __func__);
kmem_alloc_basic_test(map);
printf("%s: PASS\n", __func__);
printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD) ...\n", __func__);
kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD);
printf("%s: PASS\n", __func__);
printf("%s: kmem_realloc (KMR_FREEOLD) ...\n", __func__);
kmem_realloc_basic_test(map, KMR_FREEOLD);
printf("%s: PASS\n", __func__);
printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST);
printf("%s: PASS\n", __func__);
printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST);
printf("%s: PASS\n", __func__);
printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
printf("%s: PASS\n", __func__);
printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST);
printf("%s: PASS\n", __func__);
printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_LAST);
printf("%s: PASS\n", __func__);
printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
printf("%s: PASS\n", __func__);
/* using KMR_DATA signals to test the non-atomic realloc path */
printf("%s: kmem_realloc (KMR_DATA | KMR_FREEOLD) ...\n", __func__);
kmem_realloc_basic_test(map, KMR_DATA | KMR_FREEOLD);
printf("%s: PASS\n", __func__);
printf("%s: kmem_realloc (KMR_DATA) ...\n", __func__);
kmem_realloc_basic_test(map, KMR_DATA);
printf("%s: PASS\n", __func__);
kmem_free_guard(kernel_map, addr, 64U << 20, KMF_NONE, KMEM_GUARD_SUBMAP);
vm_map_deallocate(map);
printf("%s: test passed\n", __func__);
*out = 1;
return 0;
}
SYSCTL_TEST_REGISTER(kmem_basic, kmem_basic_test);
static void
kmem_test_get_size_idx_for_chunks(uint32_t chunks)
{
__assert_only uint32_t idx = kmem_get_size_idx_for_chunks(chunks);
assert(chunks >= kmem_size_array[idx].ks_num_chunk);
}
__attribute__((noinline))
static void
kmem_test_get_size_idx_for_all_chunks()
{
for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
uint32_t chunks = kmem_size_array[i].ks_num_chunk;
if (chunks != 1) {
kmem_test_get_size_idx_for_chunks(chunks - 1);
}
kmem_test_get_size_idx_for_chunks(chunks);
kmem_test_get_size_idx_for_chunks(chunks + 1);
}
}
static int
kmem_guard_obj_test(__unused int64_t in, int64_t *out)
{
printf("%s: test running\n", __func__);
printf("%s: kmem_get_size_idx_for_chunks\n", __func__);
kmem_test_get_size_idx_for_all_chunks();
printf("%s: PASS\n", __func__);
printf("%s: test passed\n", __func__);
*out = 1;
return 0;
}
SYSCTL_TEST_REGISTER(kmem_guard_obj, kmem_guard_obj_test);
#endif /* MACH_ASSERT */