/*
 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_kern.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Kernel memory management.
 */

#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <vm/vm_kern_internal.h>
#include <vm/vm_map_internal.h>
#include <vm/vm_object_internal.h>
#include <vm/vm_page_internal.h>
#include <vm/vm_compressor_xnu.h>
#include <vm/vm_pageout_xnu.h>
#include <vm/vm_init_xnu.h>
#include <vm/vm_fault.h>
#include <vm/vm_memtag.h>
#include <kern/misc_protos.h>
#include <vm/cpm_internal.h>
#include <kern/ledger.h>
#include <kern/bits.h>
#include <kern/startup.h>

#include <string.h>

#include <libkern/OSDebug.h>
#include <libkern/crypto/sha2.h>
#include <libkern/section_keywords.h>
#include <sys/kdebug.h>
#include <sys/kdebug_triage.h>

#include <san/kasan.h>
#include <kern/kext_alloc.h>
#include <kern/backtrace.h>
#include <os/hash.h>
#include <kern/zalloc_internal.h>
#include <libkern/crypto/rand.h>

/*
 *	Variables exported by this module.
 */

SECURITY_READ_ONLY_LATE(vm_map_t) kernel_map;
SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_ranges[KMEM_RANGE_COUNT];
SECURITY_READ_ONLY_LATE(struct mach_vm_range) kmem_large_ranges[KMEM_RANGE_COUNT];

static TUNABLE(uint32_t, kmem_ptr_ranges, "kmem_ptr_ranges",
    KMEM_RANGE_ID_NUM_PTR);
#define KMEM_GOBJ_THRESHOLD   (32ULL << 20)
#if DEBUG || DEVELOPMENT
#define KMEM_OUTLIER_LOG_SIZE (16ULL << 10)
#define KMEM_OUTLIER_SIZE      0
#define KMEM_OUTLIER_ALIGN     1
btlog_t kmem_outlier_log;
#endif /* DEBUG || DEVELOPMENT */

__startup_data static vm_map_size_t data_range_size;
__startup_data static vm_map_size_t ptr_range_size;
__startup_data static vm_map_size_t sprayqtn_range_size;

#pragma mark helpers
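
/*
 * The ANYF() overloads erase the specific kma/kmr/kmf flag type into the
 * common kmem_flags_t representation so that the helpers below can be
 * shared by the allocation, reallocation and free paths.
 */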

__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kma_flags_t flags)
{
	return (kmem_flags_t)flags;
}

__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kmr_flags_t flags)
{
	return (kmem_flags_t)flags;
}

__attribute__((overloadable))
__header_always_inline kmem_flags_t
ANYF(kmf_flags_t flags)
{
	return (kmem_flags_t)flags;
}

__abortlike
static void
__kmem_invalid_size_panic(
	vm_map_t        map,
	vm_size_t       size,
	uint32_t        flags)
{
	panic("kmem(map=%p, flags=0x%x): invalid size %zd",
	    map, flags, (size_t)size);
}

__abortlike
static void
__kmem_invalid_arguments_panic(
	const char     *what,
	vm_map_t        map,
	vm_address_t    address,
	vm_size_t       size,
	uint32_t        flags)
{
	panic("kmem_%s(map=%p, addr=%p, size=%zd, flags=0x%x): "
	    "invalid arguments passed",
	    what, map, (void *)address, (size_t)size, flags);
}

__abortlike
static void
__kmem_failed_panic(
	vm_map_t        map,
	vm_size_t       size,
	uint32_t        flags,
	kern_return_t   kr,
	const char     *what)
{
	panic("kmem_%s(%p, %zd, 0x%x): failed with %d",
	    what, map, (size_t)size, flags, kr);
}

__abortlike
static void
__kmem_entry_not_found_panic(
	vm_map_t        map,
	vm_offset_t     addr)
{
	panic("kmem(map=%p) no entry found at %p", map, (void *)addr);
}
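
/*
 * Return the VM object backing an allocation: KMEM_COMPRESSOR selects the
 * compressor object, KMEM_KOBJECT the default kernel object; passing both,
 * or neither, is a fatal error.
 */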

static inline vm_object_t
__kmem_object(kmem_flags_t flags)
{
	if (flags & KMEM_COMPRESSOR) {
		if (flags & KMEM_KOBJECT) {
			panic("both KMEM_KOBJECT and KMEM_COMPRESSOR specified");
		}
		return compressor_object;
	}
	if (!(flags & KMEM_KOBJECT)) {
		panic("KMEM_KOBJECT or KMEM_COMPRESSOR is required");
	}
	return kernel_object_default;
}

static inline pmap_mapping_type_t
__kmem_mapping_type(kmem_flags_t flags)
{
	if (flags & (KMEM_DATA | KMEM_COMPRESSOR)) {
		return PMAP_MAPPING_TYPE_DEFAULT;
	} else {
		return PMAP_MAPPING_TYPE_RESTRICTED;
	}
}

static inline vm_size_t
__kmem_guard_left(kmem_flags_t flags)
{
	return (flags & KMEM_GUARD_FIRST) ? PAGE_SIZE : 0;
}

static inline vm_size_t
__kmem_guard_right(kmem_flags_t flags)
{
	return (flags & KMEM_GUARD_LAST) ? PAGE_SIZE : 0;
}

static inline vm_size_t
__kmem_guard_size(kmem_flags_t flags)
{
	return __kmem_guard_left(flags) + __kmem_guard_right(flags);
}
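
/*
 * Recover the size that was originally requested for an allocation:
 * kernel object entries stash the page-rounding delta in
 * vme_object_or_delta, other entries keep it in the object as
 * vo_size_delta.
 */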

__pure2
static inline vm_size_t
__kmem_entry_orig_size(vm_map_entry_t entry)
{
	vm_object_t object = VME_OBJECT(entry);

	if (entry->vme_kernel_object) {
		return entry->vme_end - entry->vme_start -
		       entry->vme_object_or_delta;
	} else {
		return object->vo_size - object->vo_size_delta;
	}
}


#pragma mark kmem range methods

#if __arm64__
// <rdar://problem/48304934> arm64 doesn't use ldp when I'd expect it to
#define mach_vm_range_load(r, r_min, r_max) \
	asm("ldp %[rmin], %[rmax], [%[range]]" \
	    : [rmin] "=r"(r_min), [rmax] "=r"(r_max) \
	    : [range] "r"(r), "m"((r)->min_address), "m"((r)->max_address))
#else
#define mach_vm_range_load(r, rmin, rmax) \
	({ rmin = (r)->min_address; rmax = (r)->max_address; })
#endif

__abortlike
static void
__mach_vm_range_overflow(
	mach_vm_offset_t        addr,
	mach_vm_offset_t        size)
{
	panic("invalid vm range: [0x%llx, 0x%llx + 0x%llx) wraps around",
	    addr, addr, size);
}

__abortlike
static void
__mach_vm_range_invalid(
	mach_vm_offset_t        min_address,
	mach_vm_offset_t        max_address)
{
	panic("invalid vm range: [0x%llx, 0x%llx) wraps around",
	    min_address, max_address);
}

__header_always_inline mach_vm_size_t
mach_vm_range_size(const struct mach_vm_range *r)
{
	mach_vm_offset_t rmin, rmax;

	mach_vm_range_load(r, rmin, rmax);
	return rmax - rmin;
}

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_contains(const struct mach_vm_range *r, mach_vm_offset_t addr)
{
	mach_vm_offset_t rmin, rmax;

#if CONFIG_KERNEL_TAGGING
	if (VM_KERNEL_ADDRESS(addr)) {
		addr = vm_memtag_canonicalize_address(addr);
	}
#endif /* CONFIG_KERNEL_TAGGING */

	/*
	 * The `&` is not a typo: we really expect the check to pass,
	 * so encourage the compiler to eagerly load and test without branches
	 */
	mach_vm_range_load(r, rmin, rmax);
	return (addr >= rmin) & (addr < rmax);
}

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_contains(
	const struct mach_vm_range *r,
	mach_vm_offset_t        addr,
	mach_vm_offset_t        size)
{
	mach_vm_offset_t rmin, rmax;

#if CONFIG_KERNEL_TAGGING
	if (VM_KERNEL_ADDRESS(addr)) {
		addr = vm_memtag_canonicalize_address(addr);
	}
#endif /* CONFIG_KERNEL_TAGGING */

	/*
	 * The `&` is not a typo: we really expect the check to pass,
	 * so encourage the compiler to eagerly load and test without branches
	 */
	mach_vm_range_load(r, rmin, rmax);
	return (addr >= rmin) & (addr + size >= rmin) & (addr + size <= rmax);
}

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_intersects(
	const struct mach_vm_range *r1,
	const struct mach_vm_range *r2)
{
	mach_vm_offset_t r1_min, r1_max;
	mach_vm_offset_t r2_min, r2_max;

	mach_vm_range_load(r1, r1_min, r1_max);
	r2_min = r2->min_address;
	r2_max = r2->max_address;

	if (r1_min > r1_max) {
		__mach_vm_range_invalid(r1_min, r1_max);
	}

	if (r2_min > r2_max) {
		__mach_vm_range_invalid(r2_min, r2_max);
	}

	return r1_max > r2_min && r1_min < r2_max;
}

__attribute__((overloadable))
__header_always_inline bool
mach_vm_range_intersects(
	const struct mach_vm_range *r1,
	mach_vm_offset_t        addr,
	mach_vm_offset_t        size)
{
	struct mach_vm_range r2;

	addr = VM_KERNEL_STRIP_UPTR(addr);
	r2.min_address = addr;
	if (os_add_overflow(addr, size, &r2.max_address)) {
		__mach_vm_range_overflow(addr, size);
	}

	return mach_vm_range_intersects(r1, &r2);
}

bool
kmem_range_id_contains(
	kmem_range_id_t         range_id,
	vm_map_offset_t         addr,
	vm_map_size_t           size)
{
	return mach_vm_range_contains(&kmem_ranges[range_id], addr, size);
}

__abortlike
static void
kmem_range_invalid_panic(
	kmem_range_id_t         range_id,
	vm_map_offset_t         addr,
	vm_map_size_t           size)
{
	const struct mach_vm_range *r = &kmem_ranges[range_id];
	mach_vm_offset_t rmin, rmax;

	mach_vm_range_load(r, rmin, rmax);
	if (addr + size < rmin) {
		panic("addr %p + size %llu overflows %p", (void *)addr, size,
		    (void *)(addr + size));
	}
	panic("addr %p + size %llu doesnt fit in one range (id: %u min: %p max: %p)",
	    (void *)addr, size, range_id, (void *)rmin, (void *)rmax);
}

/*
 * Return whether the entire allocation is contained in the given range
 */
static bool
kmem_range_contains_fully(
	kmem_range_id_t         range_id,
	vm_map_offset_t         addr,
	vm_map_size_t           size)
{
	const struct mach_vm_range *r = &kmem_ranges[range_id];
	mach_vm_offset_t rmin, rmax;
	bool result = false;

	if (VM_KERNEL_ADDRESS(addr)) {
		addr = vm_memtag_canonicalize_address(addr);
	}

	/*
	 * The `&` is not a typo: we really expect the check to pass,
	 * so encourage the compiler to eagerly load and test without branches
	 */
	mach_vm_range_load(r, rmin, rmax);
	result = (addr >= rmin) & (addr < rmax);
	if (__improbable(result
	    && ((addr + size < rmin) || (addr + size > rmax)))) {
		kmem_range_invalid_panic(range_id, addr, size);
	}
	return result;
}

vm_map_size_t
kmem_range_id_size(kmem_range_id_t range_id)
{
	return mach_vm_range_size(&kmem_ranges[range_id]);
}
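
/*
 * Return the ID of the kmem range that fully contains [addr, addr + size),
 * or KMEM_RANGE_ID_NONE when no range covers it.
 */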

kmem_range_id_t
kmem_addr_get_range(vm_map_offset_t addr, vm_map_size_t size)
{
	kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;

	for (; range_id < KMEM_RANGE_COUNT; range_id++) {
		if (kmem_range_contains_fully(range_id, addr, size)) {
			return range_id;
		}
	}
	return KMEM_RANGE_ID_NONE;
}

bool
kmem_is_ptr_range(vm_map_range_id_t range_id)
{
	return (range_id >= KMEM_RANGE_ID_FIRST) &&
	       (range_id <= KMEM_RANGE_ID_NUM_PTR);
}

__abortlike
static void
kmem_range_invalid_for_overwrite(vm_map_offset_t addr)
{
	panic("Can't overwrite mappings (addr: %p) in kmem ptr ranges",
	    (void *)addr);
}
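
/*
 * Return the kmem range containing [addr, addr + size), panicking if it
 * falls inside one of the pointer ranges, where overwriting mappings is
 * not allowed.
 */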

mach_vm_range_t
kmem_validate_range_for_overwrite(
	vm_map_offset_t         addr,
	vm_map_size_t           size)
{
	vm_map_range_id_t range_id = kmem_addr_get_range(addr, size);

	if (kmem_is_ptr_range(range_id)) {
		kmem_range_invalid_for_overwrite(addr);
	}

	return &kmem_ranges[range_id];
}


#pragma mark entry parameters


__abortlike
static void
__kmem_entry_validate_panic(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_offset_t     addr,
	vm_size_t       size,
	uint32_t        flags,
	kmem_guard_t    guard)
{
	const char *what = "???";

	if (entry->vme_atomic != guard.kmg_atomic) {
		what = "atomicity";
	} else if (entry->is_sub_map != guard.kmg_submap) {
		what = "objectness";
	} else if (addr != entry->vme_start) {
		what = "left bound";
	} else if ((flags & KMF_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
		what = "right bound";
	} else if (guard.kmg_context != entry->vme_context) {
		what = "guard";
	}

	panic("kmem(map=%p, addr=%p, size=%zd, flags=0x%x): "
	    "entry:%p %s mismatch guard(0x%08x)",
	    map, (void *)addr, size, flags, entry,
	    what, guard.kmg_context);
}

static bool
__kmem_entry_validate_guard(
	vm_map_entry_t  entry,
	vm_offset_t     addr,
	vm_size_t       size,
	kmem_flags_t    flags,
	kmem_guard_t    guard)
{
	if (entry->vme_atomic != guard.kmg_atomic) {
		return false;
	}

	if (!guard.kmg_atomic) {
		return true;
	}

	if (entry->is_sub_map != guard.kmg_submap) {
		return false;
	}

	if (addr != entry->vme_start) {
		return false;
	}

	if ((flags & KMEM_GUESS_SIZE) == 0 && addr + size != entry->vme_end) {
		return false;
	}

	if (!guard.kmg_submap && guard.kmg_context != entry->vme_context) {
		return false;
	}

	return true;
}
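
/*
 * Panic unless the entry matches the supplied guard (atomicity, bounds
 * and context); used by the free and realloc paths to catch mismatched
 * or forged frees.
 */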

void
kmem_entry_validate_guard(
	vm_map_t        map,
	vm_map_entry_t  entry,
	vm_offset_t     addr,
	vm_size_t       size,
	kmem_guard_t    guard)
{
	if (!__kmem_entry_validate_guard(entry, addr, size, KMEM_NONE, guard)) {
		__kmem_entry_validate_panic(map, entry, addr, size, KMEM_NONE, guard);
	}
}

__abortlike
static void
__kmem_entry_validate_object_panic(
	vm_map_t        map,
	vm_map_entry_t  entry,
	kmem_flags_t    flags)
{
	const char *what;
	const char *verb;

	if (entry->is_sub_map) {
		panic("kmem(map=%p) entry %p is a submap", map, entry);
	}

	if (flags & KMEM_KOBJECT) {
		what = "kernel";
		verb = "isn't";
	} else if (flags & KMEM_COMPRESSOR) {
		what = "compressor";
		verb = "isn't";
	} else if (entry->vme_kernel_object) {
		what = "kernel";
		verb = "is unexpectedly";
	} else {
		what = "compressor";
		verb = "is unexpectedly";
	}

	panic("kmem(map=%p, flags=0x%x): entry %p %s for the %s object",
	    map, flags, entry, verb, what);
}

static bool
__kmem_entry_validate_object(
	vm_map_entry_t  entry,
	kmem_flags_t    flags)
{
	if (entry->is_sub_map) {
		return false;
	}
	if ((bool)(flags & KMEM_KOBJECT) != entry->vme_kernel_object) {
		return false;
	}

	return (bool)(flags & KMEM_COMPRESSOR) ==
	       (VME_OBJECT(entry) == compressor_object);
}
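
/*
 * Return the size originally requested for the allocation at `addr`,
 * after validating its entry against the supplied guard.  Panics if no
 * entry exists at that address or the guard does not match.
 */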

vm_size_t
kmem_size_guard(
	vm_map_t        map,
	vm_offset_t     addr,
	kmem_guard_t    guard)
{
	kmem_flags_t flags = KMEM_GUESS_SIZE;
	vm_map_entry_t entry;
	vm_size_t size;

	vm_map_lock_read(map);

#if KASAN_CLASSIC
	addr -= PAGE_SIZE;
#endif /* KASAN_CLASSIC */
	addr = vm_memtag_canonicalize_address(addr);

	if (!vm_map_lookup_entry(map, addr, &entry)) {
		__kmem_entry_not_found_panic(map, addr);
	}

	if (!__kmem_entry_validate_guard(entry, addr, 0, flags, guard)) {
		__kmem_entry_validate_panic(map, entry, addr, 0, flags, guard);
	}

	size = __kmem_entry_orig_size(entry);

	vm_map_unlock_read(map);

	return size;
}

static inline uint16_t
kmem_hash_backtrace(
	void                     *fp)
{
	uint64_t  bt_count;
	uintptr_t bt[8] = {};

	struct backtrace_control ctl = {
		.btc_frame_addr = (uintptr_t)fp,
	};

	bt_count = backtrace(bt, sizeof(bt) / sizeof(bt[0]), &ctl, NULL);
	return (uint16_t) os_hash_jenkins(bt, bt_count * sizeof(bt[0]));
}

static_assert(KMEM_RANGE_ID_DATA - 1 <= KMEM_RANGE_MASK,
    "Insufficient bits to represent ptr ranges");

kmem_range_id_t
kmem_adjust_range_id(
	uint32_t                  hash)
{
	return (kmem_range_id_t) (KMEM_RANGE_ID_PTR_0 +
	       (hash & KMEM_RANGE_MASK) % kmem_ptr_ranges);
}

static bool
kmem_use_sprayqtn(
	kma_flags_t               kma_flags,
	vm_map_size_t             map_size,
	vm_offset_t               mask)
{
	/*
	 * Pointer allocations that are above the guard object threshold, or that
	 * have leading guard pages with non-standard alignment requests, are
	 * redirected to the sprayqtn range.
	 */
#if DEBUG || DEVELOPMENT
	btref_get_flags_t flags = (kma_flags & KMA_NOPAGEWAIT) ?
	    BTREF_GET_NOWAIT : 0;

	if ((kma_flags & KMA_SPRAYQTN) == 0) {
		if (map_size > KMEM_GOBJ_THRESHOLD) {
			btlog_record(kmem_outlier_log, (void *)map_size, KMEM_OUTLIER_SIZE,
			    btref_get(__builtin_frame_address(0), flags));
		} else if ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK)) {
			btlog_record(kmem_outlier_log, (void *)mask, KMEM_OUTLIER_ALIGN,
			    btref_get(__builtin_frame_address(0), flags));
		}
	}
#endif /* DEBUG || DEVELOPMENT */

	return (kma_flags & KMA_SPRAYQTN) ||
	       (map_size > KMEM_GOBJ_THRESHOLD) ||
	       ((kma_flags & KMA_GUARD_FIRST) && (mask > PAGE_MASK));
}
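
/*
 * Pick the target kmem range and allocation direction for a kernel_map
 * allocation: KMA_DATA goes to the data range, allocations redirected by
 * kmem_use_sprayqtn() go to the spray quarantine range, and typed pointer
 * allocations go to one of the pointer ranges based on their type hash
 * (or a hash of the caller's backtrace when no type hash is provided).
 */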

static void
kmem_apply_security_policy(
	vm_map_t                  map,
	kma_flags_t               kma_flags,
	kmem_guard_t              guard,
	vm_map_size_t             map_size,
	vm_offset_t               mask,
	vm_map_kernel_flags_t    *vmk_flags,
	bool                      assert_dir __unused)
{
	kmem_range_id_t range_id;
	bool from_right;
	uint16_t type_hash = guard.kmg_type_hash;

	if (startup_phase < STARTUP_SUB_KMEM || map != kernel_map) {
		return;
	}

	/*
	 * A non-zero type-hash must be passed by krealloc_type
	 */
#if (DEBUG || DEVELOPMENT)
	if (assert_dir && !(kma_flags & KMA_DATA)) {
		assert(type_hash != 0);
	}
#endif

	if (kma_flags & KMA_DATA) {
		range_id  = KMEM_RANGE_ID_DATA;
		/*
		 * As an optimization in KMA_DATA to avoid fragmentation,
		 * allocate static carveouts at the end of the DATA range.
		 */
		from_right = (bool)(kma_flags & KMA_PERMANENT);
	} else if (kmem_use_sprayqtn(kma_flags, map_size, mask)) {
		range_id = KMEM_RANGE_ID_SPRAYQTN;
		from_right = (bool)(kma_flags & KMA_PERMANENT);
	} else if (type_hash) {
		range_id  = (kmem_range_id_t)(type_hash & KMEM_RANGE_MASK);
		from_right = type_hash & KMEM_DIRECTION_MASK;
	} else {
		/*
		 * Range id needs to correspond to one of the PTR ranges
		 */
		type_hash = (uint16_t) kmem_hash_backtrace(__builtin_frame_address(0));
		range_id  = kmem_adjust_range_id(type_hash);
		from_right = type_hash & KMEM_DIRECTION_MASK;
	}

	vmk_flags->vmkf_range_id = range_id;
	vmk_flags->vmkf_last_free = from_right;
}

#pragma mark allocation

static kmem_return_t
kmem_alloc_guard_internal(
	vm_map_t                map,
	vm_size_t               size,
	vm_offset_t             mask,
	kma_flags_t             flags,
	kmem_guard_t            guard,
	kern_return_t         (^alloc_pages)(vm_size_t, kma_flags_t, vm_page_t *))
{
	vm_object_t             object;
	vm_offset_t             delta = 0;
	vm_map_entry_t          entry = NULL;
	vm_map_offset_t         map_addr, fill_start;
	vm_map_size_t           map_size, fill_size;
	vm_page_t               guard_left = VM_PAGE_NULL;
	vm_page_t               guard_right = VM_PAGE_NULL;
	vm_page_t               wired_page_list = VM_PAGE_NULL;
	vm_map_kernel_flags_t   vmk_flags = VM_MAP_KERNEL_FLAGS_ANYWHERE();
	bool                    skip_guards;
	kmem_return_t           kmr = { };

	assert(kernel_map && map->pmap == kernel_pmap);

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
	    size, 0, 0, 0);
#endif


	if (size == 0 ||
	    (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) ||
	    (size < __kmem_guard_size(ANYF(flags)))) {
		__kmem_invalid_size_panic(map, size, flags);
	}

	/*
	 * limit the size of a single extent of wired memory
	 * to try and limit the damage to the system if
	 * too many pages get wired down
	 * limit raised to 2GB with 128GB max physical limit,
	 * but scaled by installed memory above this
	 *
	 * Note: kmem_alloc_contig_guard() is immune to this check.
	 */
	if (__improbable(!(flags & (KMA_VAONLY | KMA_PAGEABLE)) &&
	    alloc_pages == NULL &&
	    size > MAX(1ULL << 31, sane_size / 64))) {
		kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
		goto out_error;
	}

	/*
	 * Guard pages:
	 *
	 * Guard pages are implemented as fictitious pages.
	 *
	 * However, some maps, and some objects are known
	 * to manage their memory explicitly, and do not need
	 * those to be materialized, which saves memory.
	 *
	 * By placing guard pages on either end of a stack,
	 * they can help detect cases where a thread walks
	 * off either end of its stack.
	 *
	 * They are allocated and set up here and attempts
	 * to access those pages are trapped in vm_fault_page().
	 *
	 * The map_size we were passed may include extra space for
	 * guard pages. fill_size represents the actual size to populate.
	 * Similarly, fill_start indicates where the actual pages
	 * will begin in the range.
	 */

	map_size   = round_page(size);
	fill_start = 0;
	fill_size  = map_size - __kmem_guard_size(ANYF(flags));

#if KASAN_CLASSIC
	if (flags & KMA_KASAN_GUARD) {
		assert((flags & (KMA_GUARD_FIRST | KMA_GUARD_LAST)) == 0);
		flags |= KMA_GUARD_FIRST | KMA_GUARD_LAST;
		delta     = ptoa(2);
		map_size += delta;
	}
#else
	(void)delta;
#endif /* KASAN_CLASSIC */

	skip_guards = (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) ||
	    map->never_faults;

	if (flags & KMA_GUARD_FIRST) {
		vmk_flags.vmkf_guard_before = true;
		fill_start += PAGE_SIZE;
	}
	if ((flags & KMA_GUARD_FIRST) && !skip_guards) {
		guard_left = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
		if (__improbable(guard_left == VM_PAGE_NULL)) {
			kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
			goto out_error;
		}
	}
	if ((flags & KMA_GUARD_LAST) && !skip_guards) {
		guard_right = vm_page_grab_guard((flags & KMA_NOPAGEWAIT) == 0);
		if (__improbable(guard_right == VM_PAGE_NULL)) {
			kmr.kmr_return = KERN_RESOURCE_SHORTAGE;
			goto out_error;
		}
	}

	if (!(flags & (KMA_VAONLY | KMA_PAGEABLE))) {
		if (alloc_pages) {
			kmr.kmr_return = alloc_pages(fill_size, flags,
			    &wired_page_list);
		} else {
			kmr.kmr_return = vm_page_alloc_list(atop(fill_size), flags,
			    &wired_page_list);
		}
		if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
			goto out_error;
		}
	}

	/*
	 *	Allocate a new object (if necessary).  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if (flags & KMA_KOBJECT) {
		{
			object = kernel_object_default;
		}
		vm_object_reference(object);
	} else if (flags & KMA_COMPRESSOR) {
		object = compressor_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
		vm_object_lock(object);
		vm_object_set_size(object, map_size, size);
		/* stabilize the object to prevent shadowing */
		object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		VM_OBJECT_SET_TRUE_SHARE(object, TRUE);
		vm_object_unlock(object);
	}

	if (flags & KMA_LAST_FREE) {
		vmk_flags.vmkf_last_free = true;
	}
	if (flags & KMA_PERMANENT) {
		vmk_flags.vmf_permanent = true;
	}
	kmem_apply_security_policy(map, flags, guard, map_size, mask, &vmk_flags,
	    false);

	kmr.kmr_return = vm_map_find_space(map, 0, map_size, mask,
	    vmk_flags, &entry);
	if (__improbable(KERN_SUCCESS != kmr.kmr_return)) {
		vm_object_deallocate(object);
		goto out_error;
	}

	map_addr = entry->vme_start;
	VME_OBJECT_SET(entry, object, guard.kmg_atomic, guard.kmg_context);
	VME_ALIAS_SET(entry, guard.kmg_tag);
	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
		VME_OFFSET_SET(entry, map_addr);
	}

#if KASAN
	if ((flags & KMA_KOBJECT) && guard.kmg_atomic) {
		entry->vme_object_or_delta = (-size & PAGE_MASK) + delta;
	}
#endif /* KASAN */

	if (!(flags & (KMA_COMPRESSOR | KMA_PAGEABLE))) {
		entry->wired_count = 1;
		vme_btref_consider_and_set(entry, __builtin_frame_address(0));
	}

	if (guard_left || guard_right || wired_page_list) {
		vm_object_offset_t offset = 0ull;

		vm_object_lock(object);
		vm_map_unlock(map);

		if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
			offset = map_addr;
		}

		if (guard_left) {
			vm_page_insert(guard_left, object, offset);
			guard_left->vmp_busy = FALSE;
			guard_left = VM_PAGE_NULL;
		}

		if (guard_right) {
			vm_page_insert(guard_right, object,
			    offset + fill_start + fill_size);
			guard_right->vmp_busy = FALSE;
			guard_right = VM_PAGE_NULL;
		}

		if (wired_page_list) {
			kernel_memory_populate_object_and_unlock(object,
			    map_addr + fill_start, offset + fill_start, fill_size,
			    wired_page_list, flags, guard.kmg_tag, VM_PROT_DEFAULT,
			    __kmem_mapping_type(ANYF(flags)));
		} else {
			vm_object_unlock(object);
		}
	} else {
		vm_map_unlock(map);
	}

	/*
	 * now that the pages are wired, we no longer have to fear coalescing
	 */
	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
		vm_map_simplify(map, map_addr);
	}

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
	    atop(fill_size), 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
	kmr.kmr_address = CAST_DOWN(vm_offset_t, map_addr);

#if KASAN
	if (flags & (KMA_KASAN_GUARD | KMA_PAGEABLE)) {
		/*
		 * We need to allow the range for pageable memory,
		 * or faulting will not be allowed.
		 */
		kasan_notify_address(map_addr, map_size);
	}
#endif /* KASAN */
#if KASAN_CLASSIC
	if (flags & KMA_KASAN_GUARD) {
		kmr.kmr_address += PAGE_SIZE;
		kasan_alloc_large(kmr.kmr_address, size);
	}
#endif /* KASAN_CLASSIC */
#if CONFIG_KERNEL_TAGGING
	if (!(flags & KMA_VAONLY) && (flags & KMA_TAG)) {
		kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, size);
		vm_memtag_set_tag((vm_offset_t)kmr.kmr_address, size);
#if KASAN_TBI
		kasan_tbi_retag_unused_space((vm_offset_t)kmr.kmr_address, map_size, size);
#endif /* KASAN_TBI */
	}
#endif /* CONFIG_KERNEL_TAGGING */
	return kmr;

out_error:
	if (flags & KMA_NOFAIL) {
		__kmem_failed_panic(map, size, flags, kmr.kmr_return, "alloc");
	}
	if (guard_left) {
		guard_left->vmp_snext = wired_page_list;
		wired_page_list = guard_left;
	}
	if (guard_right) {
		guard_right->vmp_snext = wired_page_list;
		wired_page_list = guard_right;
	}
	if (wired_page_list) {
		vm_page_free_list(wired_page_list, FALSE);
	}

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
	    0, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */

	return kmr;
}
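
/*
 *	kmem_alloc_guard:
 *
 *	Allocate kernel memory as described by `flags` and `guard`,
 *	returning the address and status in a kmem_return_t.  Thin wrapper
 *	around kmem_alloc_guard_internal() without a custom page allocator.
 *
 *	Illustrative call (sketch, not taken from this file):
 *
 *		kmem_return_t kmr;
 *
 *		kmr = kmem_alloc_guard(kernel_map, PAGE_SIZE, 0,
 *		    KMA_KOBJECT | KMA_NOFAIL, KMEM_GUARD_NONE);
 *		// wired memory is mapped at kmr.kmr_address
 */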

kmem_return_t
kmem_alloc_guard(
	vm_map_t        map,
	vm_size_t       size,
	vm_offset_t     mask,
	kma_flags_t     flags,
	kmem_guard_t    guard)
{
	return kmem_alloc_guard_internal(map, size, mask, flags, guard, NULL);
}

kmem_return_t
kmem_alloc_contig_guard(
	vm_map_t                map,
	vm_size_t               size,
	vm_offset_t             mask,
	ppnum_t                 max_pnum,
	ppnum_t                 pnum_mask,
	kma_flags_t             flags,
	kmem_guard_t            guard)
{
	__auto_type alloc_pages = ^(vm_size_t fill_size, kma_flags_t kma_flags, vm_page_t *pages) {
		return cpm_allocate(fill_size, pages, max_pnum, pnum_mask, FALSE, kma_flags);
	};

	return kmem_alloc_guard_internal(map, size, mask, flags, guard, alloc_pages);
}
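
/*
 *	kmem_suballoc:
 *
 *	Create a submap of `size` bytes inside `parent` (typically the
 *	kernel_map) and return it, along with its address, in a
 *	kmem_return_t.  The submap shares the parent's pmap; KMS_DATA
 *	submaps are placed in the data range and are not made atomic.
 */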

kmem_return_t
kmem_suballoc(
	vm_map_t                parent,
	mach_vm_offset_t       *addr,
	vm_size_t               size,
	vm_map_create_options_t vmc_options,
	int                     vm_flags,
	kms_flags_t             flags,
	vm_tag_t                tag)
{
	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
	vm_map_offset_t map_addr = 0;
	kmem_return_t kmr = { };
	vm_map_t map;

	assert(page_aligned(size));
	assert(parent->pmap == kernel_pmap);

	vm_map_kernel_flags_set_vmflags(&vmk_flags, vm_flags, tag);

	if (parent == kernel_map) {
		assert(vmk_flags.vmf_overwrite || (flags & KMS_DATA));
	}

	if (vmk_flags.vmf_fixed) {
		map_addr = trunc_page(*addr);
	}

	pmap_reference(vm_map_pmap(parent));
	map = vm_map_create_options(vm_map_pmap(parent), 0, size, vmc_options);

	/*
	 * 1. vm_map_enter() will consume one ref on success.
	 *
	 * 2. make the entry atomic as kernel submaps should never be split.
	 *
	 * 3. instruct vm_map_enter() that it is a fresh submap
	 *    that needs to be taught its bounds as it is inserted.
	 */
	vm_map_reference(map);

	vmk_flags.vmkf_submap = true;
	if ((flags & KMS_DATA) == 0) {
		/* FIXME: IOKit submaps get fragmented and can't be atomic */
		vmk_flags.vmkf_submap_atomic = true;
	}
	vmk_flags.vmkf_submap_adjust = true;
	if (flags & KMS_LAST_FREE) {
		vmk_flags.vmkf_last_free = true;
	}
	if (flags & KMS_PERMANENT) {
		vmk_flags.vmf_permanent = true;
	}
	if (flags & KMS_DATA) {
		vmk_flags.vmkf_range_id = KMEM_RANGE_ID_DATA;
	}

	kmr.kmr_return = vm_map_enter(parent, &map_addr, size, 0,
	    vmk_flags, (vm_object_t)map, 0, FALSE,
	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kmr.kmr_return != KERN_SUCCESS) {
		if (flags & KMS_NOFAIL) {
			panic("kmem_suballoc(map=%p, size=%zd) failed with %d",
			    parent, size, kmr.kmr_return);
		}
		assert(os_ref_get_count_raw(&map->map_refcnt) == 2);
		vm_map_deallocate(map);
		vm_map_deallocate(map); /* also removes ref to pmap */
		return kmr;
	}

	/*
	 * For kmem_suballocs that register a claim and are assigned a range, ensure
	 * that the exact same range is returned.
	 */
	if (*addr != 0 && parent == kernel_map &&
	    startup_phase > STARTUP_SUB_KMEM) {
		assert(CAST_DOWN(vm_offset_t, map_addr) == *addr);
	} else {
		*addr = map_addr;
	}

	kmr.kmr_submap = map;
	return kmr;
}

/*
 *	kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */

__exported kern_return_t
kmem_alloc_external(
	vm_map_t        map,
	vm_offset_t     *addrp,
	vm_size_t       size);
kern_return_t
kmem_alloc_external(
	vm_map_t        map,
	vm_offset_t     *addrp,
	vm_size_t       size)
{
	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
		return kmem_alloc(map, addrp, size, KMA_NONE, vm_tag_bt());
	}
	/* Maintain ABI compatibility: invalid sizes used to be allowed */
	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
}


/*
 *	kmem_alloc_kobject:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

__exported kern_return_t
kmem_alloc_kobject_external(
	vm_map_t        map,
	vm_offset_t     *addrp,
	vm_size_t       size);
kern_return_t
kmem_alloc_kobject_external(
	vm_map_t        map,
	vm_offset_t     *addrp,
	vm_size_t       size)
{
	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
		return kmem_alloc(map, addrp, size, KMA_KOBJECT, vm_tag_bt());
	}
	/* Maintain ABI compatibility: invalid sizes used to be allowed */
	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
}

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

__exported kern_return_t
kmem_alloc_pageable_external(
	vm_map_t        map,
	vm_offset_t     *addrp,
	vm_size_t       size);
kern_return_t
kmem_alloc_pageable_external(
	vm_map_t        map,
	vm_offset_t     *addrp,
	vm_size_t       size)
{
	if (size && (size >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) == 0) {
		return kmem_alloc(map, addrp, size, KMA_PAGEABLE | KMA_DATA, vm_tag_bt());
	}
	/* Maintain ABI compatibility: invalid sizes used to be allowed */
	return size ? KERN_NO_SPACE: KERN_INVALID_ARGUMENT;
}
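
/*
 * Sanitize the caller-supplied address and size for
 * mach_vm_allocate_kernel(): fixed mappings validate address and size
 * together (re-aligning the start), anywhere mappings only validate the
 * size.
 */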

static inline kern_return_t
mach_vm_allocate_kernel_sanitize(
	vm_map_t                map,
	mach_vm_offset_ut       addr_u,
	mach_vm_size_ut         size_u,
	vm_map_kernel_flags_t   vmk_flags,
	vm_map_offset_t        *map_addr,
	vm_map_size_t          *map_size)
{
	kern_return_t   result;
	vm_map_offset_t map_end;

	if (vmk_flags.vmf_fixed) {
		result = vm_sanitize_addr_size(addr_u, size_u,
		    VM_SANITIZE_CALLER_VM_ALLOCATE_FIXED,
		    map,
		    VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS | VM_SANITIZE_FLAGS_REALIGN_START,
		    map_addr, &map_end, map_size);
		if (__improbable(result != KERN_SUCCESS)) {
			return result;
		}
	} else {
		*map_addr = 0;
		result = vm_sanitize_size(0, size_u,
		    VM_SANITIZE_CALLER_VM_ALLOCATE_ANYWHERE, map,
		    VM_SANITIZE_FLAGS_SIZE_ZERO_SUCCEEDS,
		    map_size);
		if (__improbable(result != KERN_SUCCESS)) {
			return result;
		}
	}

	return KERN_SUCCESS;
}
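
/*
 *	mach_vm_allocate_kernel:
 *
 *	Allocate anonymous memory in `map` on behalf of in-kernel callers
 *	of the vm_allocate() family, recording ktriage breadcrumbs when the
 *	arguments are invalid or vm_map_enter() fails.
 */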

kern_return_t
mach_vm_allocate_kernel(
	vm_map_t                map,
	mach_vm_offset_ut      *addr_u,
	mach_vm_size_ut         size_u,
	vm_map_kernel_flags_t   vmk_flags)
{
	vm_map_offset_t map_addr;
	vm_map_size_t   map_size;
	kern_return_t   result;

	if (map == VM_MAP_NULL) {
		ktriage_record(thread_tid(current_thread()),
		    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
		    KDBG_TRIAGE_RESERVED,
		    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADMAP_ERROR),
		    KERN_INVALID_ARGUMENT /* arg */);
		return KERN_INVALID_ARGUMENT;
	}

	if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
	    VM_FLAGS_USER_ALLOCATE)) {
		return KERN_INVALID_ARGUMENT;
	}

	result = mach_vm_allocate_kernel_sanitize(map,
	    *addr_u,
	    size_u,
	    vmk_flags,
	    &map_addr,
	    &map_size);
	if (__improbable(result != KERN_SUCCESS)) {
		result = vm_sanitize_get_kr(result);
		if (result == KERN_SUCCESS) {
			*addr_u = vm_sanitize_wrap_addr(0);
		} else {
			ktriage_record(thread_tid(current_thread()),
			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
			    KDBG_TRIAGE_RESERVED,
			    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_BADSIZE_ERROR),
			    KERN_INVALID_ARGUMENT /* arg */);
		}
		return result;
	}

	vm_map_kernel_flags_update_range_id(&vmk_flags, map, map_size);

	result = vm_map_enter(
		map,
		&map_addr,
		map_size,
		(vm_map_offset_t)0,
		vmk_flags,
		VM_OBJECT_NULL,
		(vm_object_offset_t)0,
		FALSE,
		VM_PROT_DEFAULT,
		VM_PROT_ALL,
		VM_INHERIT_DEFAULT);

	if (result == KERN_SUCCESS) {
#if KASAN
		if (map->pmap == kernel_pmap) {
			kasan_notify_address(map_addr, map_size);
		}
#endif
		*addr_u = vm_sanitize_wrap_addr(map_addr);
	} else {
		ktriage_record(thread_tid(current_thread()),
		    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
		    KDBG_TRIAGE_RESERVED,
		    KDBG_TRIAGE_VM_ALLOCATE_KERNEL_VMMAPENTER_ERROR),
		    result /* arg */);
	}
	return result;
}

#pragma mark population
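
/*
 * Enter a single page into the kernel pmap on behalf of the populate
 * paths: try first without waiting, and if the pmap reports a resource
 * shortage, retry with waiting allowed (dropping the object lock around
 * the retry).
 */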

static void
kernel_memory_populate_pmap_enter(
	vm_object_t             object,
	vm_address_t            addr,
	vm_object_offset_t      offset,
	vm_page_t               mem,
	vm_prot_t               prot,
	int                     pe_flags,
	pmap_mapping_type_t     mapping_type)
{
	kern_return_t   pe_result;
	int             pe_options;

	if (VMP_ERROR_GET(mem)) {
		panic("VM page %p should not have an error", mem);
	}

	pe_options = PMAP_OPTIONS_NOWAIT;
	if (object->internal) {
		pe_options |= PMAP_OPTIONS_INTERNAL;
	}
	if (mem->vmp_reusable || object->all_reusable) {
		pe_options |= PMAP_OPTIONS_REUSABLE;
	}

	pe_result = pmap_enter_options(kernel_pmap, addr + offset,
	    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
	    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);

	if (pe_result == KERN_RESOURCE_SHORTAGE) {
		vm_object_unlock(object);

		pe_options &= ~PMAP_OPTIONS_NOWAIT;

		pe_result = pmap_enter_options(kernel_pmap, addr + offset,
		    VM_PAGE_GET_PHYS_PAGE(mem), prot, VM_PROT_NONE,
		    pe_flags, /* wired */ TRUE, pe_options, NULL, mapping_type);

		vm_object_lock(object);
	}

	assert(pe_result == KERN_SUCCESS);
}

void
kernel_memory_populate_object_and_unlock(
	vm_object_t             object, /* must be locked */
	vm_address_t            addr,
	vm_offset_t             offset,
	vm_size_t               size,
	vm_page_t               page_list,
	kma_flags_t             flags,
	vm_tag_t                tag,
	vm_prot_t               prot,
	pmap_mapping_type_t     mapping_type)
{
	vm_page_t       mem;
	int             pe_flags;
	bool            gobbled_list = page_list && page_list->vmp_gobbled;

	assert(((flags & KMA_KOBJECT) != 0) == (is_kernel_object(object) != 0));
	assert3u((bool)(flags & KMA_COMPRESSOR), ==, object == compressor_object);


	if (flags & (KMA_KOBJECT | KMA_COMPRESSOR)) {
		assert3u(offset, ==, addr);
	} else {
		/*
		 * kernel_memory_populate_pmap_enter() might drop the object
		 * lock, and the caller might not own a reference anymore
		 * and rely on holding the vm object lock for liveness.
		 */
		vm_object_reference_locked(object);
	}

	if (flags & KMA_KSTACK) {
		pe_flags = VM_MEM_STACK;
	} else {
		pe_flags = 0;
	}


	for (vm_object_offset_t pg_offset = 0;
	    pg_offset < size;
	    pg_offset += PAGE_SIZE_64) {
		if (page_list == NULL) {
			panic("%s: page_list too short", __func__);
		}

		mem = page_list;
		page_list = mem->vmp_snext;
		mem->vmp_snext = NULL;

		assert(mem->vmp_wire_count == 0);
		assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
		assert(!mem->vmp_fictitious && !mem->vmp_private);

		if (flags & KMA_COMPRESSOR) {
			mem->vmp_q_state = VM_PAGE_USED_BY_COMPRESSOR;
			/*
			 * Background processes doing I/O accounting can call
			 * into the NVMe driver to do some work, which results
			 * in an allocation here; make sure that the pages used
			 * by the compressor, regardless of process context, are
			 * never on the special Q.
			 */
			mem->vmp_on_specialq = VM_PAGE_SPECIAL_Q_EMPTY;

			vm_page_insert(mem, object, offset + pg_offset);
		} else {
			mem->vmp_q_state = VM_PAGE_IS_WIRED;
			mem->vmp_wire_count = 1;

			vm_page_insert_wired(mem, object, offset + pg_offset, tag);
		}

		mem->vmp_gobbled = false;
		mem->vmp_busy = false;
		mem->vmp_pmapped = true;
		mem->vmp_wpmapped = true;

		/*
		 * Manual PMAP_ENTER_OPTIONS() with shortcuts
		 * for the kernel and compressor objects.
		 */
		kernel_memory_populate_pmap_enter(object, addr, pg_offset,
		    mem, prot, pe_flags, mapping_type);

		if (flags & KMA_NOENCRYPT) {
			pmap_set_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
		}
	}

	if (page_list) {
		panic("%s: page_list too long", __func__);
	}

	vm_object_unlock(object);
	if ((flags & (KMA_KOBJECT | KMA_COMPRESSOR)) == 0) {
		vm_object_deallocate(object);
	}

	/*
	 * Update the accounting:
	 * - the compressor "wired" pages don't really count as wired
	 * - kmem_alloc_contig_guard() gives gobbled pages,
	 *   which already count as wired but need to be ungobbled.
	 */
	if (gobbled_list) {
		vm_page_lockspin_queues();
		if (flags & KMA_COMPRESSOR) {
			vm_page_wire_count -= atop(size);
		}
		vm_page_gobble_count -= atop(size);
		vm_page_unlock_queues();
	} else if ((flags & KMA_COMPRESSOR) == 0) {
		vm_page_lockspin_queues();
		vm_page_wire_count += atop(size);
		vm_page_unlock_queues();
	}

	if (flags & KMA_KOBJECT) {
		/* vm_page_insert_wired() handles regular objects already */
		vm_tag_update_size(tag, size, NULL);
	}

#if KASAN
	if (flags & KMA_COMPRESSOR) {
		kasan_notify_address_nopoison(addr, size);
	} else {
		kasan_notify_address(addr, size);
	}
#endif /* KASAN */
}
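
/*
 *	kernel_memory_populate:
 *
 *	Back the already-reserved kernel VA range [addr, addr + size) with
 *	freshly allocated pages in the kernel or compressor object.
 */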


kern_return_t
kernel_memory_populate(
	vm_offset_t     addr,
	vm_size_t       size,
	kma_flags_t     flags,
	vm_tag_t        tag)
{
	kern_return_t   kr = KERN_SUCCESS;
	vm_page_t       page_list = NULL;
	vm_size_t       page_count = atop_64(size);
	vm_object_t     object = __kmem_object(ANYF(flags));

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_START,
	    size, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */


	kr = vm_page_alloc_list(page_count, flags, &page_list);
	if (kr == KERN_SUCCESS) {
		vm_object_lock(object);
		kernel_memory_populate_object_and_unlock(object, addr,
		    addr, size, page_list, flags, tag, VM_PROT_DEFAULT,
		    __kmem_mapping_type(ANYF(flags)));
	}

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
	    page_count, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
	return kr;
}
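
/*
 *	kernel_memory_depopulate:
 *
 *	Counterpart of kernel_memory_populate(): disconnect, remove and free
 *	the pages backing [addr, addr + size) in the kernel or compressor
 *	object, and adjust the wire counts and tag accounting.
 */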

void
kernel_memory_depopulate(
	vm_offset_t        addr,
	vm_size_t          size,
	kma_flags_t        flags,
	vm_tag_t           tag)
{
	vm_object_t        object = __kmem_object(ANYF(flags));
	vm_object_offset_t offset = addr;
	vm_page_t          mem;
	vm_page_t          local_freeq = NULL;
	unsigned int       pages_unwired = 0;

	vm_object_lock(object);

	pmap_protect(kernel_pmap, offset, offset + size, VM_PROT_NONE);

	for (vm_object_offset_t pg_offset = 0;
	    pg_offset < size;
	    pg_offset += PAGE_SIZE_64) {
		mem = vm_page_lookup(object, offset + pg_offset);

		assert(mem);

		if (flags & KMA_COMPRESSOR) {
			assert(mem->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR);
		} else {
			assert(mem->vmp_q_state == VM_PAGE_IS_WIRED);
			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
			pages_unwired++;
		}

		mem->vmp_busy = TRUE;

		assert(mem->vmp_tabled);
		vm_page_remove(mem, TRUE);
		assert(mem->vmp_busy);

		assert(mem->vmp_pageq.next == 0 && mem->vmp_pageq.prev == 0);

		mem->vmp_q_state = VM_PAGE_NOT_ON_Q;
		mem->vmp_snext = local_freeq;
		local_freeq = mem;
	}

	vm_object_unlock(object);

	vm_page_free_list(local_freeq, TRUE);

	if (!(flags & KMA_COMPRESSOR)) {
		vm_page_lockspin_queues();
		vm_page_wire_count -= pages_unwired;
		vm_page_unlock_queues();
	}

	if (flags & KMA_KOBJECT) {
		/* vm_page_remove() handles regular objects already */
		vm_tag_update_size(tag, -ptoa_64(pages_unwired), NULL);
	}
}

#pragma mark reallocation

__abortlike
static void
__kmem_realloc_invalid_object_size_panic(
	vm_map_t                map,
	vm_address_t            address,
	vm_size_t               size,
	vm_map_entry_t          entry)
{
	vm_object_t object  = VME_OBJECT(entry);
	vm_size_t   objsize = __kmem_entry_orig_size(entry);

	panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
	    "object %p has unexpected size %ld",
	    map, (void *)address, (size_t)size, entry, object, objsize);
}

__abortlike
static void
__kmem_realloc_invalid_pager_panic(
	vm_map_t                map,
	vm_address_t            address,
	vm_size_t               size,
	vm_map_entry_t          entry)
{
	vm_object_t object     = VME_OBJECT(entry);
	memory_object_t pager  = object->pager;
	bool pager_created     = object->pager_created;
	bool pager_initialized = object->pager_initialized;
	bool pager_ready       = object->pager_ready;

	panic("kmem_realloc(map=%p, addr=%p, size=%zd, entry=%p): "
	    "object %p has unexpected pager %p (%d,%d,%d)",
	    map, (void *)address, (size_t)size, entry, object,
	    pager, pager_created, pager_initialized, pager_ready);
}
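
/*
 * Shrink path of kmem_realloc_guard(): clip the entry at its new end,
 * remove the trailing portion of the mapping, trim the backing object,
 * and re-insert a right-hand guard page when one is requested.  Called
 * with the map locked exclusively; the lock is dropped by
 * vm_map_remove_and_unlock().
 */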

static kmem_return_t
kmem_realloc_shrink_guard(
	vm_map_t                map,
	vm_offset_t             req_oldaddr,
	vm_size_t               req_oldsize,
	vm_size_t               req_newsize,
	kmr_flags_t             flags,
	kmem_guard_t            guard,
	vm_map_entry_t          entry)
{
	vmr_flags_t             vmr_flags = VM_MAP_REMOVE_KUNWIRE;
	vm_object_t             object;
	vm_offset_t             delta = 0;
	kmem_return_t           kmr;
	bool                    was_atomic;
	vm_size_t               oldsize = round_page(req_oldsize);
	vm_size_t               newsize = round_page(req_newsize);
	vm_address_t            oldaddr = req_oldaddr;

#if KASAN_CLASSIC
	if (flags & KMR_KASAN_GUARD) {
		assert((flags & (KMR_GUARD_FIRST | KMR_GUARD_LAST)) == 0);
		flags   |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
		oldaddr -= PAGE_SIZE;
		delta    = ptoa(2);
		oldsize += delta;
		newsize += delta;
	}
#endif /* KASAN_CLASSIC */

	if (flags & KMR_TAG) {
		oldaddr = vm_memtag_canonicalize_address(req_oldaddr);
	}

	vm_map_lock_assert_exclusive(map);

	if ((flags & KMR_KOBJECT) == 0) {
		object = VME_OBJECT(entry);
		vm_object_reference(object);
	}

	/*
	 *	Shrinking an atomic entry starts with splitting it,
	 *	and removing the second half.
	 */
	was_atomic = entry->vme_atomic;
	entry->vme_atomic = false;
	vm_map_clip_end(map, entry, entry->vme_start + newsize);
	entry->vme_atomic = was_atomic;

#if KASAN
	if (entry->vme_kernel_object && was_atomic) {
		entry->vme_object_or_delta = (-req_newsize & PAGE_MASK) + delta;
	}
#if KASAN_CLASSIC
	if (flags & KMR_KASAN_GUARD) {
		kasan_poison_range(oldaddr + newsize, oldsize - newsize,
		    ASAN_VALID);
	}
#endif
#if KASAN_TBI
	if (flags & KMR_TAG) {
		kasan_tbi_mark_free_space(req_oldaddr + newsize, oldsize - newsize);
	}
#endif /* KASAN_TBI */
#endif /* KASAN */
	(void)vm_map_remove_and_unlock(map,
	    oldaddr + newsize, oldaddr + oldsize,
	    vmr_flags, KMEM_GUARD_NONE);


	/*
	 *	Lastly, if there are guard pages, deal with them.
	 *
	 *	The kernel object just needs to depopulate,
	 *	regular objects require freeing the last page
	 *	and replacing it with a guard.
	 */
	if (flags & KMR_KOBJECT) {
		if (flags & KMR_GUARD_LAST) {
			kernel_memory_depopulate(oldaddr + newsize - PAGE_SIZE,
			    PAGE_SIZE, KMA_KOBJECT, guard.kmg_tag);
		}
	} else {
		vm_page_t guard_right = VM_PAGE_NULL;
		vm_offset_t remove_start = newsize;

		if (flags & KMR_GUARD_LAST) {
			if (!map->never_faults) {
				guard_right = vm_page_grab_guard(true);
			}
			remove_start -= PAGE_SIZE;
		}

		vm_object_lock(object);

		if (object->vo_size != oldsize) {
			__kmem_realloc_invalid_object_size_panic(map,
			    req_oldaddr, req_oldsize + delta, entry);
		}
		vm_object_set_size(object, newsize, req_newsize);

		vm_object_page_remove(object, remove_start, oldsize);

		if (guard_right) {
			vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
			guard_right->vmp_busy = false;
		}
		vm_object_unlock(object);
		vm_object_deallocate(object);
	}

	kmr.kmr_address = req_oldaddr;
	kmr.kmr_return  = 0;
#if KASAN_CLASSIC
	if (flags & KMA_KASAN_GUARD) {
		kasan_alloc_large(kmr.kmr_address, req_newsize);
	}
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
	if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
		kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
		vm_memtag_set_tag(kmr.kmr_address, req_newsize);
		kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
	}
#endif /* KASAN_TBI */

	return kmr;
}
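
/*
 *	kmem_realloc_guard:
 *
 *	Reallocate the region at `req_oldaddr` to `req_newsize` bytes.
 *	A zero old address degenerates to kmem_alloc_guard(), a zero new
 *	size to kmem_free_guard().  Growing allocations are given a fresh
 *	range backed by the same pages plus newly allocated ones;
 *	KMR_FREEOLD controls whether the old mapping is released.
 */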

kmem_return_t
kmem_realloc_guard(
	vm_map_t                map,
	vm_offset_t             req_oldaddr,
	vm_size_t               req_oldsize,
	vm_size_t               req_newsize,
	kmr_flags_t             flags,
	kmem_guard_t            guard)
{
	vm_object_t             object;
	vm_size_t               oldsize;
	vm_size_t               newsize;
	vm_offset_t             delta = 0;
	vm_map_offset_t         oldaddr;
	vm_map_offset_t         newaddr;
	vm_object_offset_t      newoffs;
	vm_map_entry_t          oldentry;
	vm_map_entry_t          newentry;
	vm_page_t               page_list = NULL;
	bool                    needs_wakeup = false;
	kmem_return_t           kmr = { };
	unsigned int            last_timestamp;
	vm_map_kernel_flags_t   vmk_flags = {
		.vmkf_last_free = (bool)(flags & KMR_LAST_FREE),
	};

	assert(KMEM_REALLOC_FLAGS_VALID(flags));
	if (!guard.kmg_atomic && (flags & (KMR_DATA | KMR_KOBJECT)) != KMR_DATA) {
		__kmem_invalid_arguments_panic("realloc", map, req_oldaddr,
		    req_oldsize, flags);
	}

	if (req_oldaddr == 0ul) {
		return kmem_alloc_guard(map, req_newsize, 0, (kma_flags_t)flags, guard);
	}

	if (req_newsize == 0ul) {
		kmem_free_guard(map, req_oldaddr, req_oldsize,
		    (kmf_flags_t)flags, guard);
		return kmr;
	}

	if (req_newsize >> VM_KERNEL_POINTER_SIGNIFICANT_BITS) {
		__kmem_invalid_size_panic(map, req_newsize, flags);
	}
	if (req_newsize < __kmem_guard_size(ANYF(flags))) {
		__kmem_invalid_size_panic(map, req_newsize, flags);
	}

	oldsize = round_page(req_oldsize);
	newsize = round_page(req_newsize);
	oldaddr = req_oldaddr;
#if KASAN_CLASSIC
	if (flags & KMR_KASAN_GUARD) {
		flags   |= KMR_GUARD_FIRST | KMR_GUARD_LAST;
		oldaddr -= PAGE_SIZE;
		delta    = ptoa(2);
		oldsize += delta;
		newsize += delta;
	}
#endif /* KASAN_CLASSIC */
#if CONFIG_KERNEL_TAGGING
	if (flags & KMR_TAG) {
		vm_memtag_verify_tag(req_oldaddr);
		oldaddr = vm_memtag_canonicalize_address(req_oldaddr);
	}
#endif /* CONFIG_KERNEL_TAGGING */

#if !KASAN
	/*
	 *	If not on a KASAN variant and no difference in requested size,
	 *  just return.
	 *
	 *	Otherwise we want to validate the size and re-tag for KASAN_TBI.
	 */
	if (oldsize == newsize) {
		kmr.kmr_address = req_oldaddr;
		return kmr;
	}
#endif /* !KASAN */

	/*
	 *	If we're growing the allocation,
	 *	then reserve the pages we'll need,
	 *	and find a spot for its new place.
	 */
	if (oldsize < newsize) {
#if DEBUG || DEVELOPMENT
		VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
		    DBG_VM_KERN_REQUEST, DBG_FUNC_START,
		    newsize - oldsize, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
		kmr.kmr_return = vm_page_alloc_list(atop(newsize - oldsize),
		    (kma_flags_t)flags, &page_list);
		if (kmr.kmr_return == KERN_SUCCESS) {
			kmem_apply_security_policy(map, (kma_flags_t)flags, guard,
			    newsize, 0, &vmk_flags, true);
			kmr.kmr_return = vm_map_find_space(map, 0, newsize, 0,
			    vmk_flags, &newentry);
		}
		if (__improbable(kmr.kmr_return != KERN_SUCCESS)) {
			if (flags & KMR_REALLOCF) {
				kmem_free_guard(map, req_oldaddr, req_oldsize,
				    KMF_NONE, guard);
			}
			if (page_list) {
				vm_page_free_list(page_list, FALSE);
			}
#if DEBUG || DEVELOPMENT
			VM_DEBUG_CONSTANT_EVENT(vm_kern_request,
			    DBG_VM_KERN_REQUEST, DBG_FUNC_END,
			    0, 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
			return kmr;
		}

		/* map is locked */
	} else {
		vm_map_lock(map);
	}


	/*
	 *	Locate the entry:
	 *	- wait for it to quiesce.
	 *	- validate its guard,
	 *	- learn its correct tag,
	 */
again:
	if (!vm_map_lookup_entry(map, oldaddr, &oldentry)) {
		__kmem_entry_not_found_panic(map, req_oldaddr);
	}
	if ((flags & KMR_KOBJECT) && oldentry->in_transition) {
		oldentry->needs_wakeup = true;
		vm_map_entry_wait(map, THREAD_UNINT);
		goto again;
	}
	kmem_entry_validate_guard(map, oldentry, oldaddr, oldsize, guard);
	if (!__kmem_entry_validate_object(oldentry, ANYF(flags))) {
		__kmem_entry_validate_object_panic(map, oldentry, ANYF(flags));
	}
	/*
	 *	TODO: We should validate for non atomic entries that the range
	 *	      we are acting on is what we expect here.
	 */
#if KASAN
	if (__kmem_entry_orig_size(oldentry) != req_oldsize) {
		__kmem_realloc_invalid_object_size_panic(map,
		    req_oldaddr, req_oldsize + delta, oldentry);
	}

	if (oldsize == newsize) {
		kmr.kmr_address = req_oldaddr;
		if (oldentry->vme_kernel_object) {
			oldentry->vme_object_or_delta = delta +
			    (-req_newsize & PAGE_MASK);
		} else {
			object = VME_OBJECT(oldentry);
			vm_object_lock(object);
			vm_object_set_size(object, newsize, req_newsize);
			vm_object_unlock(object);
		}
		vm_map_unlock(map);

#if KASAN_CLASSIC
		if (flags & KMA_KASAN_GUARD) {
			kasan_alloc_large(kmr.kmr_address, req_newsize);
		}
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
		if ((flags & KMR_TAG) && (flags & KMR_FREEOLD)) {
			kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
			vm_memtag_set_tag(kmr.kmr_address, req_newsize);
			kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
		}
#endif /* KASAN_TBI */
		return kmr;
	}
#endif /* KASAN */

	guard.kmg_tag = VME_ALIAS(oldentry);

	if (newsize < oldsize) {
		return kmem_realloc_shrink_guard(map, req_oldaddr,
		           req_oldsize, req_newsize, flags, guard, oldentry);
	}


	/*
	 *	We are growing the entry
	 *
	 *	For regular objects we use the object `vo_size` updates
	 *	as a guarantee that no 2 kmem_realloc() can happen
	 *	concurrently (by doing it before the map is unlocked).
	 *
	 *	For the kernel object, prevent the entry from being
	 *	reallocated or changed by marking it "in_transition".
	 */

	object = VME_OBJECT(oldentry);
	vm_object_lock(object);
	vm_object_reference_locked(object);

	newaddr = newentry->vme_start;
	newoffs = oldsize;

	VME_OBJECT_SET(newentry, object, guard.kmg_atomic, guard.kmg_context);
	VME_ALIAS_SET(newentry, guard.kmg_tag);
	if (flags & KMR_KOBJECT) {
		oldentry->in_transition = true;
		VME_OFFSET_SET(newentry, newaddr);
		newentry->wired_count = 1;
		vme_btref_consider_and_set(newentry, __builtin_frame_address(0));
		newoffs = newaddr + oldsize;
#if KASAN
		newentry->vme_object_or_delta = delta +
		    (-req_newsize & PAGE_MASK);
#endif /* KASAN */
	} else {
		if (object->pager_created || object->pager) {
			/*
			 * We can't "realloc/grow" the pager, so pageable
			 * allocations should not go through this path.
			 */
			__kmem_realloc_invalid_pager_panic(map,
			    req_oldaddr, req_oldsize + delta, oldentry);
		}
		if (object->vo_size != oldsize) {
			__kmem_realloc_invalid_object_size_panic(map,
			    req_oldaddr, req_oldsize + delta, oldentry);
		}
		vm_object_set_size(object, newsize, req_newsize);
	}

	last_timestamp = map->timestamp;
	vm_map_unlock(map);


	/*
	 *	Now proceed with the population of pages.
	 *
	 *	Kernel objects can use the kmem population helpers.
	 *
	 *	Regular objects will insert pages manually,
	 *	then wire the memory into the new range.
	 */

	vm_size_t guard_right_size = __kmem_guard_right(ANYF(flags));

	if (flags & KMR_KOBJECT) {
		pmap_mapping_type_t mapping_type = __kmem_mapping_type(ANYF(flags));

		pmap_protect(kernel_pmap,
		    oldaddr, oldaddr + oldsize - guard_right_size,
		    VM_PROT_NONE);

		for (vm_object_offset_t offset = 0;
		    offset < oldsize - guard_right_size;
		    offset += PAGE_SIZE_64) {
			vm_page_t mem;

			mem = vm_page_lookup(object, oldaddr + offset);
			if (mem == VM_PAGE_NULL) {
				continue;
			}

			pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));

			mem->vmp_busy = true;
			vm_page_remove(mem, true);
			vm_page_insert_wired(mem, object, newaddr + offset,
			    guard.kmg_tag);
			mem->vmp_busy = false;

			kernel_memory_populate_pmap_enter(object, newaddr,
			    offset, mem, VM_PROT_DEFAULT, 0, mapping_type);
		}

		kernel_memory_populate_object_and_unlock(object,
		    newaddr + oldsize - guard_right_size,
		    newoffs - guard_right_size,
		    newsize - oldsize,
		    page_list, (kma_flags_t)flags,
		    guard.kmg_tag, VM_PROT_DEFAULT, mapping_type);
	} else {
		vm_page_t guard_right = VM_PAGE_NULL;

		/*
		 *	Note: we are borrowing the new entry reference
		 *	on the object for the duration of this code,
		 *	which works because we keep the object locked
		 *	throughout.
		 */
		if ((flags & KMR_GUARD_LAST) && !map->never_faults) {
			guard_right = vm_page_lookup(object, oldsize - PAGE_SIZE);
			assert(guard_right->vmp_fictitious);
			guard_right->vmp_busy = true;
			vm_page_remove(guard_right, true);
		}

		if (flags & KMR_FREEOLD) {
			/*
			 * Freeing the old mapping will make
			 * the old pages become pageable until
			 * the new mapping makes them wired again.
			 * Let's take an extra "wire_count" to
			 * prevent any accidental "page out".
			 * We'll have to undo that after wiring
			 * the new mapping.
			 */
			vm_object_reference_locked(object); /* keep object alive */
			for (vm_object_offset_t offset = 0;
			    offset < oldsize - guard_right_size;
			    offset += PAGE_SIZE_64) {
				vm_page_t mem;

				mem = vm_page_lookup(object, offset);
				assert(mem != VM_PAGE_NULL);
				assertf(!VM_PAGE_PAGEABLE(mem),
				    "mem %p qstate %d",
				    mem, mem->vmp_q_state);
				if (VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr) {
					/* guard pages are not wired */
				} else {
					assertf(VM_PAGE_WIRED(mem),
					    "mem %p qstate %d wirecount %d",
					    mem,
					    mem->vmp_q_state,
					    mem->vmp_wire_count);
					assertf(mem->vmp_wire_count >= 1,
					    "mem %p wirecount %d",
					    mem, mem->vmp_wire_count);
					mem->vmp_wire_count++;
				}
			}
		}

		for (vm_object_offset_t offset = oldsize - guard_right_size;
		    offset < newsize - guard_right_size;
		    offset += PAGE_SIZE_64) {
			vm_page_t mem = page_list;

			page_list = mem->vmp_snext;
			mem->vmp_snext = VM_PAGE_NULL;
			assert(mem->vmp_q_state == VM_PAGE_NOT_ON_Q);
			assert(!VM_PAGE_PAGEABLE(mem));

			vm_page_insert(mem, object, offset);
			mem->vmp_busy = false;
		}

		if (guard_right) {
			vm_page_insert(guard_right, object, newsize - PAGE_SIZE);
			guard_right->vmp_busy = false;
		}

		vm_object_unlock(object);
	}

	/*
	 *	Mark the entry as idle again,
	 *	and honor KMR_FREEOLD if needed.
	 */

	vm_map_lock(map);
	if (last_timestamp + 1 != map->timestamp &&
	    !vm_map_lookup_entry(map, oldaddr, &oldentry)) {
		__kmem_entry_not_found_panic(map, req_oldaddr);
	}

	if (flags & KMR_KOBJECT) {
		assert(oldentry->in_transition);
		oldentry->in_transition = false;
		if (oldentry->needs_wakeup) {
			needs_wakeup = true;
			oldentry->needs_wakeup = false;
		}
	}

	if (flags & KMR_FREEOLD) {
		vmr_flags_t vmr_flags = VM_MAP_REMOVE_KUNWIRE;

#if KASAN_CLASSIC
		if (flags & KMR_KASAN_GUARD) {
			kasan_poison_range(oldaddr, oldsize, ASAN_VALID);
		}
#endif
#if KASAN_TBI
		if (flags & KMR_TAG) {
			kasan_tbi_mark_free_space(req_oldaddr, oldsize);
		}
#endif /* KASAN_TBI */
		if (flags & KMR_GUARD_LAST) {
			vmr_flags |= VM_MAP_REMOVE_NOKUNWIRE_LAST;
		}
		(void)vm_map_remove_and_unlock(map,
		    oldaddr, oldaddr + oldsize,
		    vmr_flags, guard);
	} else {
		vm_map_unlock(map);
	}

	if ((flags & KMR_KOBJECT) == 0) {
		kern_return_t kr;
		/*
		 * This must happen _after_ we do the KMR_FREEOLD,
		 * because wiring the pages will call into the pmap,
		 * and if the pages are typed XNU_KERNEL_RESTRICTED,
		 * this would cause a second mapping of the page and panic.
		 */
		kr = vm_map_wire_kernel(map,
		    vm_sanitize_wrap_addr(newaddr),
		    vm_sanitize_wrap_addr(newaddr + newsize),
		    vm_sanitize_wrap_prot(VM_PROT_DEFAULT),
		    guard.kmg_tag, FALSE);
		assert(kr == KERN_SUCCESS);

		if (flags & KMR_FREEOLD) {
			/*
			 * Undo the extra "wiring" we made above
			 * and release the extra reference we took
			 * on the object.
			 */
			vm_object_lock(object);
			for (vm_object_offset_t offset = 0;
			    offset < oldsize - guard_right_size;
			    offset += PAGE_SIZE_64) {
				vm_page_t mem;

				mem = vm_page_lookup(object, offset);
				assert(mem != VM_PAGE_NULL);
				assertf(!VM_PAGE_PAGEABLE(mem),
				    "mem %p qstate %d",
				    mem, mem->vmp_q_state);
				if (VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr) {
					/* guard pages are not wired */
				} else {
					assertf(VM_PAGE_WIRED(mem),
					    "mem %p qstate %d wirecount %d",
					    mem,
					    mem->vmp_q_state,
					    mem->vmp_wire_count);
					assertf(mem->vmp_wire_count >= 2,
					    "mem %p wirecount %d",
					    mem, mem->vmp_wire_count);
					mem->vmp_wire_count--;
					assert(VM_PAGE_WIRED(mem));
					assert(mem->vmp_wire_count >= 1);
				}
			}
			vm_object_unlock(object);
			vm_object_deallocate(object); /* release extra ref */
		}
	}

	if (needs_wakeup) {
		vm_map_entry_wakeup(map);
	}

#if DEBUG || DEVELOPMENT
	VM_DEBUG_CONSTANT_EVENT(vm_kern_request, DBG_VM_KERN_REQUEST, DBG_FUNC_END,
	    atop(newsize - oldsize), 0, 0, 0);
#endif /* DEBUG || DEVELOPMENT */
	kmr.kmr_address = newaddr;

#if KASAN
	kasan_notify_address(kmr.kmr_address, newsize);
#endif /* KASAN */
#if KASAN_CLASSIC
	if (flags & KMR_KASAN_GUARD) {
		kmr.kmr_address += PAGE_SIZE;
		kasan_alloc_large(kmr.kmr_address, req_newsize);
	}
#endif /* KASAN_CLASSIC */
#if KASAN_TBI
	if (flags & KMR_TAG) {
		kmr.kmr_address = vm_memtag_assign_tag(kmr.kmr_address, req_newsize);
		vm_memtag_set_tag(kmr.kmr_address, req_newsize);
		kasan_tbi_retag_unused_space(kmr.kmr_address, newsize, req_newsize);
	}
#endif /* KASAN_TBI */

	return kmr;
}

#pragma mark map/remap/wire

kern_return_t
mach_vm_map_kernel(
	vm_map_t                target_map,
	mach_vm_offset_ut      *address,
	mach_vm_size_ut         initial_size,
	mach_vm_offset_ut       mask,
	vm_map_kernel_flags_t   vmk_flags,
	ipc_port_t              port,
	memory_object_offset_ut offset,
	boolean_t               copy,
	vm_prot_ut              cur_protection,
	vm_prot_ut              max_protection,
	vm_inherit_ut           inheritance)
{
	/* range_id is set by vm_map_enter_mem_object */
	return vm_map_enter_mem_object(target_map,
	           address,
	           initial_size,
	           mask,
	           vmk_flags,
	           port,
	           offset,
	           copy,
	           cur_protection,
	           max_protection,
	           inheritance,
	           NULL,
	           0);
}

kern_return_t
mach_vm_remap_new_kernel(
	vm_map_t                target_map,
	mach_vm_offset_ut      *address,
	mach_vm_size_ut         size,
	mach_vm_offset_ut       mask,
	vm_map_kernel_flags_t   vmk_flags,
	vm_map_t                src_map,
	mach_vm_offset_ut       memory_address,
	boolean_t               copy,
	vm_prot_ut             *cur_protection,   /* IN/OUT */
	vm_prot_ut             *max_protection,   /* IN/OUT */
	vm_inherit_ut           inheritance)
{
	if (!vm_map_kernel_flags_check_vm_and_kflags(vmk_flags,
	    VM_FLAGS_USER_REMAP)) {
		return KERN_INVALID_ARGUMENT;
	}


	vmk_flags.vmf_return_data_addr = true;

	/* range_id is set by vm_map_remap */
	return vm_map_remap(target_map,
	           address,
	           size,
	           mask,
	           vmk_flags,
	           src_map,
	           memory_address,
	           copy,
	           cur_protection,
	           max_protection,
	           inheritance);
}

#pragma mark free

#if KASAN

__abortlike
static void
__kmem_free_invalid_object_size_panic(
	vm_map_t                map,
	vm_address_t            address,
	vm_size_t               size,
	vm_map_entry_t          entry)
{
	vm_object_t object  = VME_OBJECT(entry);
	vm_size_t   objsize = __kmem_entry_orig_size(entry);

	panic("kmem_free(map=%p, addr=%p, size=%zd, entry=%p): "
	    "object %p has unexpected size %ld",
	    map, (void *)address, (size_t)size, entry, object, objsize);
}

#endif /* KASAN */

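/*
 * Free an allocation made through the kmem_*_guard interfaces: undo the
 * KASAN_CLASSIC redzone and memory-tagging adjustments to the requested
 * address and size, unwire and remove the mapping, and return the removed
 * size minus the redzone delta.
 */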
vm_size_t
kmem_free_guard(
	vm_map_t        map,
	vm_offset_t     req_addr,
	vm_size_t       req_size,
	kmf_flags_t     flags,
	kmem_guard_t    guard)
{
	vmr_flags_t     vmr_flags = VM_MAP_REMOVE_KUNWIRE;
	vm_address_t    addr      = req_addr;
	vm_offset_t     delta     = 0;
	vm_size_t       size;
#if KASAN
	vm_map_entry_t  entry;
#endif /* KASAN */

	assert(map->pmap == kernel_pmap);

#if KASAN_CLASSIC
	if (flags & KMF_KASAN_GUARD) {
		addr  -= PAGE_SIZE;
		delta  = ptoa(2);
	}
#endif /* KASAN_CLASSIC */
#if CONFIG_KERNEL_TAGGING
	if (flags & KMF_TAG) {
		vm_memtag_verify_tag(req_addr);
		addr = vm_memtag_canonicalize_address(req_addr);
	}
#endif /* CONFIG_KERNEL_TAGGING */

	if (flags & KMF_GUESS_SIZE) {
		vmr_flags |= VM_MAP_REMOVE_GUESS_SIZE;
		size = PAGE_SIZE;
	} else if (req_size == 0) {
		__kmem_invalid_size_panic(map, req_size, flags);
	} else {
		size = round_page(req_size) + delta;
	}

	vm_map_lock(map);

#if KASAN
	if (!vm_map_lookup_entry(map, addr, &entry)) {
		__kmem_entry_not_found_panic(map, req_addr);
	}
	if (flags & KMF_GUESS_SIZE) {
		vmr_flags &= ~VM_MAP_REMOVE_GUESS_SIZE;
		req_size = __kmem_entry_orig_size(entry);
		size = round_page(req_size + delta);
	} else if (guard.kmg_atomic && entry->vme_kernel_object &&
	    __kmem_entry_orig_size(entry) != req_size) {
		/*
		 * We can't make a strict check for regular
		 * VM objects because it could be:
		 *
		 * - the kmem_free_guard() of a kmem_realloc_guard() without
		 *   KMR_FREEOLD, and in that case the object size won't match.
		 *
		 * - a submap, in which case there is no "orig size".
		 */
		__kmem_free_invalid_object_size_panic(map,
		    req_addr, req_size + delta, entry);
	}
#endif /* KASAN */
#if KASAN_CLASSIC
	if (flags & KMF_KASAN_GUARD) {
		kasan_poison_range(addr, size, ASAN_VALID);
	}
#endif
#if KASAN_TBI
	if (flags & KMF_TAG) {
		kasan_tbi_mark_free_space(req_addr, size);
	}
#endif /* KASAN_TBI */

	/*
	 * vm_map_remove_and_unlock is called with VM_MAP_REMOVE_KUNWIRE, which
	 * unwires the kernel mapping. The page won't be mapped any longer, so
	 * no extra step is required for memory tagging to "clear" it -- the
	 * page will be laundered later when it is reused.
	 */
	return vm_map_remove_and_unlock(map, addr, addr + size,
	           vmr_flags, guard).kmr_size - delta;
}

__exported void
kmem_free_external(
	vm_map_t        map,
	vm_offset_t     addr,
	vm_size_t       size);
void
kmem_free_external(
	vm_map_t        map,
	vm_offset_t     addr,
	vm_size_t       size)
{
	if (size) {
		kmem_free(map, trunc_page(addr), size);
#if MACH_ASSERT
	} else {
		printf("kmem_free(map=%p, addr=%p) called with size=0, lr: %p\n",
		    map, (void *)addr, __builtin_return_address(0));
#endif
	}
}

#pragma mark kmem metadata

/*
 * Guard objects for kmem pointer allocation:
 *
 * Guard objects introduce size slabs to kmem pointer allocations that are
 * allocated in chunks of n * sizeclass. When an allocation of a specific
 * sizeclass is requested, a random slot from [0, n) is returned.
 * Allocations are returned from that chunk until m slots are left. The
 * remaining m slots are referred to as guard objects. They don't get
 * allocated and the chunk is then considered full. When an allocation is
 * freed back to the chunk, one of the now m + 1 free slots becomes
 * available for the next allocation of that sizeclass.
 *
 * Guard objects are intended to make exploitation of use-after-frees harder,
 * as allocations that are freed can no longer be reliably reallocated.
 * They also make exploitation of OOBs harder, as overflowing out of an
 * allocation can no longer be done safely even with sufficient spraying.
 */
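/*
 * Illustrative example (assuming a 16K-page configuration, where
 * KMEM_CHUNK_SIZE_MIN below is 16 * PAGE_SIZE = 256K; see
 * kmem_sizeclass_init() for the per-sizeclass math): a chunk of the 16K
 * sizeclass is a single 256K region with n = 16 slots, of which
 * m = KMEM_NUM_GUARDS = 2 are kept back as guards, so at most 14 slots of
 * that chunk are ever handed out at once.
 */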

#define KMEM_META_PRIMARY    UINT8_MAX
#define KMEM_META_START     (UINT8_MAX - 1)
#define KMEM_META_FREE      (UINT8_MAX - 2)
#if __ARM_16K_PG__
#define KMEM_MIN_SIZE        PAGE_SIZE
#define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 16)
#else /* __ARM_16K_PG__ */
/*
 * PAGE_SIZE isn't a compile time constant on some arm64 devices. Those
 * devices use 4k page size when their RAM is <= 1GB and 16k otherwise.
 * Therefore populate sizeclasses from 4k for those devices.
 */
#define KMEM_MIN_SIZE       (4 * 1024)
#define KMEM_CHUNK_SIZE_MIN (KMEM_MIN_SIZE * 32)
#endif /* __ARM_16K_PG__ */
#define KMEM_MAX_SIZE       (32ULL << 20)
#define KMEM_START_IDX      (kmem_log2down(KMEM_MIN_SIZE))
#define KMEM_LAST_IDX       (kmem_log2down(KMEM_MAX_SIZE))
#define KMEM_NUM_SIZECLASS  (KMEM_LAST_IDX - KMEM_START_IDX + 1)
#define KMEM_FRONTS         (KMEM_RANGE_ID_NUM_PTR * 2)
#define KMEM_NUM_GUARDS      2
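/*
 * Sizeclasses are powers of two from KMEM_MIN_SIZE up to KMEM_MAX_SIZE (32M).
 * kmem_get_idx_from_size() below maps a size to the smallest sizeclass that
 * fits it: e.g. with a 4K KMEM_MIN_SIZE, 4K maps to index 0, sizes in
 * (4K, 8K] map to index 1, and 32M maps to KMEM_NUM_SIZECLASS - 1.
 */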

struct kmem_page_meta {
	union {
		/*
		 * On primary allocated chunk with KMEM_META_PRIMARY marker
		 */
		uint32_t km_bitmap;
		/*
		 * On start and end of free chunk with KMEM_META_FREE marker
		 */
		uint32_t km_free_chunks;
	};
	/*
	 * KMEM_META_PRIMARY: Start meta of allocated chunk
	 * KMEM_META_FREE   : Start and end meta of free chunk
	 * KMEM_META_START  : Meta region start and end
	 */
	uint8_t  km_page_marker;
	uint8_t  km_sizeclass;
	union {
		/*
		 * On primary allocated chunk with KMEM_META_PRIMARY marker
		 */
		uint16_t km_chunk_len;
		/*
		 * On secondary allocated chunks
		 */
		uint16_t km_page_idx;
	};
	LIST_ENTRY(kmem_page_meta) km_link;
} kmem_page_meta_t;

typedef LIST_HEAD(kmem_list_head, kmem_page_meta) kmem_list_head_t;
struct kmem_sizeclass {
	vm_map_size_t                   ks_size;
	uint32_t                        ks_num_chunk;
	uint32_t                        ks_num_elem;
	crypto_random_ctx_t __zpercpu   ks_rng_ctx;
	kmem_list_head_t                ks_allfree_head[KMEM_FRONTS];
	kmem_list_head_t                ks_partial_head[KMEM_FRONTS];
	kmem_list_head_t                ks_full_head[KMEM_FRONTS];
};

static struct kmem_sizeclass kmem_size_array[KMEM_NUM_SIZECLASS];

/*
 * Locks to synchronize metadata population
 */
static LCK_GRP_DECLARE(kmem_locks_grp, "kmem_locks");
static LCK_MTX_DECLARE(kmem_meta_region_lck, &kmem_locks_grp);
#define kmem_meta_lock()   lck_mtx_lock(&kmem_meta_region_lck)
#define kmem_meta_unlock() lck_mtx_unlock(&kmem_meta_region_lck)

static SECURITY_READ_ONLY_LATE(struct mach_vm_range)
kmem_meta_range[KMEM_RANGE_ID_NUM_PTR + 1];
static SECURITY_READ_ONLY_LATE(struct kmem_page_meta *)
kmem_meta_base[KMEM_RANGE_ID_NUM_PTR + 1];
/*
 * Keeps track of metadata high water mark for each front
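 *
 * Each pointer range has two fronts (see kmem_get_front()): an even-indexed
 * "left" front growing up from the start of the range, and an odd-indexed
 * "right" front growing down from its end.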
 */
static struct kmem_page_meta *kmem_meta_hwm[KMEM_FRONTS];
static SECURITY_READ_ONLY_LATE(vm_map_t)
kmem_meta_map[KMEM_RANGE_ID_NUM_PTR + 1];
static vm_map_size_t kmem_meta_size;

static uint32_t
kmem_get_front(
	kmem_range_id_t         range_id,
	bool                    from_right)
{
	assert((range_id >= KMEM_RANGE_ID_FIRST) &&
	    (range_id <= KMEM_RANGE_ID_NUM_PTR));
	return (range_id - KMEM_RANGE_ID_FIRST) * 2 + from_right;
}

static inline uint32_t
kmem_slot_idx_to_bit(
	uint32_t                slot_idx,
	uint32_t                size_idx __unused)
{
	assert(slot_idx < kmem_size_array[size_idx].ks_num_elem);
	return 1ull << slot_idx;
}

static uint32_t
kmem_get_idx_from_size(vm_map_size_t size)
{
	assert(size >= KMEM_MIN_SIZE && size <= KMEM_MAX_SIZE);
	return kmem_log2down(size - 1) - KMEM_START_IDX + 1;
}

__abortlike
static void
kmem_invalid_size_idx(uint32_t idx)
{
	panic("Invalid sizeclass idx %u", idx);
}

static vm_map_size_t
kmem_get_size_from_idx(uint32_t idx)
{
	if (__improbable(idx >= KMEM_NUM_SIZECLASS)) {
		kmem_invalid_size_idx(idx);
	}
	return 1ul << (idx + KMEM_START_IDX);
}

static inline uint16_t
kmem_get_page_idx(struct kmem_page_meta *meta)
{
	uint8_t page_marker = meta->km_page_marker;

	return (page_marker == KMEM_META_PRIMARY) ? 0 : meta->km_page_idx;
}

__abortlike
static void
kmem_invalid_chunk_len(struct kmem_page_meta *meta)
{
	panic("Reading free chunks for meta %p where marker != KMEM_META_PRIMARY",
	    meta);
}

static inline uint16_t
kmem_get_chunk_len(struct kmem_page_meta *meta)
{
	if (__improbable(meta->km_page_marker != KMEM_META_PRIMARY)) {
		kmem_invalid_chunk_len(meta);
	}

	return meta->km_chunk_len;
}

__abortlike
static void
kmem_invalid_free_chunk_len(struct kmem_page_meta *meta)
{
	panic("Reading free chunks for meta %p where marker != KMEM_META_FREE",
	    meta);
}

static inline uint32_t
kmem_get_free_chunk_len(struct kmem_page_meta *meta)
{
	if (__improbable(meta->km_page_marker != KMEM_META_FREE)) {
		kmem_invalid_free_chunk_len(meta);
	}

	return meta->km_free_chunks;
}

/*
 * Return the metadata corresponding to the specified address
 */
static struct kmem_page_meta *
kmem_addr_to_meta(
	vm_map_offset_t         addr,
	vm_map_range_id_t       range_id,
	vm_map_offset_t        *range_start,
	uint64_t               *meta_idx)
{
	struct kmem_page_meta *meta_base = kmem_meta_base[range_id];

	*range_start = kmem_ranges[range_id].min_address;
	*meta_idx = (addr - *range_start) / KMEM_CHUNK_SIZE_MIN;
	return &meta_base[*meta_idx];
}

/*
 * Return the metadata start of the chunk that the address belongs to
 */
static struct kmem_page_meta *
kmem_addr_to_meta_start(
	vm_address_t            addr,
	vm_map_range_id_t       range_id,
	vm_map_offset_t        *chunk_start)
{
	vm_map_offset_t range_start;
	uint64_t meta_idx;
	struct kmem_page_meta *meta;

	meta = kmem_addr_to_meta(addr, range_id, &range_start, &meta_idx);
	meta_idx -= kmem_get_page_idx(meta);
	meta -= kmem_get_page_idx(meta);
	assert(meta->km_page_marker == KMEM_META_PRIMARY);
	*chunk_start = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN);
	return meta;
}

__startup_func
static void
kmem_init_meta_front(
	struct kmem_page_meta  *meta,
	kmem_range_id_t         range_id,
	bool                    from_right)
{
	kernel_memory_populate(trunc_page((vm_map_offset_t) meta), PAGE_SIZE,
	    KMA_KOBJECT | KMA_ZERO | KMA_NOFAIL, VM_KERN_MEMORY_OSFMK);
	meta->km_page_marker = KMEM_META_START;
	if (!from_right) {
		meta++;
		kmem_meta_base[range_id] = meta;
	}
	kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta;
}

__startup_func
static void
kmem_metadata_init(void)
{
	for (kmem_range_id_t i = KMEM_RANGE_ID_FIRST; i <= kmem_ptr_ranges; i++) {
		vm_map_offset_t addr = kmem_meta_range[i].min_address;
		struct kmem_page_meta *meta;
		uint64_t meta_idx;

		vm_map_will_allocate_early_map(&kmem_meta_map[i]);
		kmem_meta_map[i] = kmem_suballoc(kernel_map, &addr, kmem_meta_size,
		    VM_MAP_CREATE_NEVER_FAULTS | VM_MAP_CREATE_DISABLE_HOLELIST,
		    VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, KMS_PERMANENT | KMS_NOFAIL,
		    VM_KERN_MEMORY_OSFMK).kmr_submap;

		kmem_meta_range[i].min_address = addr;
		kmem_meta_range[i].max_address = addr + kmem_meta_size;

		meta = (struct kmem_page_meta *) kmem_meta_range[i].min_address;
		kmem_init_meta_front(meta, i, 0);

		meta = kmem_addr_to_meta(kmem_ranges[i].max_address, i, &addr,
		    &meta_idx);
		kmem_init_meta_front(meta, i, 1);
	}
}

__startup_func
static void
kmem_init_front_head(
	struct kmem_sizeclass  *ks,
	uint32_t                front)
{
	LIST_INIT(&ks->ks_allfree_head[front]);
	LIST_INIT(&ks->ks_partial_head[front]);
	LIST_INIT(&ks->ks_full_head[front]);
}

__startup_func
static void
kmem_sizeclass_init(void)
{
	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
		struct kmem_sizeclass *ks = &kmem_size_array[i];
		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST;

		ks->ks_size = kmem_get_size_from_idx(i);
		ks->ks_num_chunk = roundup(8 * ks->ks_size, KMEM_CHUNK_SIZE_MIN) /
		    KMEM_CHUNK_SIZE_MIN;
		ks->ks_num_elem = (ks->ks_num_chunk * KMEM_CHUNK_SIZE_MIN) / ks->ks_size;
		assert(ks->ks_num_elem <=
		    (sizeof(((struct kmem_page_meta *)0)->km_bitmap) * 8));
		for (; range_id <= KMEM_RANGE_ID_NUM_PTR; range_id++) {
			kmem_init_front_head(ks, kmem_get_front(range_id, 0));
			kmem_init_front_head(ks, kmem_get_front(range_id, 1));
		}
	}
}

/*
 * This is done during EARLY_BOOT as it needs the corecrypto module to be
 * set up.
 */
__startup_func
static void
kmem_crypto_init(void)
{
	vm_size_t ctx_size = crypto_random_kmem_ctx_size();

	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
		struct kmem_sizeclass *ks = &kmem_size_array[i];

		ks->ks_rng_ctx = zalloc_percpu_permanent(ctx_size, ZALIGN_PTR);
		zpercpu_foreach(ctx, ks->ks_rng_ctx) {
			crypto_random_kmem_init(ctx);
		}
	}
}
STARTUP(EARLY_BOOT, STARTUP_RANK_MIDDLE, kmem_crypto_init);

__abortlike
static void
kmem_validate_slot_panic(
	vm_map_offset_t         addr,
	struct kmem_page_meta  *meta,
	uint32_t                slot_idx,
	uint32_t                size_idx)
{
	if (meta->km_page_marker != KMEM_META_PRIMARY) {
		panic("Metadata (%p) for addr (%p) not primary", meta, (void *)addr);
	}
	if (meta->km_sizeclass != size_idx) {
		panic("Metadata's (%p) sizeclass (%u != %u) changed during deletion",
		    meta, meta->km_sizeclass, size_idx);
	}
	panic("Double free detected: Slot (%u) in meta (%p) for addr %p marked free",
	    slot_idx, meta, (void *)addr);
}

__abortlike
static void
kmem_invalid_slot_for_addr(
	mach_vm_range_t         slot,
	vm_map_offset_t         start,
	vm_map_offset_t         end)
{
	panic("Invalid kmem ptr slot [%p:%p] for allocation [%p:%p]",
	    (void *)slot->min_address, (void *)slot->max_address,
	    (void *)start, (void *)end);
}

void
kmem_validate_slot(
	vm_map_offset_t         addr,
	struct kmem_page_meta  *meta,
	uint32_t                size_idx,
	uint32_t                slot_idx)
{
	if ((meta->km_page_marker != KMEM_META_PRIMARY) ||
	    (meta->km_sizeclass != size_idx) ||
	    ((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) != 0)) {
		kmem_validate_slot_panic(addr, meta, slot_idx, size_idx);
	}
}

static void
kmem_validate_slot_initial(
	mach_vm_range_t         slot,
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	struct kmem_page_meta  *meta,
	uint32_t                size_idx,
	uint32_t                slot_idx)
{
	if ((slot->min_address == 0) || (slot->max_address == 0) ||
	    (start < slot->min_address) || (start >= slot->max_address) ||
	    (end > slot->max_address)) {
		kmem_invalid_slot_for_addr(slot, start, end);
	}

	kmem_validate_slot(start, meta, size_idx, slot_idx);
}

uint32_t
kmem_addr_get_slot_idx(
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_map_range_id_t       range_id,
	struct kmem_page_meta **meta,
	uint32_t               *size_idx,
	mach_vm_range_t         slot)
{
	vm_map_offset_t chunk_start;
	vm_map_size_t slot_size;
	uint32_t slot_idx;

	*meta = kmem_addr_to_meta_start(start, range_id, &chunk_start);
	*size_idx = (*meta)->km_sizeclass;
	slot_size = kmem_get_size_from_idx(*size_idx);
	slot_idx = (start - chunk_start) / slot_size;
	slot->min_address = chunk_start + slot_idx * slot_size;
	slot->max_address = slot->min_address + slot_size;

	kmem_validate_slot_initial(slot, start, end, *meta, *size_idx, slot_idx);

	return slot_idx;
}

static bool
kmem_populate_needed(vm_offset_t from, vm_offset_t to)
{
#if KASAN
#pragma unused(from, to)
	return true;
#else
	vm_offset_t page_addr = trunc_page(from);

	for (; page_addr < to; page_addr += PAGE_SIZE) {
		/*
		 * Note: on KASan configurations this check is skipped entirely (see
		 * above), because it can race with another thread doing a populate
		 * on the same metadata page: we may observe an updated pmap but an
		 * unmapped KASan shadow, and then fault in the shadow when we first
		 * access the metadata page. With KASan we therefore always
		 * synchronize on the kmem_meta_lock.
		 */
		if (!pmap_find_phys(kernel_pmap, page_addr)) {
			return true;
		}
	}

	return false;
#endif /* !KASAN */
}

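/*
 * Populate the metadata pages backing [from, to). Called with the kernel map
 * lock held; the lock is dropped while populating (and while waiting for
 * pages) and retaken before returning. All metadata updates are serialized
 * by the kmem_meta_lock.
 */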
static void
kmem_populate_meta_locked(vm_offset_t from, vm_offset_t to)
{
	vm_offset_t page_addr = trunc_page(from);

	vm_map_unlock(kernel_map);

	for (; page_addr < to; page_addr += PAGE_SIZE) {
		for (;;) {
			kern_return_t ret = KERN_SUCCESS;

			/*
			 * All updates to kmem metadata are done under the kmem_meta_lock
			 */
			kmem_meta_lock();
			if (0 == pmap_find_phys(kernel_pmap, page_addr)) {
				ret = kernel_memory_populate(page_addr,
				    PAGE_SIZE, KMA_NOPAGEWAIT | KMA_KOBJECT | KMA_ZERO,
				    VM_KERN_MEMORY_OSFMK);
			}
			kmem_meta_unlock();

			if (ret == KERN_SUCCESS) {
				break;
			}

			/*
			 * We can't block waiting for pages while holding a global lock
			 * as that leads to bad system deadlocks, which is why the
			 * populate above uses KMA_NOPAGEWAIT; if the allocation failed,
			 * we need to do the VM_PAGE_WAIT() outside of the lock and retry.
			 */
			VM_PAGE_WAIT();
		}
	}

	vm_map_lock(kernel_map);
}

__abortlike
static void
kmem_invalid_meta_panic(
	struct kmem_page_meta  *meta,
	uint32_t                slot_idx,
	struct kmem_sizeclass   sizeclass)
{
	uint32_t size_idx = kmem_get_idx_from_size(sizeclass.ks_size);

	if (slot_idx >= sizeclass.ks_num_elem) {
		panic("Invalid slot idx %u [0:%u] for meta %p", slot_idx,
		    sizeclass.ks_num_elem, meta);
	}
	if (meta->km_sizeclass != size_idx) {
		panic("Invalid size_idx (%u != %u) in meta %p", size_idx,
		    meta->km_sizeclass, meta);
	}
	panic("page_marker %u not primary in meta %p", meta->km_page_marker, meta);
}

__abortlike
static void
kmem_slot_has_entry_panic(
	vm_map_entry_t          entry,
	vm_map_offset_t         addr)
{
	panic("Entry (%p) already exists for addr (%p) being returned",
	    entry, (void *)addr);
}

__abortlike
static void
kmem_slot_not_found(
	struct kmem_page_meta  *meta,
	uint32_t                slot_idx)
{
	panic("%uth free slot not found for meta %p bitmap %u", slot_idx, meta,
	    meta->km_bitmap);
}

/*
 * Returns a 16bit random number between 0 and
 * upper_limit (inclusive)
 */
__startup_func
uint16_t
kmem_get_random16(
	uint16_t                upper_limit)
{
	static uint64_t random_entropy;
	assert(upper_limit < UINT16_MAX);
	if (random_entropy == 0) {
		random_entropy = early_random();
	}
	uint32_t result = random_entropy & UINT32_MAX;
	random_entropy >>= 32;
	return (uint16_t)(result % (upper_limit + 1));
}

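/*
 * Returns the bit index of the n-th (0-based) set bit in the given bitmap,
 * where set bits represent free slots, by skipping runs of allocated (0)
 * bits and counting runs of free (1) bits. For example, for bitmap 0b1101
 * and n = 1, the result is 2. Panics if the bitmap has fewer than n + 1
 * set bits.
 */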
static uint32_t
kmem_get_nth_free_slot(
	struct kmem_page_meta  *meta,
	uint32_t                n,
	uint32_t                bitmap)
{
	uint32_t zeros_seen = 0, ones_seen = 0;

	while (bitmap) {
		uint32_t count = __builtin_ctz(bitmap);

		zeros_seen += count;
		bitmap >>= count;
		if (__probable(~bitmap)) {
			count = __builtin_ctz(~bitmap);
		} else {
			count = 32;
		}
		if (count + ones_seen > n) {
			return zeros_seen + n;
		}
		ones_seen += count;
		bitmap >>= count;
	}

	kmem_slot_not_found(meta, n);
}


static uint32_t
kmem_get_next_slot(
	struct kmem_page_meta  *meta,
	struct kmem_sizeclass   sizeclass,
	uint32_t                bitmap)
{
	uint32_t num_slots = __builtin_popcount(bitmap);
	uint64_t slot_idx = 0;

	assert(num_slots > 0);
	if (__improbable(startup_phase < STARTUP_SUB_EARLY_BOOT)) {
		/*
		 * Use early random prior to early boot as the ks_rng_ctx requires
		 * the corecrypto module to be set up before it is initialized and
		 * used.
		 *
		 * num_slots can't be 0 as we take this path when we have more than
		 * one slot left.
		 */
		slot_idx = kmem_get_random16((uint16_t)num_slots - 1);
	} else {
		crypto_random_uniform(zpercpu_get(sizeclass.ks_rng_ctx), num_slots,
		    &slot_idx);
	}

	return kmem_get_nth_free_slot(meta, slot_idx, bitmap);
}

/*
 * Returns an unallocated slot from the given metadata
 */
static vm_map_offset_t
kmem_get_addr_from_meta(
	struct kmem_page_meta  *meta,
	vm_map_range_id_t       range_id,
	struct kmem_sizeclass   sizeclass,
	vm_map_entry_t         *entry)
{
	vm_map_offset_t addr;
	vm_map_size_t size = sizeclass.ks_size;
	uint32_t size_idx = kmem_get_idx_from_size(size);
	uint64_t meta_idx = meta - kmem_meta_base[range_id];
	mach_vm_offset_t range_start = kmem_ranges[range_id].min_address;
	uint32_t slot_bit;
	uint32_t slot_idx = kmem_get_next_slot(meta, sizeclass, meta->km_bitmap);

	if ((slot_idx >= sizeclass.ks_num_elem) ||
	    (meta->km_sizeclass != size_idx) ||
	    (meta->km_page_marker != KMEM_META_PRIMARY)) {
		kmem_invalid_meta_panic(meta, slot_idx, sizeclass);
	}

	slot_bit = kmem_slot_idx_to_bit(slot_idx, size_idx);
	meta->km_bitmap &= ~slot_bit;

	addr = range_start + (meta_idx * KMEM_CHUNK_SIZE_MIN) + (slot_idx * size);
	assert(kmem_range_contains_fully(range_id, addr, size));
	if (vm_map_lookup_entry(kernel_map, addr, entry)) {
		kmem_slot_has_entry_panic(*entry, addr);
	}
	if ((*entry != vm_map_to_entry(kernel_map)) &&
	    ((*entry)->vme_next != vm_map_to_entry(kernel_map)) &&
	    ((*entry)->vme_next->vme_start < (addr + size))) {
		kmem_slot_has_entry_panic(*entry, addr);
	}
	return addr;
}

__abortlike
static void
kmem_range_out_of_va(
	kmem_range_id_t         range_id,
	uint32_t                num_chunks)
{
	panic("No more VA to allocate %u chunks in range %u", num_chunks, range_id);
}

static void
kmem_init_allocated_chunk(
	struct kmem_page_meta  *meta,
	struct kmem_sizeclass   sizeclass,
	uint32_t                size_idx)
{
	uint32_t meta_num = sizeclass.ks_num_chunk;
	uint32_t num_elem = sizeclass.ks_num_elem;

	meta->km_bitmap = (1ull << num_elem) - 1;
	meta->km_chunk_len = (uint16_t)meta_num;
	assert(LIST_NEXT(meta, km_link) == NULL);
	assert(meta->km_link.le_prev == NULL);
	meta->km_sizeclass = (uint8_t)size_idx;
	meta->km_page_marker = KMEM_META_PRIMARY;
	meta++;
	for (uint32_t i = 1; i < meta_num; i++) {
		meta->km_page_idx = (uint16_t)i;
		meta->km_sizeclass = (uint8_t)size_idx;
		meta->km_page_marker = 0;
		meta->km_bitmap = 0;
		meta++;
	}
}

static uint32_t
kmem_get_additional_meta(
	struct kmem_page_meta  *meta,
	uint32_t                meta_req,
	bool                    from_right,
	struct kmem_page_meta **adj_free_meta)
{
	struct kmem_page_meta *meta_prev = from_right ? meta : (meta - 1);

	if (meta_prev->km_page_marker == KMEM_META_FREE) {
		uint32_t chunk_len = kmem_get_free_chunk_len(meta_prev);

		*adj_free_meta = from_right ? meta_prev : (meta_prev - chunk_len + 1);
		meta_req -= chunk_len;
	} else {
		*adj_free_meta = NULL;
	}

	return meta_req;
}


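/*
 * Carve a new chunk out of the given front by moving its metadata high
 * watermark, populating the backing metadata pages if needed (which drops
 * the kernel map lock), and coalescing with an adjacent free chunk at the
 * watermark when possible. Returns NULL if the conditions changed while the
 * lock was dropped, so the caller can re-examine the freelists.
 */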
static struct kmem_page_meta *
kmem_get_new_chunk(
	vm_map_range_id_t       range_id,
	bool                    from_right,
	uint32_t                size_idx)
{
	struct kmem_sizeclass sizeclass = kmem_size_array[size_idx];
	struct kmem_page_meta *start, *end, *meta_update;
	struct kmem_page_meta *adj_free_meta = NULL;
	uint32_t meta_req = sizeclass.ks_num_chunk;

	for (;;) {
		struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
		struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
		struct kmem_page_meta *meta;
		vm_offset_t start_addr, end_addr;
		uint32_t meta_num;

		meta = from_right ? metab : metaf;
		meta_num = kmem_get_additional_meta(meta, meta_req, from_right,
		    &adj_free_meta);

		if (metaf + meta_num >= metab) {
			kmem_range_out_of_va(range_id, meta_num);
		}

		start = from_right ? (metab - meta_num) : metaf;
		end = from_right ? metab : (metaf + meta_num);

		start_addr = (vm_offset_t)start;
		end_addr   = (vm_offset_t)end;

		/*
		 * If the new high watermark stays on the same page,
		 * no need to populate and drop the lock.
		 */
		if (!page_aligned(from_right ? end_addr : start_addr) &&
		    trunc_page(start_addr) == trunc_page(end_addr - 1)) {
			break;
		}
		if (!kmem_populate_needed(start_addr, end_addr)) {
			break;
		}

		kmem_populate_meta_locked(start_addr, end_addr);

		/*
		 * Since we dropped the lock, reassess whether the conditions still hold:
		 * - the HWM we are changing must not have moved
		 * - the other HWM must not intersect with ours
		 * - in case of coalescing, the adjacent free meta must still
		 *   be free and of the same size.
		 *
		 * If we failed to grow, reevaluate whether freelists have
		 * entries now by returning NULL.
		 */
		metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
		metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
		if (meta != (from_right ? metab : metaf)) {
			return NULL;
		}
		if (metaf + meta_num >= metab) {
			kmem_range_out_of_va(range_id, meta_num);
		}
		if (adj_free_meta) {
			if (adj_free_meta->km_page_marker != KMEM_META_FREE ||
			    kmem_get_free_chunk_len(adj_free_meta) !=
			    meta_req - meta_num) {
				return NULL;
			}
		}

		break;
	}

	/*
	 * If there is an adjacent free chunk remove it from free list
	 */
	if (adj_free_meta) {
		LIST_REMOVE(adj_free_meta, km_link);
		LIST_NEXT(adj_free_meta, km_link) = NULL;
		adj_free_meta->km_link.le_prev = NULL;
	}

	/*
	 * Update hwm
	 */
	meta_update = from_right ? start : end;
	kmem_meta_hwm[kmem_get_front(range_id, from_right)] = meta_update;

	/*
	 * Initialize metadata
	 */
	start = from_right ? start : (end - meta_req);
	kmem_init_allocated_chunk(start, sizeclass, size_idx);

	return start;
}

static void
kmem_requeue_meta(
	struct kmem_page_meta  *meta,
	struct kmem_list_head  *head)
{
	LIST_REMOVE(meta, km_link);
	LIST_INSERT_HEAD(head, meta, km_link);
}

/*
 * Return corresponding sizeclass to stash free chunks in
 */
__abortlike
static void
kmem_invalid_chunk_num(uint32_t chunks)
{
	panic("Invalid number of chunks %u\n", chunks);
}

static uint32_t
kmem_get_size_idx_for_chunks(uint32_t chunks)
{
	for (uint32_t i = KMEM_NUM_SIZECLASS - 1; i > 0; i--) {
		if (chunks >= kmem_size_array[i].ks_num_chunk) {
			return i;
		}
	}
	kmem_invalid_chunk_num(chunks);
}

static void
kmem_clear_meta_range(struct kmem_page_meta *meta, uint32_t count)
{
	bzero(meta, count * sizeof(struct kmem_page_meta));
}

static void
kmem_check_meta_range_is_clear(struct kmem_page_meta *meta, uint32_t count)
{
#if MACH_ASSERT
	size_t size = count * sizeof(struct kmem_page_meta);

	assert(memcmp_zero_ptr_aligned(meta, size) == 0);
#else
#pragma unused(meta, count)
#endif
}

/*!
 * @function kmem_init_free_chunk()
 *
 * @discussion
 * This function prepares a range of chunks to be put on a free list.
 * The first and last metadata might be dirty, but the "inner" ones
 * must be zero filled by the caller prior to calling this function.
 */
static void
kmem_init_free_chunk(
	struct kmem_page_meta  *meta,
	uint32_t                num_chunks,
	uint32_t                front)
{
	struct kmem_sizeclass *sizeclass;
	uint32_t size_idx = kmem_get_size_idx_for_chunks(num_chunks);

	if (num_chunks > 2) {
		kmem_check_meta_range_is_clear(meta + 1, num_chunks - 2);
	}

	meta[0] = (struct kmem_page_meta){
		.km_free_chunks = num_chunks,
		.km_page_marker = KMEM_META_FREE,
		.km_sizeclass   = (uint8_t)size_idx,
	};
	if (num_chunks > 1) {
		meta[num_chunks - 1] = (struct kmem_page_meta){
			.km_free_chunks = num_chunks,
			.km_page_marker = KMEM_META_FREE,
			.km_sizeclass   = (uint8_t)size_idx,
		};
	}

	sizeclass = &kmem_size_array[size_idx];
	LIST_INSERT_HEAD(&sizeclass->ks_allfree_head[front], meta, km_link);
}

static struct kmem_page_meta *
kmem_get_free_chunk_from_list(
	struct kmem_sizeclass  *org_sizeclass,
	uint32_t                size_idx,
	uint32_t                front)
{
	struct kmem_sizeclass *sizeclass;
	uint32_t num_chunks = org_sizeclass->ks_num_chunk;
	struct kmem_page_meta *meta;
	uint32_t idx = size_idx;

	while (idx < KMEM_NUM_SIZECLASS) {
		sizeclass = &kmem_size_array[idx];
		meta = LIST_FIRST(&sizeclass->ks_allfree_head[front]);
		if (meta) {
			break;
		}
		idx++;
	}

	/*
	 * Trim if larger in size
	 */
	if (meta) {
		uint32_t num_chunks_free = kmem_get_free_chunk_len(meta);

		assert(meta->km_page_marker == KMEM_META_FREE);
		LIST_REMOVE(meta, km_link);
		LIST_NEXT(meta, km_link) = NULL;
		meta->km_link.le_prev = NULL;
		if (num_chunks_free > num_chunks) {
			num_chunks_free -= num_chunks;
			kmem_init_free_chunk(meta + num_chunks, num_chunks_free, front);
		}

		kmem_init_allocated_chunk(meta, *org_sizeclass, size_idx);
	}

	return meta;
}

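/*
 * Pick a VA slot for an allocation of the given size in the given pointer
 * range: prefer a partially used chunk, then an all-free chunk (trimming it
 * if it is larger than needed), and only grow the range with a brand new
 * chunk as a last resort. The chosen chunk is requeued on the full/partial
 * lists according to how many free slots remain.
 */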
kern_return_t
kmem_locate_space(
	vm_map_size_t           size,
	vm_map_range_id_t       range_id,
	bool                    from_right,
	vm_map_offset_t        *start_inout,
	vm_map_entry_t         *entry_out)
{
	vm_map_entry_t entry;
	uint32_t size_idx = kmem_get_idx_from_size(size);
	uint32_t front = kmem_get_front(range_id, from_right);
	struct kmem_sizeclass *sizeclass = &kmem_size_array[size_idx];
	struct kmem_page_meta *meta;

	assert(size <= sizeclass->ks_size);
again:
	if ((meta = LIST_FIRST(&sizeclass->ks_partial_head[front])) != NULL) {
		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
		/*
		 * Requeue to full if necessary
		 */
		assert(meta->km_page_marker == KMEM_META_PRIMARY);
		if (__builtin_popcount(meta->km_bitmap) == KMEM_NUM_GUARDS) {
			kmem_requeue_meta(meta, &sizeclass->ks_full_head[front]);
		}
	} else if ((meta = kmem_get_free_chunk_from_list(sizeclass, size_idx,
	    front)) != NULL) {
		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
		/*
		 * Queue to partial
		 */
		assert(meta->km_page_marker == KMEM_META_PRIMARY);
		assert(__builtin_popcount(meta->km_bitmap) > KMEM_NUM_GUARDS);
		LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
	} else {
		meta = kmem_get_new_chunk(range_id, from_right, size_idx);
		if (meta == NULL) {
			goto again;
		}
		*start_inout = kmem_get_addr_from_meta(meta, range_id, *sizeclass, &entry);
		assert(meta->km_page_marker == KMEM_META_PRIMARY);
		LIST_INSERT_HEAD(&sizeclass->ks_partial_head[front], meta, km_link);
	}

	if (entry_out) {
		*entry_out = entry;
	}

	return KERN_SUCCESS;
}

/*
 * Determine whether the given metadata was allocated from the right
 */
static bool
kmem_meta_is_from_right(
	kmem_range_id_t         range_id,
	struct kmem_page_meta  *meta)
{
	struct kmem_page_meta *metaf = kmem_meta_hwm[kmem_get_front(range_id, 0)];
	__assert_only struct kmem_page_meta *metab = kmem_meta_hwm[kmem_get_front(range_id, 1)];
	struct kmem_page_meta *meta_base = kmem_meta_base[range_id];
	struct kmem_page_meta *meta_end;

	meta_end = (struct kmem_page_meta *)kmem_meta_range[range_id].max_address;

	if ((meta >= meta_base) && (meta < metaf)) {
		return false;
	}

	assert(meta >= metab && meta < meta_end);
	return true;
}

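/*
 * Return a fully free chunk to its front: remove its primary metadata from
 * the sizeclass lists, clear the chunk's metadata, coalesce with free
 * neighbors on either side (bounded by the front high watermarks), and
 * re-insert the result as a single free chunk.
 */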
static void
kmem_free_chunk(
	kmem_range_id_t         range_id,
	struct kmem_page_meta  *meta,
	bool                    from_right)
{
	struct kmem_page_meta *meta_coalesce = meta - 1;
	struct kmem_page_meta *meta_start = meta;
	uint32_t num_chunks = kmem_get_chunk_len(meta);
	uint32_t add_chunks;
	struct kmem_page_meta *meta_end = meta + num_chunks;
	struct kmem_page_meta *meta_hwm_l, *meta_hwm_r;
	uint32_t front = kmem_get_front(range_id, from_right);

	meta_hwm_l = kmem_meta_hwm[kmem_get_front(range_id, 0)];
	meta_hwm_r = kmem_meta_hwm[kmem_get_front(range_id, 1)];

	LIST_REMOVE(meta, km_link);
	kmem_clear_meta_range(meta, num_chunks);

	/*
	 * Coalesce left
	 */
	if (((from_right && (meta_coalesce >= meta_hwm_r)) || !from_right) &&
	    (meta_coalesce->km_page_marker == KMEM_META_FREE)) {
		meta_start = meta_coalesce - kmem_get_free_chunk_len(meta_coalesce) + 1;
		add_chunks = kmem_get_free_chunk_len(meta_start);
		num_chunks += add_chunks;
		LIST_REMOVE(meta_start, km_link);
		kmem_clear_meta_range(meta_start + add_chunks - 1, 1);
	}

	/*
	 * Coalesce right
	 */
	if (((!from_right && (meta_end < meta_hwm_l)) || from_right) &&
	    (meta_end->km_page_marker == KMEM_META_FREE)) {
		add_chunks = kmem_get_free_chunk_len(meta_end);
		LIST_REMOVE(meta_end, km_link);
		kmem_clear_meta_range(meta_end, 1);
		meta_end = meta_end + add_chunks;
		num_chunks += add_chunks;
	}

	kmem_init_free_chunk(meta_start, num_chunks, front);
}

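/*
 * Mark a slot free in its chunk's bitmap. If the chunk becomes entirely
 * free, give it back to the range (kmem_free_chunk()); if it was previously
 * full (only the guard slots were free), move it back to the partial list.
 */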
static void
kmem_free_slot(
	kmem_range_id_t         range_id,
	mach_vm_range_t         slot)
{
	struct kmem_page_meta *meta;
	vm_map_offset_t chunk_start;
	uint32_t size_idx, chunk_elem, slot_idx, num_elem;
	struct kmem_sizeclass *sizeclass;
	vm_map_size_t slot_size;

	meta = kmem_addr_to_meta_start(slot->min_address, range_id, &chunk_start);
	size_idx = meta->km_sizeclass;
	slot_size = kmem_get_size_from_idx(size_idx);
	slot_idx = (slot->min_address - chunk_start) / slot_size;
	assert((meta->km_bitmap & kmem_slot_idx_to_bit(slot_idx, size_idx)) == 0);
	meta->km_bitmap |= kmem_slot_idx_to_bit(slot_idx, size_idx);

	sizeclass = &kmem_size_array[size_idx];
	chunk_elem = sizeclass->ks_num_elem;
	num_elem = __builtin_popcount(meta->km_bitmap);

	if (num_elem == chunk_elem) {
		/*
		 * If the entire chunk is now empty, return it to the free list
		 */
		bool from_right = kmem_meta_is_from_right(range_id, meta);

		kmem_free_chunk(range_id, meta, from_right);
	} else if (num_elem == KMEM_NUM_GUARDS + 1) {
		/*
		 * If we freed a slot in a full chunk, move it back to the partial list
		 */
		uint32_t front = kmem_get_front(range_id,
		    kmem_meta_is_from_right(range_id, meta));

		kmem_requeue_meta(meta, &sizeclass->ks_partial_head[front]);
	}
}

void
kmem_free_space(
	vm_map_offset_t         start,
	vm_map_offset_t         end,
	vm_map_range_id_t       range_id,
	mach_vm_range_t         slot)
{
	bool entry_present = false;
	vm_map_entry_t prev_entry;
	vm_map_entry_t next_entry;

	if ((slot->min_address == start) && (slot->max_address == end)) {
		/*
		 * Entire slot is being freed at once
		 */
		return kmem_free_slot(range_id, slot);
	}

	entry_present = vm_map_lookup_entry(kernel_map, start, &prev_entry);
	assert(!entry_present);
	next_entry = prev_entry->vme_next;

	if (((prev_entry == vm_map_to_entry(kernel_map) ||
	    prev_entry->vme_end <= slot->min_address)) &&
	    (next_entry == vm_map_to_entry(kernel_map) ||
	    (next_entry->vme_start >= slot->max_address))) {
		/*
		 * Free entire slot
		 */
		kmem_free_slot(range_id, slot);
	}
}

#pragma mark kmem init

/*
 * The default percentage of memory that can be mlocked is scaled based on the total
 * amount of memory in the system. These percentages are calculated
 * offline and stored in this table. We index this table by
 * log2(max_mem) - VM_USER_WIREABLE_MIN_CONFIG. We clamp this index in the range
 * [0, sizeof(wire_limit_percents) / sizeof(vm_map_size_t))
 *
 * Note that these values were picked for mac.
 * If we ever have very large memory config arm devices, we may want to revisit
 * since the kernel overhead is smaller there due to the larger page size.
 */

/* Start scaling iff we're managing > 2^32 = 4GB of RAM. */
#define VM_USER_WIREABLE_MIN_CONFIG 32
#if CONFIG_JETSAM
/* Systems with jetsam can wire a bit more because the system can relieve wired
 * pressure.
 */
static vm_map_size_t wire_limit_percents[] =
{ 80, 80, 80, 80, 82, 85, 88, 91, 94, 97};
#else
static vm_map_size_t wire_limit_percents[] =
{ 70, 73, 76, 79, 82, 85, 88, 91, 94, 97};
#endif /* CONFIG_JETSAM */
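/*
 * For illustration, using the log2(max_mem) indexing described above: a 16GB
 * configuration indexes entry 34 - 32 = 2, i.e. 80% with jetsam and 76%
 * without, before the VM_NOT_USER_WIREABLE_MAX cap below is applied.
 */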

/*
 * Sets the default global user wire limit which limits the amount of
 * memory that can be locked via mlock() based on the above algorithm.
 * This can be overridden via a sysctl.
 */
static void
kmem_set_user_wire_limits(void)
{
	uint64_t available_mem_log;
	uint64_t max_wire_percent;
	size_t wire_limit_percents_length = sizeof(wire_limit_percents) /
	    sizeof(vm_map_size_t);
	vm_map_size_t limit;
	uint64_t config_memsize = max_mem;
#if defined(XNU_TARGET_OS_OSX)
	config_memsize = max_mem_actual;
#endif /* defined(XNU_TARGET_OS_OSX) */

	available_mem_log = bit_floor(config_memsize);

	if (available_mem_log < VM_USER_WIREABLE_MIN_CONFIG) {
		available_mem_log = 0;
	} else {
		available_mem_log -= VM_USER_WIREABLE_MIN_CONFIG;
	}
	if (available_mem_log >= wire_limit_percents_length) {
		available_mem_log = wire_limit_percents_length - 1;
	}
	max_wire_percent = wire_limit_percents[available_mem_log];

	limit = config_memsize * max_wire_percent / 100;
	/* Cap the number of non-lockable bytes at VM_NOT_USER_WIREABLE_MAX */
	if (config_memsize - limit > VM_NOT_USER_WIREABLE_MAX) {
		limit = config_memsize - VM_NOT_USER_WIREABLE_MAX;
	}

	vm_global_user_wire_limit = limit;
	/* the default per task limit is the same as the global limit */
	vm_per_task_user_wire_limit = limit;
	vm_add_wire_count_over_global_limit = 0;
	vm_add_wire_count_over_user_limit = 0;
}

#define KMEM_MAX_CLAIMS 50
__startup_data
struct kmem_range_startup_spec kmem_claims[KMEM_MAX_CLAIMS] = {};
__startup_data
uint32_t kmem_claim_count = 0;

__startup_func
void
kmem_range_startup_init(
	struct kmem_range_startup_spec *sp)
{
	assert(kmem_claim_count < KMEM_MAX_CLAIMS - KMEM_RANGE_COUNT);
	if (sp->kc_calculate_sz) {
		sp->kc_size = (sp->kc_calculate_sz)();
	}
	if (sp->kc_size) {
		kmem_claims[kmem_claim_count] = *sp;
		kmem_claim_count++;
	}
}

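/*
 * Burn a random, VA-only, permanent allocation of 1 to 512 pages at the
 * bottom of the kernel_map heap and return the address right after it, so
 * that subsequent submap and allocation addresses are fuzzed.
 */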
static vm_offset_t
kmem_fuzz_start(void)
{
	vm_offset_t kmapoff_kaddr = 0;
	uint32_t kmapoff_pgcnt = (early_random() & 0x1ff) + 1; /* 9 bits */
	vm_map_size_t kmapoff_size = ptoa(kmapoff_pgcnt);

	kmem_alloc(kernel_map, &kmapoff_kaddr, kmapoff_size,
	    KMA_NOFAIL | KMA_KOBJECT | KMA_PERMANENT | KMA_VAONLY,
	    VM_KERN_MEMORY_OSFMK);
	return kmapoff_kaddr + kmapoff_size;
}

/*
 * Generate a randomly shuffled array of indices from 0 to count - 1
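 * (an "inside-out" Fisher-Yates shuffle driven by kmem_get_random16())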
 */
__startup_func
void
kmem_shuffle(
	uint16_t       *shuffle_buf,
	uint16_t        count)
{
	for (uint16_t i = 0; i < count; i++) {
		uint16_t j = kmem_get_random16(i);
		if (j != i) {
			shuffle_buf[i] = shuffle_buf[j];
		}
		shuffle_buf[j] = i;
	}
}

__startup_func
static void
kmem_shuffle_claims(void)
{
	uint16_t shuffle_buf[KMEM_MAX_CLAIMS] = {};
	uint16_t limit = (uint16_t)kmem_claim_count;

	kmem_shuffle(&shuffle_buf[0], limit);
	for (uint16_t i = 0; i < limit; i++) {
		struct kmem_range_startup_spec tmp = kmem_claims[i];
		kmem_claims[i] = kmem_claims[shuffle_buf[i]];
		kmem_claims[shuffle_buf[i]] = tmp;
	}
}

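/*
 * The claim at cur_idx was placed above the minimum address it is restricted
 * to. Re-insert it at a randomly chosen earlier position that satisfies the
 * restriction, shifting the displaced claims (and their tentative ranges) up
 * by the claim's size, and pin it there with KC_NO_MOVE.
 */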
__startup_func
static void
kmem_readjust_ranges(
	uint32_t        cur_idx)
{
	assert(cur_idx != 0);
	uint32_t j = cur_idx - 1, random;
	struct kmem_range_startup_spec sp = kmem_claims[cur_idx];
	struct mach_vm_range *sp_range = sp.kc_range;

	/*
	 * Find max index where restriction is met
	 */
	for (; j > 0; j--) {
		struct kmem_range_startup_spec spj = kmem_claims[j];
		vm_map_offset_t max_start = spj.kc_range->min_address;
		if (spj.kc_flags & KC_NO_MOVE) {
			panic("kmem_range_init: Can't scramble with multiple constraints");
		}
		if (max_start <= sp_range->min_address) {
			break;
		}
	}

	/*
	 * Pick a random index from 0 to max index and shift claims to the right
	 * to make room for the restricted claim
	 */
	random = kmem_get_random16((uint16_t)j);
	assert(random <= j);

	sp_range->min_address = kmem_claims[random].kc_range->min_address;
	sp_range->max_address = sp_range->min_address + sp.kc_size;

	for (j = cur_idx - 1; j >= random && j != UINT32_MAX; j--) {
		struct kmem_range_startup_spec spj = kmem_claims[j];
		struct mach_vm_range *range = spj.kc_range;
		range->min_address += sp.kc_size;
		range->max_address += sp.kc_size;
		kmem_claims[j + 1] = spj;
	}

	sp.kc_flags = KC_NO_MOVE;
	kmem_claims[random] = sp;
}

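/*
 * Split each pointer range's VA budget between chunk VA and the metadata
 * that describes it, so that every KMEM_CHUNK_SIZE_MIN chunk gets one
 * struct kmem_page_meta (plus two KMEM_META_START sentinels), then register
 * a claim for both pieces per range. The rounding slack is returned to the
 * caller so it can be folded back into the data range.
 */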
__startup_func
static vm_map_size_t
kmem_add_ptr_claims(void)
{
	uint64_t kmem_meta_num, kmem_ptr_chunks;
	vm_map_size_t org_ptr_range_size = ptr_range_size;

	ptr_range_size -= PAGE_SIZE;
	ptr_range_size *= KMEM_CHUNK_SIZE_MIN;
	ptr_range_size /= (KMEM_CHUNK_SIZE_MIN + sizeof(struct kmem_page_meta));

	kmem_ptr_chunks = ptr_range_size / KMEM_CHUNK_SIZE_MIN;
	ptr_range_size = kmem_ptr_chunks * KMEM_CHUNK_SIZE_MIN;

	kmem_meta_num = kmem_ptr_chunks + 2;
	kmem_meta_size = round_page(kmem_meta_num * sizeof(struct kmem_page_meta));

	assert(kmem_meta_size + ptr_range_size <= org_ptr_range_size);
	/*
	 * Add claims for kmem's ranges
	 */
	for (uint32_t i = 0; i < kmem_ptr_ranges; i++) {
		struct kmem_range_startup_spec kmem_spec = {
			.kc_name = "kmem_ptr_range",
			.kc_range = &kmem_ranges[KMEM_RANGE_ID_PTR_0 + i],
			.kc_size = ptr_range_size,
			.kc_flags = KC_NO_ENTRY,
		};
		kmem_claims[kmem_claim_count++] = kmem_spec;

		struct kmem_range_startup_spec kmem_meta_spec = {
			.kc_name = "kmem_ptr_range_meta",
			.kc_range = &kmem_meta_range[KMEM_RANGE_ID_PTR_0 + i],
			.kc_size = kmem_meta_size,
			.kc_flags = KC_NONE,
		};
		kmem_claims[kmem_claim_count++] = kmem_meta_spec;
	}
	return (org_ptr_range_size - ptr_range_size - kmem_meta_size) *
	       kmem_ptr_ranges;
}

__startup_func
static void
kmem_add_extra_claims(void)
{
	vm_map_size_t largest_free_size = 0, total_claims = 0;

	vm_map_sizes(kernel_map, NULL, NULL, &largest_free_size);
	largest_free_size = trunc_page(largest_free_size);

	/*
	 * kasan and configs w/o *TRR need to have just one ptr range due to
	 * resource constraints.
	 */
#if !ZSECURITY_CONFIG(KERNEL_PTR_SPLIT)
	kmem_ptr_ranges = 1;
#endif
	/*
	 * Determine size of data and pointer kmem_ranges
	 */
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		total_claims += kmem_claims[i].kc_size;
	}
	assert((total_claims & PAGE_MASK) == 0);
	largest_free_size -= total_claims;

	/*
	 * Use half the total available VA for all pointer allocations (this
	 * includes the kmem_sprayqtn range). Given that we have 4 total
	 * ranges, divide the available VA by 8.
	 */
	ptr_range_size = largest_free_size / ((kmem_ptr_ranges + 1) * 2);
	sprayqtn_range_size = ptr_range_size;

	if (sprayqtn_range_size > (sane_size / 2)) {
		sprayqtn_range_size = sane_size / 2;
	}

	ptr_range_size = round_page(ptr_range_size);
	sprayqtn_range_size = round_page(sprayqtn_range_size);


	data_range_size = largest_free_size
	    - (ptr_range_size * kmem_ptr_ranges)
	    - sprayqtn_range_size;

	/*
	 * Add claims for kmem's ranges
	 */
	data_range_size += kmem_add_ptr_claims();
	assert(data_range_size + sprayqtn_range_size +
	    ((ptr_range_size + kmem_meta_size) * kmem_ptr_ranges) <=
	    largest_free_size);

	struct kmem_range_startup_spec kmem_spec_sprayqtn = {
		.kc_name = "kmem_sprayqtn_range",
		.kc_range = &kmem_ranges[KMEM_RANGE_ID_SPRAYQTN],
		.kc_size = sprayqtn_range_size,
		.kc_flags = KC_NO_ENTRY,
	};
	kmem_claims[kmem_claim_count++] = kmem_spec_sprayqtn;

	struct kmem_range_startup_spec kmem_spec_data = {
		.kc_name = "kmem_data_range",
		.kc_range = &kmem_ranges[KMEM_RANGE_ID_DATA],
		.kc_size = data_range_size,
		.kc_flags = KC_NO_ENTRY,
	};
	kmem_claims[kmem_claim_count++] = kmem_spec_data;
}

__startup_func
static void
kmem_scramble_ranges(void)
{
	vm_map_offset_t start = 0;

	/*
	 * Initialize the KMEM_RANGE_ID_NONE range to use the entire map so that
	 * the vm can find the requested ranges.
	 */
	kmem_ranges[KMEM_RANGE_ID_NONE].min_address = MAX(kernel_map->min_offset,
	    VM_MAP_PAGE_SIZE(kernel_map));
	kmem_ranges[KMEM_RANGE_ID_NONE].max_address = kernel_map->max_offset;

	/*
	 * Allocate the g_kext_map prior to randomizing the remaining submaps, as
	 * this map is 2G in size and starts at the end of kernel_text on x86;
	 * otherwise it could overflow into the heap.
	 */
	kext_alloc_init();

	/*
	 * Eat a random amount of kernel_map to fuzz subsequent heap, zone and
	 * stack addresses. (With a 4K page and 9 bits of randomness, this
	 * eats about 2M of VA from the map)
	 *
	 * Note that we always need to slide by at least one page because the VM
	 * pointer packing schemes using KERNEL_PMAP_HEAP_RANGE_START as a base
	 * do not admit this address to be part of any zone submap.
	 */
	start = kmem_fuzz_start();

	/*
	 * Add claims for ptr and data kmem_ranges
	 */
	kmem_add_extra_claims();

	/*
	 * Shuffle registered claims
	 */
	assert(kmem_claim_count < UINT16_MAX);
	kmem_shuffle_claims();

	/*
	 * Apply restrictions and determine range for each claim
	 */
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		vm_map_offset_t end = 0;
		struct kmem_range_startup_spec sp = kmem_claims[i];
		struct mach_vm_range *sp_range = sp.kc_range;

		if (vm_map_locate_space_anywhere(kernel_map, sp.kc_size, 0,
		    VM_MAP_KERNEL_FLAGS_ANYWHERE(), &start, NULL) != KERN_SUCCESS) {
			panic("kmem_range_init: vm_map_locate_space failing for claim %s",
			    sp.kc_name);
		}

		end = start + sp.kc_size;
		/*
		 * Re-adjust ranges if restriction not met
		 */
		if (sp_range->min_address && start > sp_range->min_address) {
			kmem_readjust_ranges(i);
		} else {
			sp_range->min_address = start;
			sp_range->max_address = end;
		}
		start = end;
	}

	/*
	 * We have settled on the ranges, now create temporary entries for the
	 * claims
	 */
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		struct kmem_range_startup_spec sp = kmem_claims[i];
		vm_map_entry_t entry = NULL;
		if (sp.kc_flags & KC_NO_ENTRY) {
			continue;
		}
		if (vm_map_find_space(kernel_map, sp.kc_range->min_address, sp.kc_size, 0,
		    VM_MAP_KERNEL_FLAGS_ANYWHERE(), &entry) != KERN_SUCCESS) {
			panic("kmem_range_init: vm_map_find_space failing for claim %s",
			    sp.kc_name);
		}
		vm_object_reference(kernel_object_default);
		VME_OBJECT_SET(entry, kernel_object_default, false, 0);
		VME_OFFSET_SET(entry, entry->vme_start);
		vm_map_unlock(kernel_map);
	}
	/*
	 * Now that we are done assigning all the ranges, reset
	 * kmem_ranges[KMEM_RANGE_ID_NONE]
	 */
	kmem_ranges[KMEM_RANGE_ID_NONE] = (struct mach_vm_range) {};

#if DEBUG || DEVELOPMENT
	for (uint32_t i = 0; i < kmem_claim_count; i++) {
		struct kmem_range_startup_spec sp = kmem_claims[i];

		printf("%-24s: %p - %p (%u%c)\n", sp.kc_name,
		    (void *)sp.kc_range->min_address,
		    (void *)sp.kc_range->max_address,
		    mach_vm_size_pretty(sp.kc_size),
		    mach_vm_size_unit(sp.kc_size));
	}
#endif /* DEBUG || DEVELOPMENT */
}

__startup_func
static void
kmem_range_init(void)
{
	vm_size_t range_adjustment;

	kmem_scramble_ranges();

	range_adjustment = sprayqtn_range_size >> 3;
	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address =
	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].min_address + range_adjustment;
	kmem_large_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address =
	    kmem_ranges[KMEM_RANGE_ID_SPRAYQTN].max_address;

	range_adjustment = data_range_size >> 3;
	kmem_large_ranges[KMEM_RANGE_ID_DATA].min_address =
	    kmem_ranges[KMEM_RANGE_ID_DATA].min_address + range_adjustment;
	kmem_large_ranges[KMEM_RANGE_ID_DATA].max_address =
	    kmem_ranges[KMEM_RANGE_ID_DATA].max_address;

	pmap_init();
	kmem_metadata_init();
	kmem_sizeclass_init();

#if DEBUG || DEVELOPMENT
	for (kmem_range_id_t i = 1; i < KMEM_RANGE_COUNT; i++) {
		vm_size_t range_size = mach_vm_range_size(&kmem_large_ranges[i]);
		printf("kmem_large_ranges[%d]    : %p - %p (%u%c)\n", i,
		    (void *)kmem_large_ranges[i].min_address,
		    (void *)kmem_large_ranges[i].max_address,
		    mach_vm_size_pretty(range_size),
		    mach_vm_size_unit(range_size));
	}
#endif
}
STARTUP(KMEM, STARTUP_RANK_THIRD, kmem_range_init);

#if DEBUG || DEVELOPMENT
__startup_func
static void
kmem_log_init(void)
{
	/*
	 * The log can only be created after the kmem subsystem is initialized,
	 * as btlog creation uses kmem.
	 */
	kmem_outlier_log = btlog_create(BTLOG_LOG, KMEM_OUTLIER_LOG_SIZE, 0);
}
STARTUP(ZALLOC, STARTUP_RANK_FIRST, kmem_log_init);

kmem_gobj_stats
kmem_get_gobj_stats(void)
{
	kmem_gobj_stats stats = {};

	vm_map_lock(kernel_map);
	for (uint8_t i = 0; i < kmem_ptr_ranges; i++) {
		kmem_range_id_t range_id = KMEM_RANGE_ID_FIRST + i;
		struct mach_vm_range range = kmem_ranges[range_id];
		struct kmem_page_meta *meta = kmem_meta_hwm[kmem_get_front(range_id, 0)];
		struct kmem_page_meta *meta_end;
		uint64_t meta_idx = meta - kmem_meta_base[range_id];
		vm_map_size_t used = 0, va = 0, meta_sz = 0, pte_sz = 0;
		vm_map_offset_t addr;
		vm_map_entry_t entry;

		/*
		 * Left front
		 */
		va = (meta_idx * KMEM_CHUNK_SIZE_MIN);
		meta_sz = round_page(meta_idx * sizeof(struct kmem_page_meta));

		/*
		 * Right front
		 */
		meta = kmem_meta_hwm[kmem_get_front(range_id, 1)];
		meta_end = kmem_addr_to_meta(range.max_address, range_id, &addr,
		    &meta_idx);
		meta_idx = meta_end - meta;
		meta_sz += round_page(meta_idx * sizeof(struct kmem_page_meta));
		va += (meta_idx * KMEM_CHUNK_SIZE_MIN);

		/*
		 * Compute VA allocated in entire range
		 */
		if (vm_map_lookup_entry(kernel_map, range.min_address, &entry) == false) {
			entry = entry->vme_next;
		}
		while (entry != vm_map_to_entry(kernel_map) &&
		    entry->vme_start < range.max_address) {
			used += (entry->vme_end - entry->vme_start);
			entry = entry->vme_next;
		}

		pte_sz = round_page(atop(va - used) * 8);

		stats.total_used += used;
		stats.total_va += va;
		stats.pte_sz += pte_sz;
		stats.meta_sz += meta_sz;
	}
	vm_map_unlock(kernel_map);

	return stats;
}

#endif /* DEBUG || DEVELOPMENT */

/*
 *	kmem_init:
 *
 *	Initialize the kernel's virtual memory map, taking
 *	into account all memory allocated up to this time.
 */
__startup_func
void
kmem_init(
	vm_offset_t     start,
	vm_offset_t     end)
{
	vm_map_offset_t map_start;
	vm_map_offset_t map_end;

	map_start = vm_map_trunc_page(start,
	    VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
	    VM_MAP_PAGE_MASK(kernel_map));

	vm_map_will_allocate_early_map(&kernel_map);
#if defined(__arm64__)
	kernel_map = vm_map_create_options(pmap_kernel(),
	    VM_MIN_KERNEL_AND_KEXT_ADDRESS,
	    VM_MAX_KERNEL_ADDRESS,
	    VM_MAP_CREATE_DEFAULT);
	/*
	 *	Reserve virtual memory allocated up to this time.
	 */
	{
		unsigned int    region_select = 0;
		vm_map_offset_t region_start;
		vm_map_size_t   region_size;
		vm_map_offset_t map_addr;
		kern_return_t kr;

		while (pmap_virtual_region(region_select, &region_start, &region_size)) {
			map_addr = region_start;
			kr = vm_map_enter(kernel_map, &map_addr,
			    vm_map_round_page(region_size,
			    VM_MAP_PAGE_MASK(kernel_map)),
			    (vm_map_offset_t) 0,
			    VM_MAP_KERNEL_FLAGS_FIXED_PERMANENT(.vmkf_no_pmap_check = true),
			    VM_OBJECT_NULL,
			    (vm_object_offset_t) 0, FALSE, VM_PROT_NONE, VM_PROT_NONE,
			    VM_INHERIT_DEFAULT);

			if (kr != KERN_SUCCESS) {
				panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
				    (uint64_t) start, (uint64_t) end, (uint64_t) region_start,
				    (uint64_t) region_size, kr);
			}

			region_select++;
		}
	}
#else
	kernel_map = vm_map_create_options(pmap_kernel(),
	    VM_MIN_KERNEL_AND_KEXT_ADDRESS, map_end,
	    VM_MAP_CREATE_DEFAULT);
	/*
	 *	Reserve virtual memory allocated up to this time.
	 */
	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
		vm_map_offset_t map_addr;
		kern_return_t kr;

		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
		kr = vm_map_enter(kernel_map,
		    &map_addr,
		    (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
		    (vm_map_offset_t) 0,
		    VM_MAP_KERNEL_FLAGS_FIXED(.vmkf_no_pmap_check = true),
		    VM_OBJECT_NULL,
		    (vm_object_offset_t) 0, FALSE,
		    VM_PROT_NONE, VM_PROT_NONE,
		    VM_INHERIT_DEFAULT);

		if (kr != KERN_SUCCESS) {
			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x",
			    (uint64_t) start, (uint64_t) end,
			    (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			    (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			    kr);
		}
	}
#endif

	kmem_set_user_wire_limits();
}


#pragma mark map copyio
/*
 * Note: semantic types aren't used as `copyio` already validates.
 */

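/*
 * copyinmap copies `length` bytes from `fromaddr` in the given map into the
 * kernel buffer `todata`: the kernel map is copied with a plain memcpy, the
 * current map with a regular copyin, and any other map by temporarily
 * switching to it around the copyin.
 */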
kern_return_t
copyinmap(
	vm_map_t                map,
	vm_map_offset_t         fromaddr,
	void                   *todata,
	vm_size_t               length)
{
	kern_return_t   kr = KERN_SUCCESS;
	vm_map_t oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
	} else if (current_map() == map) {
		if (copyin(fromaddr, todata, length) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(fromaddr, todata, length) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}

kern_return_t
copyoutmap(
	vm_map_t                map,
	void                   *fromdata,
	vm_map_address_t        toaddr,
	vm_size_t               length)
{
	kern_return_t   kr = KERN_SUCCESS;
	vm_map_t        oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
	} else if (current_map() == map) {
		if (copyout(fromdata, toaddr, length) != 0) {
			ktriage_record(thread_tid(current_thread()),
			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
			    KDBG_TRIAGE_RESERVED,
			    KDBG_TRIAGE_VM_COPYOUTMAP_SAMEMAP_ERROR),
			    KERN_INVALID_ADDRESS /* arg */);
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout(fromdata, toaddr, length) != 0) {
			ktriage_record(thread_tid(current_thread()),
			    KDBG_TRIAGE_EVENTID(KDBG_TRIAGE_SUBSYS_VM,
			    KDBG_TRIAGE_RESERVED,
			    KDBG_TRIAGE_VM_COPYOUTMAP_DIFFERENTMAP_ERROR),
			    KERN_INVALID_ADDRESS /* arg */);
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
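
/*
 * Illustrative sketch (not compiled): copying a small structure out to,
 * and back in from, a task's map that may not be the current one.  The
 * function and variable names below are hypothetical; only copyinmap()
 * and copyoutmap() above are real.
 */
#if 0
static kern_return_t
example_copy_roundtrip(vm_map_t target_map, vm_map_address_t uaddr)
{
	struct {
		uint32_t version;
		uint32_t flags;
	} info = { .version = 1, .flags = 0 };
	kern_return_t kr;

	/*
	 * copyoutmap() picks the right path: a plain memcpy() for the
	 * kernel map, copyout() for the current map, or a temporary
	 * vm_map_switch() for a foreign map.
	 */
	kr = copyoutmap(target_map, &info, uaddr, sizeof(info));
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* Read the structure back into the kernel the same way. */
	return copyinmap(target_map, uaddr, &info, sizeof(info));
}
#endif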

kern_return_t
copyoutmap_atomic32(
	vm_map_t                map,
	uint32_t                value,
	vm_map_address_t        toaddr)
{
	kern_return_t   kr = KERN_SUCCESS;
	vm_map_t        oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct toaddr */
		*(uint32_t *)toaddr = value;
	} else if (current_map() == map) {
		if (copyout_atomic32(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout_atomic32(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}

kern_return_t
copyoutmap_atomic64(
	vm_map_t                map,
	uint64_t                value,
	vm_map_address_t        toaddr)
{
	kern_return_t   kr = KERN_SUCCESS;
	vm_map_t        oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct toaddr */
		*(uint64_t *)toaddr = value;
	} else if (current_map() == map) {
		if (copyout_atomic64(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyout_atomic64(value, toaddr) != 0) {
			kr = KERN_INVALID_ADDRESS;
		}
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
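
/*
 * Illustrative sketch (not compiled): publishing a single 32-bit value
 * into a (possibly non-current) user map without tearing.  The function
 * name below is hypothetical.
 */
#if 0
static kern_return_t
example_publish_seqno(vm_map_t user_map, vm_map_address_t uaddr, uint32_t seqno)
{
	/*
	 * Unlike copyoutmap(), the value is written with a single aligned
	 * 32-bit store, so a concurrent user-space reader never observes
	 * a partial update.
	 */
	return copyoutmap_atomic32(user_map, seqno, uaddr);
}
#endif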


#pragma mark pointer obfuscation / packing

/*
 *	The following functions are to be used when exposing kernel
 *	addresses to userspace via any of the various debug or info
 *	facilities that exist. They are basically the same as
 *	VM_KERNEL_ADDRPERM() and VM_KERNEL_UNSLIDE_OR_PERM() except that
 *	they use a different random seed and are exported to KEXTs.
 *
 *	NOTE: USE THE MACRO VERSIONS OF THESE FUNCTIONS (in vm_param.h) FROM WITHIN THE KERNEL
 */

vm_offset_t
vm_kernel_addrhash_internal(vm_offset_t addr, uint64_t salt)
{
	assert(salt != 0);

	if (addr == 0) {
		return 0ul;
	}

	if (VM_KERNEL_IS_SLID(addr)) {
		return VM_KERNEL_UNSLIDE(addr);
	}

	vm_offset_t sha_digest[SHA256_DIGEST_LENGTH / sizeof(vm_offset_t)];
	SHA256_CTX sha_ctx;

	SHA256_Init(&sha_ctx);
	SHA256_Update(&sha_ctx, &salt, sizeof(salt));
	SHA256_Update(&sha_ctx, &addr, sizeof(addr));
	SHA256_Final(sha_digest, &sha_ctx);

	return sha_digest[0];
}

__exported vm_offset_t
vm_kernel_addrhash_external(vm_offset_t addr);
vm_offset_t
vm_kernel_addrhash_external(vm_offset_t addr)
{
	return vm_kernel_addrhash_internal(addr, vm_kernel_addrhash_salt_ext);
}
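
/*
 * Illustrative sketch (not compiled): how a kext might expose an opaque,
 * boot-stable identifier for a kernel object instead of its raw address.
 * The function name and the printf() usage are hypothetical.
 */
#if 0
static void
example_log_object(const void *obj)
{
	/*
	 * Kexts call the exported shim; in-kernel code should use the
	 * macro versions from vm_param.h instead (see the note above).
	 */
	vm_offset_t cookie = vm_kernel_addrhash_external((vm_offset_t)obj);

	printf("object cookie: 0x%lx\n", (unsigned long)cookie);
}
#endif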

void
vm_kernel_addrhide(
	vm_offset_t addr,
	vm_offset_t *hide_addr)
{
	*hide_addr = VM_KERNEL_ADDRHIDE(addr);
}

void
vm_kernel_addrperm_external(
	vm_offset_t addr,
	vm_offset_t *perm_addr)
{
	if (VM_KERNEL_IS_SLID(addr)) {
		*perm_addr = VM_KERNEL_UNSLIDE(addr);
	} else if (VM_KERNEL_ADDRESS(addr)) {
		*perm_addr = addr + vm_kernel_addrperm_ext;
	} else {
		*perm_addr = addr;
	}
}

void
vm_kernel_unslide_or_perm_external(
	vm_offset_t addr,
	vm_offset_t *up_addr)
{
	vm_kernel_addrperm_external(addr, up_addr);
}
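
/*
 * Illustrative sketch (not compiled): the permuted form is the exported
 * counterpart of VM_KERNEL_ADDRPERM() -- slid addresses are unslid,
 * other kernel addresses have vm_kernel_addrperm_ext added, and anything
 * else passes through unchanged.  The function name below is hypothetical.
 */
#if 0
static uint64_t
example_object_id(const void *obj)
{
	vm_offset_t perm_addr;

	vm_kernel_addrperm_external((vm_offset_t)obj, &perm_addr);
	return (uint64_t)perm_addr;
}
#endif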

void
vm_packing_pointer_invalid(vm_offset_t ptr, vm_packing_params_t params)
{
	if (ptr & ((1ul << params.vmpp_shift) - 1)) {
		panic("pointer %p can't be packed: low %d bits aren't 0",
		    (void *)ptr, params.vmpp_shift);
	} else if (ptr <= params.vmpp_base) {
		panic("pointer %p can't be packed: below base %p",
		    (void *)ptr, (void *)params.vmpp_base);
	} else {
		panic("pointer %p can't be packed: maximum encodable pointer is %p",
		    (void *)ptr, (void *)vm_packing_max_packable(params));
	}
}

void
vm_packing_verify_range(
	const char *subsystem,
	vm_offset_t min_address,
	vm_offset_t max_address,
	vm_packing_params_t params)
{
	if (min_address > max_address) {
		panic("%s: %s range invalid min:%p > max:%p",
		    __func__, subsystem, (void *)min_address, (void *)max_address);
	}

	if (!params.vmpp_base_relative) {
		return;
	}

	if (min_address <= params.vmpp_base) {
		panic("%s: %s range invalid min:%p <= base:%p",
		    __func__, subsystem, (void *)min_address, (void *)params.vmpp_base);
	}

	if (max_address > vm_packing_max_packable(params)) {
		panic("%s: %s range invalid max:%p >= max packable:%p",
		    __func__, subsystem, (void *)max_address,
		    (void *)vm_packing_max_packable(params));
	}
}
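
/*
 * Illustrative sketch (not compiled): the arithmetic the checks above
 * assume for base-relative packing, namely that a packed value encodes
 * (ptr - vmpp_base) >> vmpp_shift.  The function name is hypothetical;
 * the real pack/unpack helpers live elsewhere in the VM code.
 */
#if 0
static void
example_pack_roundtrip(vm_packing_params_t params, vm_offset_t ptr)
{
	assert(params.vmpp_base_relative);

	/*
	 * The three panics in vm_packing_pointer_invalid() correspond to
	 * the three ways this encoding can fail: low vmpp_shift bits set,
	 * pointer at or below vmpp_base, or pointer beyond
	 * vm_packing_max_packable().
	 */
	vm_offset_t packed   = (ptr - params.vmpp_base) >> params.vmpp_shift;
	vm_offset_t unpacked = (packed << params.vmpp_shift) + params.vmpp_base;

	assert(unpacked == ptr);
}
#endif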

#pragma mark tests
#if MACH_ASSERT
#include <sys/errno.h>

static void
kmem_test_for_entry(
	vm_map_t                map,
	vm_offset_t             addr,
	void                  (^block)(vm_map_entry_t))
{
	vm_map_entry_t entry;

	vm_map_lock(map);
	block(vm_map_lookup_entry(map, addr, &entry) ? entry : NULL);
	vm_map_unlock(map);
}

#define kmem_test_assert_map(map, pg, entries) ({ \
	assert3u((map)->size, ==, ptoa(pg)); \
	assert3u((map)->hdr.nentries, ==, entries); \
})

static bool
can_write_at(vm_offset_t offs, uint32_t page)
{
	static const int zero;

	return verify_write(&zero, (void *)(offs + ptoa(page) + 128), 1) == 0;
}
#define assert_writeable(offs, page) \
	assertf(can_write_at(offs, page), \
	    "can't write at %p + ptoa(%d)", (void *)offs, page)

#define assert_faults(offs, page) \
	assertf(!can_write_at(offs, page), \
	    "can write at %p + ptoa(%d)", (void *)offs, page)

#define peek(offs, page) \
	(*(uint32_t *)((offs) + ptoa(page)))

#define poke(offs, page, v) \
	(*(uint32_t *)((offs) + ptoa(page)) = (v))

__attribute__((noinline))
static void
kmem_alloc_basic_test(vm_map_t map)
{
	kmem_guard_t guard = {
		.kmg_tag = VM_KERN_MEMORY_DIAG,
	};
	vm_offset_t addr;

	/*
	 * Test wired basics:
	 * - KMA_KOBJECT
	 * - KMA_GUARD_FIRST, KMA_GUARD_LAST
	 * - allocation alignment
	 */
	addr = kmem_alloc_guard(map, ptoa(10), ptoa(2) - 1,
	    KMA_KOBJECT | KMA_GUARD_FIRST | KMA_GUARD_LAST, guard).kmr_address;
	assertf(addr != 0ull, "kma(%p, 10p, 2p - 1, KO | GF | GL)", map);
	assert3u((addr + PAGE_SIZE) % ptoa(2), ==, 0);
	kmem_test_assert_map(map, 10, 1);

	kmem_test_for_entry(map, addr, ^(__assert_only vm_map_entry_t e){
		assertf(e, "unable to find address %p in map %p", (void *)addr, map);
		assert(e->vme_kernel_object);
		assert(!e->vme_atomic);
		assert3u(e->vme_start, <=, addr);
		assert3u(addr + ptoa(10), <=, e->vme_end);
	});

	assert_faults(addr, 0);
	for (int i = 1; i < 9; i++) {
		assert_writeable(addr, i);
	}
	assert_faults(addr, 9);

	kmem_free(map, addr, ptoa(10));
	kmem_test_assert_map(map, 0, 0);

	/*
	 * Test pageable basics.
	 */
	addr = kmem_alloc_guard(map, ptoa(10), 0,
	    KMA_PAGEABLE, guard).kmr_address;
	assertf(addr != 0ull, "kma(%p, 10p, 0, PG)", map);
	kmem_test_assert_map(map, 10, 1);

	for (int i = 0; i < 9; i++) {
		assert_faults(addr, i);
		poke(addr, i, 42);
		assert_writeable(addr, i);
	}

	kmem_free(map, addr, ptoa(10));
	kmem_test_assert_map(map, 0, 0);
}

__attribute__((noinline))
static void
kmem_realloc_basic_test(vm_map_t map, kmr_flags_t kind)
{
	kmem_guard_t guard = {
		.kmg_atomic  = !(kind & KMR_DATA),
		.kmg_tag     = VM_KERN_MEMORY_DIAG,
		.kmg_context = 0xefface,
	};
	vm_offset_t addr, newaddr;
	const int N = 10;

	/*
	 *	This isn't something kmem_realloc_guard() _needs_ to do:
	 *	one could imagine an implementation that grows in place
	 *	when there is room after the allocation.
	 *
	 *	However, this is what the implementation does today.
	 */
	bool realloc_growth_changes_address = true;
	bool GL = (kind & KMR_GUARD_LAST);

	/*
	 *	Initial N page allocation
	 */
	addr = kmem_alloc_guard(map, ptoa(N), 0,
	    (kind & (KMA_KOBJECT | KMA_GUARD_LAST | KMA_DATA)) | KMA_ZERO,
	    guard).kmr_address;
	assert3u(addr, !=, 0);
	kmem_test_assert_map(map, N, 1);
	for (int pg = 0; pg < N - GL; pg++) {
		poke(addr, pg, 42 + pg);
	}
	for (int pg = N - GL; pg < N; pg++) {
		assert_faults(addr, pg);
	}


	/*
	 *	Grow to N + 3 pages
	 */
	newaddr = kmem_realloc_guard(map, addr, ptoa(N), ptoa(N + 3),
	    kind | KMR_ZERO, guard).kmr_address;
	assert3u(newaddr, !=, 0);
	if (realloc_growth_changes_address) {
		assert3u(addr, !=, newaddr);
	}
	if ((kind & KMR_FREEOLD) || (addr == newaddr)) {
		kmem_test_assert_map(map, N + 3, 1);
	} else {
		kmem_test_assert_map(map, 2 * N + 3, 2);
	}
	for (int pg = 0; pg < N - GL; pg++) {
		assert3u(peek(newaddr, pg), ==, 42 + pg);
	}
	if ((kind & KMR_FREEOLD) == 0) {
		for (int pg = 0; pg < N - GL; pg++) {
			assert3u(peek(addr, pg), ==, 42 + pg);
		}
		/* check that the old and new mappings truly share the pages */
		poke(addr + 16, 0, 1234);
		assert3u(peek(newaddr + 16, 0), ==, 1234);
		kmem_free_guard(map, addr, ptoa(N), KMF_NONE, guard);
		kmem_test_assert_map(map, N + 3, 1);
	}
	if (addr != newaddr) {
		for (int pg = 0; pg < N - GL; pg++) {
			assert_faults(addr, pg);
		}
	}
	for (int pg = N - GL; pg < N + 3 - GL; pg++) {
		assert3u(peek(newaddr, pg), ==, 0);
	}
	for (int pg = N + 3 - GL; pg < N + 3; pg++) {
		assert_faults(newaddr, pg);
	}
	addr = newaddr;


	/*
	 *	Shrink to N - 2 pages
	 */
	newaddr = kmem_realloc_guard(map, addr, ptoa(N + 3), ptoa(N - 2),
	    kind | KMR_ZERO, guard).kmr_address;
	assert3u(map->size, ==, ptoa(N - 2));
	assert3u(newaddr, ==, addr);
	kmem_test_assert_map(map, N - 2, 1);

	for (int pg = 0; pg < N - 2 - GL; pg++) {
		assert3u(peek(addr, pg), ==, 42 + pg);
	}
	for (int pg = N - 2 - GL; pg < N + 3; pg++) {
		assert_faults(addr, pg);
	}

	kmem_free_guard(map, addr, ptoa(N - 2), KMF_NONE, guard);
	kmem_test_assert_map(map, 0, 0);
}

static int
kmem_basic_test(__unused int64_t in, int64_t *out)
{
	mach_vm_offset_t addr;
	vm_map_t map;

	printf("%s: test running\n", __func__);

	map = kmem_suballoc(kernel_map, &addr, 64U << 20,
	        VM_MAP_CREATE_DEFAULT, VM_FLAGS_ANYWHERE,
	        KMS_NOFAIL | KMS_DATA, VM_KERN_MEMORY_DIAG).kmr_submap;

	printf("%s: kmem_alloc ...\n", __func__);
	kmem_alloc_basic_test(map);
	printf("%s:     PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD);
	printf("%s:     PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD);
	printf("%s:     PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST);
	printf("%s:     PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_LAST);
	printf("%s:     PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_KOBJECT | KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
	printf("%s:     PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST);
	printf("%s:     PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_LAST);
	printf("%s:     PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_FREEOLD | KMR_GUARD_FIRST | KMR_GUARD_LAST);
	printf("%s:     PASS\n", __func__);

	/* passing KMR_DATA exercises the non-atomic realloc path */
	printf("%s: kmem_realloc (KMR_DATA | KMR_FREEOLD) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_DATA | KMR_FREEOLD);
	printf("%s:     PASS\n", __func__);

	printf("%s: kmem_realloc (KMR_DATA) ...\n", __func__);
	kmem_realloc_basic_test(map, KMR_DATA);
	printf("%s:     PASS\n", __func__);

	kmem_free_guard(kernel_map, addr, 64U << 20, KMF_NONE, KMEM_GUARD_SUBMAP);
	vm_map_deallocate(map);

	printf("%s: test passed\n", __func__);
	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kmem_basic, kmem_basic_test);

static void
kmem_test_get_size_idx_for_chunks(uint32_t chunks)
{
	__assert_only uint32_t idx = kmem_get_size_idx_for_chunks(chunks);

	assert(chunks >= kmem_size_array[idx].ks_num_chunk);
}

__attribute__((noinline))
static void
kmem_test_get_size_idx_for_all_chunks(void)
{
	for (uint32_t i = 0; i < KMEM_NUM_SIZECLASS; i++) {
		uint32_t chunks = kmem_size_array[i].ks_num_chunk;

		if (chunks != 1) {
			kmem_test_get_size_idx_for_chunks(chunks - 1);
		}
		kmem_test_get_size_idx_for_chunks(chunks);
		kmem_test_get_size_idx_for_chunks(chunks + 1);
	}
}

static int
kmem_guard_obj_test(__unused int64_t in, int64_t *out)
{
	printf("%s: test running\n", __func__);

	printf("%s: kmem_get_size_idx_for_chunks\n", __func__);
	kmem_test_get_size_idx_for_all_chunks();
	printf("%s:     PASS\n", __func__);

	printf("%s: test passed\n", __func__);
	*out = 1;
	return 0;
}
SYSCTL_TEST_REGISTER(kmem_guard_obj, kmem_guard_obj_test);
#endif /* MACH_ASSERT */