/*
* Copyright (c) 2022 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/

#define LOCK_PRIVATE 1
#include <mach_ldebug.h>
#include <kern/locks_internal.h>
#include <kern/lock_stat.h>
#include <kern/lock_ptr.h>
#include <mach/mach_time.h>
#include <mach/machine/sdt.h>
#include <mach/vm_param.h>
#include <machine/cpu_data.h>
#include <machine/machine_cpu.h>
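
/*
 * hw_lck_ptr_t packs a kernel pointer together with a lock bit,
 * an optional statistics bit and an MCS queue tail into a single
 * word (see <kern/lock_ptr.h>). The routines below implement a
 * spinlock around that word: the fast paths are a single atomic
 * OR (lock) or XOR (unlock), and contended lockers queue on MCS
 * nodes obtained from lck_spin_txn_begin().
 */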

#pragma mark hw_lck_ptr_t: helpers

static_assert(VM_KERNEL_POINTER_SIGNIFICANT_BITS < HW_LCK_PTR_BITS,
"sign extension of lck_ptr_bits does the right thing");
static inline void
__hw_lck_ptr_encode(hw_lck_ptr_t *lck, const void *ptr)
{
lck->lck_ptr_bits = (intptr_t)ptr;
#if CONFIG_KERNEL_TAGGING
lck->lck_ptr_tag = vm_memtag_extract_tag((vm_offset_t)ptr);
#endif /* CONFIG_KERNEL_TAGGING */
}
__abortlike
static void
__hw_lck_ptr_invalid_panic(hw_lck_ptr_t *lck)
{
hw_lck_ptr_t tmp = os_atomic_load(lck, relaxed);
panic("Invalid/destroyed ptr spinlock %p: <%p %d 0x%04x>",
lck, __hw_lck_ptr_value(tmp), tmp.lck_ptr_locked,
tmp.lck_ptr_mcs_tail);
}
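
/*
 * Lock/unlock must take the slow path when DTrace probes might
 * fire: either this lock was initialized with per-group statistics
 * (lck_ptr_stats) or lockstat is globally enabled.
 */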
__attribute__((always_inline, overloadable))
static inline bool
hw_lck_ptr_take_slowpath(hw_lck_ptr_t tmp)
{
hw_lck_ptr_t check_bits = {
#if CONFIG_DTRACE
.lck_ptr_stats = true,
#endif /* CONFIG_DTRACE */
};
unsigned long take_slowpath = 0;
take_slowpath = tmp.lck_ptr_value & check_bits.lck_ptr_value;
#if CONFIG_DTRACE
take_slowpath |= lockstat_enabled();
#endif /* CONFIG_DTRACE */
return take_slowpath;
}

#pragma mark hw_lck_ptr_t: init/destroy

void
hw_lck_ptr_init(hw_lck_ptr_t *lck, void *val, lck_grp_t *grp)
{
hw_lck_ptr_t init = { };
#if LCK_GRP_USE_ARG
if (grp) {
#if CONFIG_DTRACE
if (grp->lck_grp_attr_id & LCK_GRP_ATTR_STAT) {
init.lck_ptr_stats = true;
}
#endif /* CONFIG_DTRACE */
lck_grp_reference(grp, &grp->lck_grp_spincnt);
}
#endif /* LCK_GRP_USE_ARG */
__hw_lck_ptr_encode(&init, val);
os_atomic_init(lck, init);
}
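
/*
 * Destroying a lock that is held (or has waiters queued) is a
 * caller bug and panics. On success the word is poisoned: the lock
 * bit stays set so later lockers spin in hw_lck_ptr_contended()
 * until the spin timeout panics, and the MCS tail is set to an
 * invalid ID.
 */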
void
hw_lck_ptr_destroy(hw_lck_ptr_t *lck, lck_grp_t *grp)
{
hw_lck_ptr_t tmp = os_atomic_load(lck, relaxed);
if (tmp.lck_ptr_locked || tmp.lck_ptr_mcs_tail) {
__hw_lck_ptr_invalid_panic(lck);
}
#if LCK_GRP_USE_ARG
if (grp) {
lck_grp_deallocate(grp, &grp->lck_grp_spincnt);
}
#endif /* LCK_GRP_USE_ARG */
/* make clients spin forever, and use an invalid MCS ID */
tmp.lck_ptr_locked = true;
tmp.lck_ptr_stats = false;
tmp.lck_ptr_mcs_tail = 0xffff;
os_atomic_store(lck, tmp, relaxed);
}
bool
hw_lck_ptr_held(hw_lck_ptr_t *lck)
{
return os_atomic_load(lck, relaxed).lck_ptr_locked;
}
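
/*
 * Typical lifecycle, as a sketch (`bucket`, `head` and `kv_grp` are
 * illustrative names, not part of this interface):
 *
 *	hw_lck_ptr_init(&bucket->hlp, head, &kv_grp);
 *	...
 *	void *cur = hw_lck_ptr_lock(&bucket->hlp, &kv_grp);
 *	// the holder may consult/mutate the pointed-to state, and
 *	// publish a (possibly different) pointer when unlocking
 *	hw_lck_ptr_unlock(&bucket->hlp, cur, &kv_grp);
 *	...
 *	hw_lck_ptr_destroy(&bucket->hlp, &kv_grp);
 */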

#pragma mark hw_lck_ptr_t: hw_lck_ptr_lock

__abortlike
static hw_spin_timeout_status_t
hw_lck_ptr_timeout_panic(void *_lock, hw_spin_timeout_t to, hw_spin_state_t st)
{
hw_lck_ptr_t *lck = _lock;
hw_lck_ptr_t tmp;
tmp = os_atomic_load(lck, relaxed);
panic("Ptr spinlock[%p] " HW_SPIN_TIMEOUT_FMT "; "
"ptr_value: %p, mcs_tail: 0x%04x, "
HW_SPIN_TIMEOUT_DETAILS_FMT,
lck, HW_SPIN_TIMEOUT_ARG(to, st),
__hw_lck_ptr_value(tmp), tmp.lck_ptr_mcs_tail,
HW_SPIN_TIMEOUT_DETAILS_ARG(to, st));
}
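
/*
 * Contended spins are bounded by the global lock_panic_timeout;
 * exceeding it ends up in hw_lck_ptr_timeout_panic() above.
 */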
static const struct hw_spin_policy hw_lck_ptr_spin_policy = {
.hwsp_name = "hw_lck_ptr_lock",
.hwsp_timeout_atomic = &lock_panic_timeout,
.hwsp_op_timeout = hw_lck_ptr_timeout_panic,
};
static void * __attribute__((noinline))
hw_lck_ptr_contended(hw_lck_ptr_t *lck LCK_GRP_ARG(lck_grp_t *grp))
{
hw_spin_policy_t pol = &hw_lck_ptr_spin_policy;
hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
hw_spin_state_t ss = { };
hw_lck_ptr_t value, nvalue;
lck_mcs_id_t pidx;
lck_spin_txn_t txn;
#if CONFIG_DTRACE || LOCK_STATS
	uint64_t spin_start = 0; /* 0: spin-time stats not being collected */
lck_grp_spin_update_miss(lck LCK_GRP_ARG(grp));
if (__improbable(lck_grp_spin_spin_enabled(lck LCK_GRP_ARG(grp)))) {
spin_start = mach_absolute_time();
}
#endif /* LOCK_STATS || CONFIG_DTRACE */
/*
* Take a spot in the MCS queue,
* and then spin until we're at the head of it.
*/
txn = lck_spin_txn_begin(lck);
pidx = os_atomic_xchg(&lck->lck_ptr_mcs_tail, txn.txn_mcs_id, release);
if (pidx) {
lck_spin_mcs_t pnode;
unsigned long ready;
pnode = lck_spin_mcs_decode(pidx);
os_atomic_store(&pnode->lsm_next, txn.txn_slot, relaxed);
while (!hw_spin_wait_until(&txn.txn_slot->lsm_ready, ready, ready)) {
hw_spin_should_keep_spinning(lck, pol, to, &ss);
}
}
	/*
	 * We're now at the head of the queue: wait for the lock bit
	 * to clear, then attempt to take the lock.
	 */
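	/*
	 * Note: if our MCS node is still the queue tail, the CAS below
	 * also clears lck_ptr_mcs_tail, so the queue empties atomically
	 * with the lock acquisition.
	 */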
do {
while (!hw_spin_wait_until(&lck->lck_ptr_value,
value.lck_ptr_value, value.lck_ptr_locked == 0)) {
hw_spin_should_keep_spinning(lck, pol, to, &ss);
}
nvalue = value;
nvalue.lck_ptr_locked = true;
if (nvalue.lck_ptr_mcs_tail == txn.txn_mcs_id) {
nvalue.lck_ptr_mcs_tail = 0;
}
} while (!os_atomic_cmpxchg(lck, value, nvalue, acquire));
	/*
	 * We now have the lock; let's clean up the MCS state.
	 *
	 * If there is a node queued after us, notify it that it is
	 * now at the head of the interlock queue.
	 *
	 * Then, release our MCS node.
	 */
if (value.lck_ptr_mcs_tail != txn.txn_mcs_id) {
lck_spin_mcs_t nnode;
while (!hw_spin_wait_until(&txn.txn_slot->lsm_next, nnode, nnode)) {
hw_spin_should_keep_spinning(lck, pol, to, &ss);
}
os_atomic_store(&nnode->lsm_ready, 1, relaxed);
}
lck_spin_txn_end(&txn);
#if CONFIG_DTRACE || LOCK_STATS
if (__improbable(spin_start)) {
lck_grp_spin_update_spin(lck LCK_GRP_ARG(grp),
mach_absolute_time() - spin_start);
}
#endif /* CONFIG_DTRACE || LOCK_STATS */
return __hw_lck_ptr_value(value);
}
#if CONFIG_DTRACE
__attribute__((noinline))
#else /* !CONFIG_DTRACE */
__attribute__((always_inline))
#endif /* !CONFIG_DTRACE */
static void *
hw_lck_ptr_lock_slow(
hw_lck_ptr_t *lck,
hw_lck_ptr_t tmp
LCK_GRP_ARG(lck_grp_t *grp))
{
lck_grp_spin_update_held(lck LCK_GRP_ARG(grp));
return __hw_lck_ptr_value(tmp);
}
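
/*
 * Fast path: if neither the lock bit nor an MCS tail is visible,
 * attempt to set the lock bit with a single atomic OR. If the OR
 * observed the bit already set, fall back to the contended path.
 */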
static inline void *
hw_lck_ptr_lock_fastpath(hw_lck_ptr_t *lck LCK_GRP_ARG(lck_grp_t *grp))
{
hw_lck_ptr_t lock_bit = { .lck_ptr_locked = 1 };
hw_lck_ptr_t tmp;
tmp = os_atomic_load(lck, relaxed);
if (__probable(tmp.lck_ptr_locked == 0 && tmp.lck_ptr_mcs_tail == 0)) {
tmp.lck_ptr_value = os_atomic_or_orig(&lck->lck_ptr_value,
lock_bit.lck_ptr_value, acquire);
if (__probable(tmp.lck_ptr_locked == 0)) {
if (__probable(!hw_lck_ptr_take_slowpath(tmp))) {
return __hw_lck_ptr_value(tmp);
}
return hw_lck_ptr_lock_slow(lck, tmp LCK_GRP_ARG(grp));
}
}
return hw_lck_ptr_contended(lck LCK_GRP_ARG(grp));
}
void *
hw_lck_ptr_lock_nopreempt(hw_lck_ptr_t *lck, lck_grp_t *grp)
{
return hw_lck_ptr_lock_fastpath(lck LCK_GRP_ARG(grp));
}
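
/*
 * hw_lck_ptr_lock() disables preemption for the critical section;
 * it is re-enabled by hw_lck_ptr_unlock(). The _nopreempt variants
 * leave preemption management to the caller.
 */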
void *
hw_lck_ptr_lock(hw_lck_ptr_t *lck, lck_grp_t *grp)
{
lock_disable_preemption_for_thread(current_thread());
return hw_lck_ptr_lock_fastpath(lck LCK_GRP_ARG(grp));
}

#pragma mark hw_lck_ptr_t: hw_lck_ptr_unlock

#if CONFIG_DTRACE
__attribute__((noinline))
static void
hw_lck_ptr_unlock_slow(
hw_lck_ptr_t *lck,
bool do_preempt
LCK_GRP_ARG(lck_grp_t *grp))
{
if (do_preempt) {
lock_enable_preemption();
}
LOCKSTAT_RECORD(LS_LCK_SPIN_UNLOCK_RELEASE, lck,
(uintptr_t)LCK_GRP_PROBEARG(grp));
}
#endif /* CONFIG_DTRACE */
static inline void
hw_lck_ptr_unlock_fastpath(
hw_lck_ptr_t *lck,
void *val,
bool do_preempt
LCK_GRP_ARG(lck_grp_t *grp))
{
hw_lck_ptr_t curv;
hw_lck_ptr_t xorv = { };
	/*
	 * Compute the value to XOR in, in order to unlock and change the
	 * pointer value while leaving lck_ptr_stats and lck_ptr_mcs_tail
	 * unmodified.
	 *
	 * (The latter might change while we unlock; this avoids a CAS loop.)
	 */
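	/*
	 * Concretely, with LOCKED denoting the lock bit: once stats and
	 * tail are masked off, curv is enc(old) | LOCKED, so
	 *
	 *	xorv = enc(new) ^ enc(old) ^ LOCKED
	 *
	 * and the atomic XOR below turns enc(old) | LOCKED into enc(new)
	 * with the lock bit clear, preserving whatever lck_ptr_stats and
	 * lck_ptr_mcs_tail happen to contain at that point.
	 */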
curv = atomic_load_explicit((hw_lck_ptr_t _Atomic *)lck,
memory_order_relaxed);
curv.lck_ptr_stats = false;
curv.lck_ptr_mcs_tail = 0;
__hw_lck_ptr_encode(&xorv, val);
xorv.lck_ptr_value ^= curv.lck_ptr_value;
curv.lck_ptr_value =
os_atomic_xor(&lck->lck_ptr_value, xorv.lck_ptr_value, release);
#if CONFIG_DTRACE
if (__improbable(hw_lck_ptr_take_slowpath(curv))) {
return hw_lck_ptr_unlock_slow(lck, do_preempt LCK_GRP_ARG(grp));
}
#endif /* CONFIG_DTRACE */
if (do_preempt) {
lock_enable_preemption();
}
}
void
hw_lck_ptr_unlock_nopreempt(hw_lck_ptr_t *lck, void *val, lck_grp_t *grp)
{
hw_lck_ptr_unlock_fastpath(lck, val, false LCK_GRP_ARG(grp));
}
void
hw_lck_ptr_unlock(hw_lck_ptr_t *lck, void *val, lck_grp_t *grp)
{
hw_lck_ptr_unlock_fastpath(lck, val, true LCK_GRP_ARG(grp));
}

#pragma mark hw_lck_ptr_t: hw_lck_ptr_wait_for_value

static void __attribute__((noinline))
hw_lck_ptr_wait_for_value_contended(
hw_lck_ptr_t *lck,
void *val
LCK_GRP_ARG(lck_grp_t *grp))
{
hw_spin_policy_t pol = &hw_lck_ptr_spin_policy;
hw_spin_timeout_t to = hw_spin_compute_timeout(pol);
hw_spin_state_t ss = { };
hw_lck_ptr_t tmp;
#if CONFIG_DTRACE || LOCK_STATS
	uint64_t spin_start = 0; /* 0: spin-time stats not being collected */
if (__improbable(lck_grp_spin_spin_enabled(lck LCK_GRP_ARG(grp)))) {
spin_start = mach_absolute_time();
}
#endif /* LOCK_STATS || CONFIG_DTRACE */
while (__improbable(!hw_spin_wait_until(&lck->lck_ptr_value,
tmp.lck_ptr_value, __hw_lck_ptr_value(tmp) == val))) {
hw_spin_should_keep_spinning(lck, pol, to, &ss);
}
#if CONFIG_DTRACE || LOCK_STATS
if (__improbable(spin_start)) {
lck_grp_spin_update_spin(lck LCK_GRP_ARG(grp),
mach_absolute_time() - spin_start);
}
#endif /* CONFIG_DTRACE || LOCK_STATS */
os_atomic_thread_fence(acquire);
}
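
/*
 * Busy-wait until the lock's pointer value reads as `val`. The
 * acquire barrier on return (the fence in the contended path, or
 * the acquire load on the fast path) orders the waiter after the
 * unlock that published that value.
 */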
void
hw_lck_ptr_wait_for_value(
hw_lck_ptr_t *lck,
void *val,
lck_grp_t *grp)
{
hw_lck_ptr_t tmp = os_atomic_load(lck, acquire);
if (__probable(__hw_lck_ptr_value(tmp) == val)) {
return;
}
hw_lck_ptr_wait_for_value_contended(lck, val LCK_GRP_ARG(grp));
}