/*
* Copyright (c) 2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
#include <machine/asm.h>
/* This section has all the code necessary for the atomic operations exposed
 * through the OSAtomicFifoEnqueue and OSAtomicFifoDequeue APIs in libplatform.
*
* This code needs to be compiled as 1 section and should not make branches
* outside of this section. This allows us to copy the entire section to the
* text comm page once it is created - see osfmk/arm/commpage/commpage.c
*
* This section is split into 2 parts - the preemption-free zone (PFZ) routines
 * and the preemptible routines (non-PFZ). The PFZ routines will not be
 * preempted by the scheduler if, while an asynchronous interrupt is being
 * handled, the pc of the userspace process is found to be in that region (note
 * that traps are still possible in the PFZ). Instead, the scheduler will mark
 * x15 (known through coordination with the functions in the commpage section)
 * to indicate to the userspace code that it needs to take a delayed
 * preemption. The PFZ functions
* may make callouts to preemptible routines and vice-versa. When a function
* returns to a preemptible routine after a callout to a function in the PFZ, it
* needs to check x15 to determine if a delayed preemption needs to be taken. In
* addition, functions in the PFZ should not have backwards branches.
*
* The entry point to execute code in the commpage text section is through the
* jump table at the very top of the section. The base of the jump table is
* exposed to userspace via the APPLE array and the offsets from the base of the
* jump table are listed in the arm/cpu_capabilities.h header. Adding any new
* functions in the PFZ requires a lockstep change to the cpu_capabilities.h
* header.
*
* Functions in PFZ:
* Enqueue function
* Dequeue function
*
* Functions not in PFZ:
* Backoff function as part of spin loop
* Preempt function to take delayed preemption as indicated by kernel
*
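 * As an illustration only (this is not part of the commpage itself), a
 * userspace caller reaches one of these entry points roughly like the sketch
 * below; commpage_text_base and enqueue_offset are hypothetical placeholders
 * for the address published via the APPLE array and the offset constant from
 * arm/cpu_capabilities.h:
 *
 *   typedef void (*enqueue_fn_t)(OSFifoQueueHead *, void *, size_t);
 *   enqueue_fn_t enq = (enqueue_fn_t)(commpage_text_base + enqueue_offset);
 *   enq(list, new_elem, offsetof(elem_t, link));
 *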
* ----------------------------------------------------------------------
*
* The high level goal of the asm code in this section is to enqueue and dequeue
* from a FIFO linked list.
*
* typedef volatile struct {
* void *opaque1; <-- ptr to first queue element or null
* void *opaque2; <-- ptr to second queue element or null
* int opaque3; <-- spinlock
* } OSFifoQueueHead;
*
* This is done through a userspace spin lock stored in the linked list head
* for synchronization.
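 *
 * For reference, typical use of the libplatform API that ends up in these
 * routines looks roughly like the sketch below (the API is declared in
 * <libkern/OSAtomic.h>); elem_t, push and pop are illustrative names, not part
 * of the API:
 *
 *   #include <libkern/OSAtomic.h> // OSFifoQueueHead, OSAtomicFifoEnqueue/Dequeue
 *   #include <stddef.h>           // offsetof
 *
 *   typedef struct elem { void *link; int payload; } elem_t;
 *   static OSFifoQueueHead q = OS_ATOMIC_FIFO_QUEUE_INIT;
 *
 *   void push(elem_t *e) { OSAtomicFifoEnqueue(&q, e, offsetof(elem_t, link)); }
 *   elem_t *pop(void)    { return OSAtomicFifoDequeue(&q, offsetof(elem_t, link)); }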
*
* Here is the pseudocode for the spin lock acquire algorithm which is split
* between the PFZ and the non-PFZ areas of the commpage text section. The
 * pseudocode here is just for the enqueue operation; the dequeue operation is
 * symmetrical (a matching dequeue sketch follows the BACKOFF pseudocode below).
*
* // Not in the PFZ. Entry from jump table.
* ENQUEUE()
* enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
* // We're running here after running the TRY_LOCK_AND_ENQUEUE code in
* // the PFZ so we need to check if we need to take a delayed
* // preemption.
* if (kernel_wants_to_preempt_us){
* // This is done through the pfz_exit() mach trap which is a dummy
* // syscall whose sole purpose is to allow the thread to enter the
* // kernel so that it can be preempted at AST.
* enter_kernel_to_take_delayed_preemption()
* }
*
* if (!enqueued) {
* ARM_MONITOR;
* WFE;
* enqueued = TRY_LOCK_AND_ENQUEUE(lock_addr);
* if (!enqueued) {
* // We failed twice, take a backoff
* BACKOFF();
* goto ENQUEUE()
* } else {
* // We got here from PFZ, check for delayed preemption
* if (kernel_wants_to_preempt_us){
* enter_kernel_to_take_delayed_preemption()
* }
* }
* }
*
* // in PFZ
* TRY_LOCK_AND_ENQUEUE():
* is_locked = try_lock(lock_addr);
* if (is_locked) {
* <do enqueue operation>
* return true
* } else {
* return false
* }
*
*
* // Not in the PFZ
* BACKOFF():
* // We're running here after running the TRY_LOCK_AND_ENQUEUE code in
* // the PFZ so we need to check if we need to take a delayed
* // preemption.
* if (kernel_wants_to_preempt_us) {
* enter_kernel_to_take_preemption()
* } else {
* // Note that it is safe to do this loop here since the entire
* // BACKOFF function isn't in the PFZ and so can be preempted at any
* // time
* do {
* lock_is_free = peek(lock_addr);
* if (lock_is_free) {
* return
* } else {
* pause_with_monitor(lock_addr)
* }
* } while (1)
* }
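 *
 * For completeness, the PFZ helper on the dequeue side follows the same shape.
 * This is only a sketch of the _pfz_trylock_and_dequeue routine further below:
 *
 * // in PFZ
 * TRY_LOCK_AND_DEQUEUE():
 *     is_locked = try_lock(lock_addr);
 *     if (is_locked) {
 *         elem = <do dequeue operation> // may be NULL if the queue is empty
 *         return elem
 *     } else {
 *         return failure sentinel (-1)
 *     }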
*/
/* Macros and helpers */
.macro BACKOFF lock_addr
// Save registers we can't clobber
stp x0, x1, [sp, #-16]!
stp x2, x9, [sp, #-16]!
// Pass in lock addr to backoff function
mov x0, \lock_addr
	bl _backoff // Call the backoff routine (not in the PFZ)
// Restore registers
ldp x2, x9, [sp], #16
ldp x0, x1, [sp], #16
.endmacro
/* x0 = pointer to queue head
* x1 = pointer to new elem to enqueue
* x2 = offset of link field inside element
* x3 = Address of lock
*
* Moves result of the helper function to the register specified
*/
.macro TRYLOCK_ENQUEUE result
stp x0, xzr, [sp, #-16]! // Save x0 since it'll be clobbered by return value
bl _pfz_trylock_and_enqueue
mov \result, x0
ldp x0, xzr, [sp], #16 // Restore saved registers
.endmacro
/* x0 = pointer to queue head
* x1 = offset of link field inside element
* x2 = Address of lock
*
* Moves result of the helper function to the register specified
*/
.macro TRYLOCK_DEQUEUE result
stp x0, xzr, [sp, #-16]! // Save x0 since it'll be clobbered by return value
bl _pfz_trylock_and_dequeue
mov \result, x0
ldp x0, xzr, [sp], #16 // Restore saved registers
.endmacro
/*
* Takes a delayed preemption if needed and then branches to the label
* specified.
*
* Modifies x15
*/
.macro PREEMPT_SELF_THEN branch_to_take_on_success
cbz x15, \branch_to_take_on_success // No delayed preemption to take, just try again
mov x15, xzr // zero out the preemption pending field
bl _preempt_self
b \branch_to_take_on_success
.endmacro
.section __TEXT_EXEC,__commpage_text,regular,pure_instructions
/* Preemption-free functions */
.align 2
_jump_table: // 32 entry jump table, only 2 are used
b _pfz_enqueue
b _pfz_dequeue
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
brk #666
/*
* typedef volatile struct {
* void *opaque1; <-- ptr to first queue element or null
* void *opaque2; <-- ptr to second queue element or null
* int opaque3; <-- spinlock
 * } OSFifoQueueHead;
*/
/* Non-preemptible helper routine to FIFO enqueue:
* int pfz_trylock_and_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset, uint32_t *lock_addr);
*
* x0 = pointer to queue head structure
* x1 = pointer to new element to enqueue
* x2 = offset of link field inside element
* x3 = address of lock
*
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns 0 on success and a non-zero value on failure
*/
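/* Roughly, the enqueue below corresponds to this C sketch (illustrative only;
 * the lock itself is taken and dropped with the casa/stlr pair in the assembly,
 * and "offset" is the link-field offset passed in x2):
 *
 *   if (list->opaque2 == NULL) {                          // queue is empty
 *       list->opaque1 = new;                              // new elem becomes head
 *   } else {
 *       *(void **)((char *)list->opaque2 + offset) = new; // old tail->link = new
 *   }
 *   list->opaque2 = new;                                  // new elem becomes tail
 */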
.globl _pfz_trylock_and_enqueue
.align 2
_pfz_trylock_and_enqueue:
ARM64_STACK_PROLOG
PUSH_FRAME
mov w10, wzr // unlock value = w10 = 0
mov w11, #1 // locked value = w11 = 1
// Try to grab the lock
casa w10, w11, [x3] // Atomic CAS with acquire barrier
cbz w10, Ltrylock_enqueue_success
mov x0, #-1 // Failed
b Ltrylock_enqueue_exit
/* We got the lock, enqueue the element */
Ltrylock_enqueue_success:
ldr x10, [x0, #8] // x10 = tail of the queue
cbnz x10, Lnon_empty_queue // tail not NULL
str x1, [x0] // Set head to new element
b Lset_new_tail
Lnon_empty_queue:
str x1, [x10, x2] // Set old tail -> offset = new elem
Lset_new_tail:
str x1, [x0, #8] // Set tail = new elem
// Drop spin lock with release barrier (pairs with acquire in casa)
stlr wzr, [x3]
mov x0, xzr // Mark success
Ltrylock_enqueue_exit:
POP_FRAME
ARM64_STACK_EPILOG
/* Non-preemptible helper routine to FIFO dequeue:
* void *pfz_trylock_and_dequeue(OSFifoQueueHead *__list, size_t __offset, uint32_t *lock_addr);
*
* x0 = pointer to queue head structure
 * x1 = offset of link field inside element
 * x2 = address of lock
 *
 * Only caller-saved registers (x9 - x15) are used in this function
 *
 * Returns -1 if the lock could not be taken; on success, returns the dequeued
 * element, which may be NULL if the queue was empty
*/
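/* Roughly, the dequeue below corresponds to this C sketch (illustrative only;
 * the lock handling is the casa/stlr pair in the assembly, and "offset" is the
 * link-field offset passed in x1):
 *
 *   elem = list->opaque1;                                 // current head, may be NULL
 *   if (elem != NULL) {
 *       next = *(void **)((char *)elem + offset);
 *       if (next == NULL) {                               // singleton queue
 *           list->opaque2 = NULL;                         // queue becomes empty, clear tail
 *       }
 *       *(void **)((char *)elem + offset) = NULL;         // unlink the old head
 *       list->opaque1 = next;                             // next elem becomes head
 *   }
 *   return elem;
 */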
.globl _pfz_trylock_and_dequeue
.align 2
_pfz_trylock_and_dequeue:
ARM64_STACK_PROLOG
PUSH_FRAME
// Try to grab the lock
mov w10, wzr // unlock value = w10 = 0
mov w11, #1 // locked value = w11 = 1
casa w10, w11, [x2] // Atomic CAS with acquire barrier
cbz w10, Ltrylock_dequeue_success
mov x0, #-1 // Failed
b Ltrylock_dequeue_exit
/* We got the lock, dequeue the element */
Ltrylock_dequeue_success:
ldr x10, [x0] // x10 = head of the queue
cbz x10, Lreturn_head // if head is null, return
ldr x11, [x10, x1] // get ptr to new head
cbnz x11, Lupdate_new_head // If new head != NULL, then not singleton. Only need to update head
// Singleton case
str xzr, [x0, #8] // dequeuing from singleton queue, update tail to NULL
Lupdate_new_head:
str xzr, [x10, x1] // zero the link in the old head
str x11, [x0] // Set up a new head
Lreturn_head:
mov x0, x10 // Move head to x0
stlr wzr, [x2] // Drop spin lock with release barrier (pairs with acquire in casa)
Ltrylock_dequeue_exit:
POP_FRAME
ARM64_STACK_EPILOG
/* Preemptible functions */
.private_extern _commpage_text_preemptible_functions
_commpage_text_preemptible_functions:
/*
* void pfz_enqueue(OSFifoQueueHead *__list, void *__new, size_t __offset);
* x0 = pointer to queue head
* x1 = pointer to new elem to enqueue
* x2 = offset of link field inside element
*/
.globl _pfz_enqueue
.align 2
_pfz_enqueue:
ARM64_STACK_PROLOG
PUSH_FRAME
str xzr, [x1, x2] // Zero the forward link in the new element
mov x15, xzr // zero out the register used to communicate with kernel
add x3, x0, #16 // address of lock = x3 = x0 + 16
Lenqueue_trylock_loop:
// Attempt #1
TRYLOCK_ENQUEUE x9
PREEMPT_SELF_THEN Lenqueue_determine_success
Lenqueue_determine_success:
cbz x9, Lenqueue_success // did we succeed? if so, exit
ldxr w9, [x3] // arm the monitor for the lock address
cbz w9, Lenqueue_clear_monitor // lock is available, retry.
wfe // Wait with monitor armed
// Attempt #2
TRYLOCK_ENQUEUE x9
cbz x9, Lenqueue_take_delayed_preemption_upon_success // did we succeed? if so, exit
// We failed twice - backoff then try again
BACKOFF x3
b Lenqueue_trylock_loop
Lenqueue_clear_monitor:
clrex // Pairs with the ldxr
// Take a preemption if needed then branch to enqueue_trylock_loop
PREEMPT_SELF_THEN Lenqueue_trylock_loop
Lenqueue_take_delayed_preemption_upon_success:
PREEMPT_SELF_THEN Lenqueue_success
Lenqueue_success:
POP_FRAME
ARM64_STACK_EPILOG
/*
* void *pfz_dequeue(OSFifoQueueHead *__list, size_t __offset);
* x0 = pointer to queue head
* x1 = offset of link field inside element
*
* This function is not in the PFZ but calls out to a helper which is in the PFZ
* (_pfz_trylock_and_dequeue)
*/
.globl _pfz_dequeue
.align 2
_pfz_dequeue:
ARM64_STACK_PROLOG
PUSH_FRAME
mov x15, xzr // zero out the register used to communicate with kernel
add x2, x0, #16 // address of lock = x2 = x0 + 16
Ldequeue_trylock_loop:
// Attempt #1
TRYLOCK_DEQUEUE x9
PREEMPT_SELF_THEN Ldequeue_determine_success
Ldequeue_determine_success:
cmp x9, #-1 // is result of dequeue == -1?
b.ne Ldequeue_success // no, we succeeded
ldxr w9, [x2] // arm the monitor for the lock address
cbz w9, Ldequeue_clear_monitor // lock is available, retry.
wfe // Wait with monitor armed
// Attempt #2
TRYLOCK_DEQUEUE x9
cmp x9, #-1 // did we fail?
b.ne Ldequeue_take_delayed_preemption_upon_success // no, we succeeded
// We failed twice - backoff then try again
BACKOFF x2
b Ldequeue_trylock_loop
Ldequeue_take_delayed_preemption_upon_success:
// We just got here after executing PFZ code, check if we need a preemption
PREEMPT_SELF_THEN Ldequeue_success
Ldequeue_clear_monitor:
clrex // Pairs with the ldxr
// Take a preemption if needed then branch to dequeue_trylock_loop.
PREEMPT_SELF_THEN Ldequeue_trylock_loop
Ldequeue_success:
mov x0, x9 // Move x9 (where result was stored earlier) to x0
POP_FRAME
ARM64_STACK_EPILOG
/* void preempt_self(void)
*
* Make a syscall to take a preemption. This function is not in the PFZ.
*/
.align 2
_preempt_self:
ARM64_STACK_PROLOG
PUSH_FRAME
	// Save the registers that will be clobbered by the mach trap on the stack
	// and keep it 16 byte aligned
stp x0, x1, [sp, #-16]!
	// Note: We don't need to save the remaining caller-saved registers since svc
	// triggers an exception and the kernel saves and restores the register state
// Make syscall to take delayed preemption
mov x16, #-58 // -58 = pfz_exit
svc #0x80
// Restore registers from stack
ldp x0, x1, [sp], #16
POP_FRAME
ARM64_STACK_EPILOG
/*
* void backoff(uint32_t *lock_addr);
* The function returns when it observes that the lock has become available.
* This function is not in the PFZ.
*
* x0 = lock address
*/
.align 2
.globl _backoff
_backoff:
ARM64_STACK_PROLOG
PUSH_FRAME
cbz x15, Lno_preempt // Kernel doesn't want to preempt us, jump to loop
mov x15, xzr // zero out the preemption pending field
bl _preempt_self
Lno_preempt:
ldxr w9, [x0] // Snoop on lock and arm the monitor
cbz w9, Lend_backoff // The lock seems to be available, return
wfe // pause
b Lno_preempt
Lend_backoff:
clrex
POP_FRAME
ARM64_STACK_EPILOG