/*
* Copyright (c) 2008 Apple Computer, Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
#include <sys/appleapiopts.h>
#include <machine/cpu_capabilities.h>
#include <machine/commpage.h>
#include <mach/i386/syscall_sw.h>
/* PREEMPTION FREE ZONE (PFZ)
*
* A portion of the commpage is speacial-cased by the kernel to be "preemption free",
* ie as if we had disabled interrupts in user mode. This facilitates writing
* "nearly-lockless" code, for example code that must be serialized by a spinlock but
* which we do not want to preempt while the spinlock is held.
*
* The PFZ is implemented by collecting all the "preemption-free" code into a single
* contiguous region of the commpage. Register %ebx is used as a flag register;
* before entering the PFZ, %ebx is cleared. If some event occurs that would normally
* result in a premption while in the PFZ, the kernel sets %ebx nonzero instead of
* preempting. Then, when the routine leaves the PFZ we check %ebx and
* if nonzero execute a special "pfz_exit" syscall to take the delayed preemption.
*
* PFZ code must bound the amount of time spent in the PFZ, in order to control
* latency. Backward branches are dangerous and must not be used in a way that
* could inadvertently create a long-running loop.
*
* Because they cannot be implemented reasonably without a lock, we put the "atomic"
* FIFO enqueue and dequeue in the PFZ. As long as we don't take a page fault trying to
* access queue elements, these implementations behave nearly-locklessly.
* But we still must take a spinlock to serialize, and in case of page faults.
*/
/* Work around 10062261 with a dummy non-local symbol */
fifo_queue_dummy_symbol:
/*
* typedef volatile struct {
* void *opaque1; <-- ptr to first queue element or null
* void *opaque2; <-- ptr to last queue element or null
* int opaque3; <-- spinlock
* } OSFifoQueueHead;
*
* void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
*/
/* Subroutine to make a preempt syscall. Called when we notice %ebx is
* nonzero after returning from a PFZ subroutine.
* When we enter kernel:
* %edx = return address
* %ecx = stack ptr
* Destroys %eax, %ecx, and %edx.
*/
COMMPAGE_FUNCTION_START(preempt, 32, 4)
popl %edx // get return address
movl %esp,%ecx // save stack ptr here
movl $(-58),%eax /* 58 = pfz_exit */
xorl %ebx,%ebx // clear "preemption pending" flag
sysenter
COMMPAGE_DESCRIPTOR(preempt,_COMM_PAGE_PREEMPT)
/* Subroutine to back off if we cannot get the spinlock. Called
* after a few attempts inline in the PFZ subroutines. This code is
* not in the PFZ.
* %edi = ptr to queue head structure
* %ebx = preemption flag (nonzero if preemption pending)
* Destroys %eax.
*/
COMMPAGE_FUNCTION_START(backoff, 32, 4)
testl %ebx,%ebx // does kernel want to preempt us?
jz 1f // no
xorl %ebx,%ebx // yes, clear flag
pushl %edx // preserve regs used by preempt syscall
pushl %ecx
COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff)
popl %ecx
popl %edx
1:
pause // SMT-friendly backoff
cmpl $0,8(%edi) // sniff the lockword
jnz 1b // loop if still taken
ret // lockword is free, so reenter PFZ
COMMPAGE_DESCRIPTOR(backoff,_COMM_PAGE_BACKOFF)
/* Preemption-free-zone routine to FIFO Enqueue:
* %edi = ptr to queue head structure
* %esi = ptr to element to enqueue
* %edx = offset of link field in elements
* %ebx = preemption flag (kernel sets nonzero if we should preempt)
*/
COMMPAGE_FUNCTION_START(pfz_enqueue, 32, 4)
movl $0,(%edx,%esi) // zero forward link in new element
1:
xorl %eax, %eax
orl $-1, %ecx
lock
cmpxchgl %ecx, 8(%edi) // try to take the spinlock
jz 2f // got it
pause
xorl %eax, %eax
lock
cmpxchgl %ecx, 8(%edi) // try 2nd time to take the spinlock
jz 2f // got it
pause
xorl %eax, %eax
lock
cmpxchgl %ecx, 8(%edi) // try 3rd time to take the spinlock
jz 2f // got it
COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue)
jmp 1b // loop to try again
2:
movl 4(%edi),%eax // get ptr to last element in q
testl %eax,%eax // q null?
jnz 3f // no
movl %esi,(%edi) // q empty so this is first element
jmp 4f
3:
movl %esi,(%edx,%eax) // point to new element from last
4:
movl %esi,4(%edi) // new element becomes last in q
movl $0,8(%edi) // unlock spinlock
COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue)
COMMPAGE_DESCRIPTOR(pfz_enqueue,_COMM_PAGE_PFZ_ENQUEUE)
/* Preemption-free-zone routine to FIFO Dequeue:
* %edi = ptr to queue head structure
* %edx = offset of link field in elements
* %ebx = preemption flag (kernel sets nonzero if we should preempt)
*
* Returns with next element (or 0) in %eax.
*/
COMMPAGE_FUNCTION_START(pfz_dequeue, 32, 4)
1:
xorl %eax, %eax
orl $-1, %ecx
lock
cmpxchgl %ecx, 8(%edi) // try to take the spinlock
jz 2f // got it
pause
xorl %eax, %eax
lock
cmpxchgl %ecx, 8(%edi) // try 2nd time to take the spinlock
jz 2f // got it
pause
xorl %eax, %eax
lock
cmpxchgl %ecx, 8(%edi) // try 3rd time to take the spinlock
jz 2f // got it
COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue)
jmp 1b // loop to try again
2:
movl (%edi),%eax // get ptr to first element in q
testl %eax,%eax // q null?
jz 4f // yes
movl (%edx,%eax),%esi// get ptr to 2nd element in q
testl %esi,%esi // is there a 2nd element?
jnz 3f // yes
movl %esi,4(%edi) // clear "last" field of q head
3:
movl %esi,(%edi) // update "first" field of q head
4:
movl $0,8(%edi) // unlock spinlock
COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue)
COMMPAGE_DESCRIPTOR(pfz_dequeue,_COMM_PAGE_PFZ_DEQUEUE)
COMMPAGE_FUNCTION_START(ret, 32, 4)
ret
COMMPAGE_DESCRIPTOR(ret,_COMM_PAGE_RET)
/************************* x86_64 versions follow **************************/
/*
* typedef volatile struct {
* void *opaque1; <-- ptr to first queue element or null
* void *opaque2; <-- ptr to last queue element or null
* int opaque3; <-- spinlock
* } OSFifoQueueHead;
*
* void OSAtomicFifoEnqueue( OSFifoQueueHead *list, void *new, size_t offset);
*/
/* Subroutine to make a preempt syscall. Called when we notice %ebx is
* nonzero after returning from a PFZ subroutine. Not in PFZ.
*
* All registers preserved (but does clear the %ebx preemption flag).
*/
COMMPAGE_FUNCTION_START(preempt_64, 64, 4)
pushq %rax
pushq %rcx
pushq %r11
movl $(SYSCALL_CONSTRUCT_MACH(58)),%eax /* 58 = pfz_exit */
xorl %ebx,%ebx
syscall
popq %r11
popq %rcx
popq %rax
ret
COMMPAGE_DESCRIPTOR(preempt_64,_COMM_PAGE_PREEMPT)
/* Subroutine to back off if we cannot get the spinlock. Called
* after a few attempts inline in the PFZ subroutines. This code is
* not in the PFZ.
* %rdi = ptr to queue head structure
* %ebx = preemption flag (nonzero if preemption pending)
* Uses: %rax.
*/
COMMPAGE_FUNCTION_START(backoff_64, 64, 4)
testl %ebx,%ebx // does kernel want to preempt us?
jz 1f // no
COMMPAGE_CALL(_COMM_PAGE_PREEMPT,_COMM_PAGE_BACKOFF,backoff_64)
1:
pause // SMT-friendly backoff
cmpl $0,16(%rdi) // sniff the lockword
jnz 1b // loop if still taken
ret // lockword is free, so reenter PFZ
COMMPAGE_DESCRIPTOR(backoff_64,_COMM_PAGE_BACKOFF)
/* Preemption-free-zone routine to FIFO Enqueue:
* %rdi = ptr to queue head structure
* %rsi = ptr to new element to enqueue
* %rdx = offset of link field in elements
* %ebx = preemption flag (kernel sets nonzero if we should preempt)
*/
COMMPAGE_FUNCTION_START(pfz_enqueue_64, 64, 4)
movq $0,(%rdx,%rsi) // zero forward link in new element
1:
xorl %eax, %eax
orl $-1, %ecx
lock
cmpxchgl %ecx,16(%rdi) // try to take the spinlock
jz 2f // got it
pause
xorl %eax, %eax
lock
cmpxchgl %ecx,16(%rdi) // try 2nd time to take the spinlock
jz 2f // got it
pause
xorl %eax, %eax
lock
cmpxchgl %ecx,16(%rdi) // try 3rd time to take the spinlock
jz 2f // got it
COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64)
jmp 1b // loop to try again
2:
movq 8(%rdi),%rax // get ptr to last element in q
testq %rax,%rax // q null?
jnz 3f // no
movq %rsi,(%rdi) // q empty so this is first element
jmp 4f
3:
movq %rsi,(%rdx,%rax) // point to new element from last
4:
movq %rsi,8(%rdi) // new element becomes last in q
movl $0,16(%rdi) // unlock spinlock
COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_ENQUEUE,pfz_enqueue_64)
COMMPAGE_DESCRIPTOR(pfz_enqueue_64,_COMM_PAGE_PFZ_ENQUEUE)
/* Preemption-free-zone routine to FIFO Dequeue:
* %rdi = ptr to queue head structure
* %rdx = offset of link field in elements
* %ebx = preemption flag (kernel sets nonzero if we should preempt)
*
* Returns with next element (or 0) in %rax.
*/
COMMPAGE_FUNCTION_START(pfz_dequeue_64, 64, 4)
1:
xorl %eax, %eax
orl $-1, %ecx
lock
cmpxchgl %ecx,16(%rdi) // try to take the spinlock
jz 2f // got it
pause
xorl %eax, %eax
lock
cmpxchgl %ecx,16(%rdi) // try 2nd time to take the spinlock
jz 2f // got it
pause
xorl %eax, %eax
lock
cmpxchgl %ecx,16(%rdi) // try 3rd time to take the spinlock
jz 2f // got it
COMMPAGE_CALL(_COMM_PAGE_BACKOFF,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64)
jmp 1b // loop to try again
2:
movq (%rdi),%rax // get ptr to first element in q
testq %rax,%rax // q null?
jz 4f // yes
movq (%rdx,%rax),%rsi// get ptr to 2nd element in q
testq %rsi,%rsi // is there a 2nd element?
jnz 3f // yes
movq %rsi,8(%rdi) // no - clear "last" field of q head
3:
movq %rsi,(%rdi) // update "first" field of q head
4:
movl $0,16(%rdi) // unlock spinlock
COMMPAGE_JMP(_COMM_PAGE_RET,_COMM_PAGE_PFZ_DEQUEUE,pfz_dequeue_64)
COMMPAGE_DESCRIPTOR(pfz_dequeue_64,_COMM_PAGE_PFZ_DEQUEUE)
COMMPAGE_FUNCTION_START(ret_64, 64, 4)
ret
COMMPAGE_DESCRIPTOR(ret_64,_COMM_PAGE_RET)