/* This file is from xnu-8019. */
/*
 * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 * 
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/appleapiopts.h>
#include <machine/cpu_capabilities.h>

#if defined(__i386__)

/* return mach_absolute_time in %edx:%eax
 *
 * The algorithm we use is:
 *
 *	ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
 * rnt_shift, a constant computed during initialization, is the smallest value for which:
 *
 *  (tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
 *
 * Where SLOW_TSC_THRESHOLD is about 10**9.  Since most processors' tscFreq is greater
 * than 1GHz, rnt_shift is usually 0.  rnt_tsc_scale is also a 32-bit constant:
 *
 *	rnt_tsc_scale = (10**9 * 2**32) / (tscFreq << rnt_shift);
 *
 * The conversion parameters are read directly from the _COMM_PAGE_NT_* locations.
 * The generation word is sampled before the computation and compared again
 * afterwards; a zero generation or a mismatch restarts the whole sequence, so the
 * returned value is always computed from one consistent set of parameters.
 *
 * Register use:
 *	%esi		generation snapshot taken at the top of the loop
 *	%ecx		shift count, then the 32-bit scale factor
 *	%ebx		scratch for the two-step 64x32 multiply
 * %esi and %ebx are pushed on entry and popped before returning.
 */

	.globl _mach_absolute_time
_mach_absolute_time:
	pushl	%ebp				/* set up a frame */
	movl	%esp,%ebp
	pushl	%esi				/* preserve the registers used as scratch below */
	pushl	%ebx

0:
	movl	_COMM_PAGE_NT_GENERATION,%esi	/* get generation (0 if being changed) */
	testl	%esi,%esi			/* if being updated, loop until stable */
	jz	0b

	lfence					/* fence on both sides of the TSC read */
	rdtsc					/* get TSC in %edx:%eax */
	lfence

	subl	_COMM_PAGE_NT_TSC_BASE,%eax	/* 64-bit subtract: %edx:%eax = tsc - tsc_base */
	sbbl	_COMM_PAGE_NT_TSC_BASE+4,%edx
    
    /*
     * Prior to supporting "slow" processors, xnu always set _NT_SHIFT to 32.
     * Now it defaults to 0, unless the processor is slow.  The shifts
     * below implicitly mask the count down to 5 bits, handling either default.
     */
	movl    _COMM_PAGE_NT_SHIFT,%ecx
	shldl   %cl,%eax,%edx			/* shift %edx left, filling in from %eax */
	shll    %cl,%eax			/* finish shifting %edx:%eax left by _COMM_PAGE_NT_SHIFT bits */

	movl	_COMM_PAGE_NT_SCALE,%ecx	/* %ecx = 32-bit scale factor */

	/*
	 * 64x32 multiply of the shifted delta (%edx:%eax) by the scale (%ecx),
	 * keeping bits 32..95 of the 96-bit product, i.e. (delta * scale) / 2**32.
	 */
	movl	%edx,%ebx			/* save the high half of the delta */
	mull	%ecx				/* %edx:%eax = low half * scale */
	movl	%ebx,%eax			/* %eax = high half of the delta */
	movl	%edx,%ebx			/* keep bits 32..63 of (low half * scale) */
	mull	%ecx				/* %edx:%eax = high half * scale */
	addl	%ebx,%eax			/* fold in the carry-over from the low product */
	adcl	$0,%edx

	addl	_COMM_PAGE_NT_NS_BASE,%eax	/* 64-bit add of the nanosecond base */
	adcl	_COMM_PAGE_NT_NS_BASE+4,%edx

	cmpl	_COMM_PAGE_NT_GENERATION,%esi	/* have the parameters changed? */
	jne	0b				/* yes, loop until stable */

	popl	%ebx				/* restore saved registers; result stays in %edx:%eax */
	popl	%esi
	popl	%ebp
	ret

#elif defined(__x86_64__)

/*
 * 64-bit version _mach_absolute_time.  We return the 64-bit nanotime in %rax.
 *
 * The algorithm we use is:
 *
 *	ns = ((((rdtsc - rnt_tsc_base)<<rnt_shift)*rnt_tsc_scale) / 2**32) + rnt_ns_base;
 *
 * rnt_shift, a constant computed during initialization, is the smallest value for which:
 *
 *	(tscFreq << rnt_shift) > SLOW_TSC_THRESHOLD
 *
 * Where SLOW_TSC_THRESHOLD is about 10**9.  Since most processors' tscFreqs are greater
 * than 1GHz, rnt_shift is usually 0.  rnt_tsc_scale is also a 32-bit constant:
 *
 *	rnt_tsc_scale = (10**9 * 2**32) / (tscFreq << rnt_shift);
 *
 * The parameters are read relative to _COMM_PAGE_TIME_DATA_START.  The generation
 * word is sampled before the computation and compared again afterwards; a zero
 * generation or a mismatch restarts the sequence from label 1.
 *
 * Register use:
 *	%rsi		base address of the time data
 *	%r8d		generation snapshot taken at the top of the loop
 *	%rcx		shift count (in %cl), then the zero-extended 32-bit scale
 *	%rdx:%rax	TSC value, then the 128-bit product
 */
	.globl	_mach_absolute_time
_mach_absolute_time:
	pushq	%rbp				// set up a frame for backtraces
	movq	%rsp,%rbp
	movq	$(_COMM_PAGE_TIME_DATA_START),%rsi
1:
	movl	_NT_GENERATION(%rsi),%r8d	// get generation
	testl	%r8d,%r8d			// if 0, data is being changed...
	jz      1b				// ...so loop until stable
	lfence
	rdtsc					// edx:eax := tsc
	lfence
	shlq	$32,%rdx			// rax := ((edx << 32) | eax), ie 64-bit tsc
	orq     %rdx,%rax

    /*
     * Prior to supporting "slow" processors, xnu always set _NT_SHIFT to 32.
     * Now it defaults to 0, unless the processor is slow.  In order to maintain
     * compatibility with both old and new versions of xnu, we mask the shift
     * down to 0x1F, which maps the old default (32) into the new default (0).
     */
	movl    _NT_SHIFT(%rsi),%ecx
	andl    $0x1F,%ecx			// *** remove this line once 10.9 is GM ***
	subq	_NT_TSC_BASE(%rsi), %rax	// rax := (tsc - base_tsc)
	shlq    %cl,%rax			// rax := (tsc - base_tsc) << NT_SHIFT
	movl	_NT_SCALE(%rsi),%ecx		// 32-bit load, so the upper half of rcx is zero
	mulq	%rcx				// rdx:rax := ((tsc - base_tsc)<<shift) * scale
	shrdq	$32,%rdx,%rax			// divide by 2**32
	addq	_NT_NS_BASE(%rsi),%rax		// (((tsc - base_tsc) * scale) >> 32) + ns_base
	
	cmpl	_NT_GENERATION(%rsi),%r8d	// did the data change during computation?
	jne     1b				// yes, recompute from a fresh snapshot
	popq	%rbp
	ret

#elif defined(__arm__)

#include <mach/arm/syscall_sw.h>
#include <mach/arm/traps.h>

/*
 * If userspace access to the timebase is supported (indicated through the commpage),
 * directly reads the timebase and uses it and the current timebase offset (also in
 * the commpage, and updated whenever the system wakes from sleep) to construct the
 * current time value; otherwise, traps to the kernel to handle this.
 *
 * If we do this in user mode, there are two cases where we may need to redrive the
 * read.  We do 3 reads (high-low-high) to the timebase, because we only have a
 * 32-bit interface to it (despite the use of mrrc).  If the high bits change, we
 * need to reread the register (as our returned value could otherwise be off by
 * 2^32 mach absolute time units).
 *
 * We do two reads of the offset, before and after the register reads.  If the offset
 * changes, we have gone to sleep in the midst of doing a read.  This case should be
 * exceedingly rare, but could result in a terribly inaccurate result, so we need
 * to get a fresh timebase value.
 *
 * Register use on the user-mode path:
 *	ip		address of the timebase offset (_COMM_PAGE_TIMEBASE_OFFSET)
 *	r4, r5		offset low/high as read before the timebase reads
 *	r2, r3		offset low/high as read after the timebase reads
 *	r0, r1		timebase low/high, then the 64-bit result (r1:r0)
 * r4, r5, r7 and lr are pushed on the user-mode path and popped on return.
 *
 * NOTE(review): the mrrc instructions below use opc1 == 0, which the ARM
 * architecture manual lists as the CNTPCT encoding (CNTVCT is opc1 == 1), while
 * the isb comment names CNTVCT -- confirm which counter is intended.
 */
	.text
	.align 2
	.globl _mach_absolute_time
_mach_absolute_time:
	movw	ip, #((_COMM_PAGE_TIMEBASE_OFFSET) & 0x0000FFFF)	// ip := address of the offset, low half...
	movt	ip, #(((_COMM_PAGE_TIMEBASE_OFFSET) >> 16) & 0x0000FFFF)	// ...and high half
	ldrb	r0, [ip, #((_COMM_PAGE_USER_TIMEBASE) - (_COMM_PAGE_TIMEBASE_OFFSET))]	// Load the user-timebase mode byte
	cmp	r0, #USER_TIMEBASE_NONE		// Are userspace reads supported?
	beq	_mach_absolute_time_kernel	// If not, go to the kernel
	isb					// Prevent speculation on CNTVCT across calls
						// (see ARMV7C.b section B8.1.2, ARMv8 section D6.1.2)
	push	{r4, r5, r7, lr}		// Push a frame
	add	r7, sp, #8			// r7 := address of the saved {r7, lr} pair
L_mach_absolute_time_user:
	ldr	r4, [ip]			// Load offset low bits
	ldr	r5, [ip, #4]			// Load offset high bits
	mrrc	p15, 0, r3, r1, c14		// Read timebase high to r1
	mrrc	p15, 0, r0, r3, c14		// Read timebase low to r0
	mrrc	p15, 0, r3, r2, c14		// Read timebase high to r2
	cmp	r1, r2				// Did the high bits change?
	bne	L_mach_absolute_time_user	// Loop if timebase high changed
	ldr	r2, [ip]			// Load offset low bits
	ldr	r3, [ip, #4]			// Load offset high bits
	eor	r4, r2				// Compare our offset values...
	eor	r5, r3
	orrs	r5, r4				// ...nonzero if either half differs
	bne	L_mach_absolute_time_user	// If they changed, try again
	adds	r0, r0, r2			// Construct mach_absolute_time
	adcs	r1, r1, r3			// High word, including the carry from the low add
	pop	{r4, r5, r7, pc}		// Pop the frame

	.text
	// Kernel path for mach_absolute_time: select the MACH_ARM_TRAP_ABSTIME
	// trap in ip (r12), issue the supervisor call, and return to the caller.
	// _mach_absolute_time branches here when userspace timebase reads are
	// not supported.
	.globl	_mach_absolute_time_kernel
	.p2align 2
_mach_absolute_time_kernel:
	mov	ip, #MACH_ARM_TRAP_ABSTIME	// ip is r12: trap selector
	svc	#SWI_SYSCALL			// svc is the same instruction as swi
	bx	lr

	.text
	// Kernel path for continuous time: select the MACH_ARM_TRAP_CONTTIME
	// trap in ip (r12), issue the supervisor call, and return to the caller.
	.globl	_mach_continuous_time_kernel
	.p2align 2
_mach_continuous_time_kernel:
	mov	ip, #MACH_ARM_TRAP_CONTTIME	// ip is r12: trap selector
	svc	#SWI_SYSCALL			// svc is the same instruction as swi
	bx	lr

#elif defined(__arm64__)

#include <mach/arm/syscall_sw.h>
#include <mach/arm/traps.h>

/*
 * CALC_MACH_ABSOLUTE_TIME reg
 *
 * Reads the system register \reg and adds the 64-bit offset stored at [x3].
 * The offset is loaded before and after the register read; if the two loads
 * differ, the whole sequence is retried.
 *
 * In:       x3 = address of the offset
 * Out:      x0 = register value + offset
 * Clobbers: x1, x2, condition flags
 *
 * The loop label is numeric so that the macro can be expanded more than once
 * in the same file without symbol clashes.
 */
.macro CALC_MACH_ABSOLUTE_TIME reg
1:
	ldr	x1, [x3]			// Load the offset
	mrs	x0, \reg			// Read the timebase
	ldr	x2, [x3]			// Load the offset
	cmp	x1, x2				// Compare our offset values...
	b.ne	1b				// If they changed, try again
	add	x0, x0, x1			// Construct mach_absolute_time
.endm

/*
 * If userspace access to the timebase is supported (indicated through the commpage),
 * directly reads the timebase and uses it and the current timebase offset (also in
 * the commpage, and updated whenever the system wakes from sleep) to construct the
 * current time value; otherwise, traps to the kernel to handle this.
 *
 * If we do this in user mode, we do two reads of the offset, before and after we
 * read the register.  If the offset changes, we have gone to sleep in the midst of
 * doing a read.  This case should be exceedingly rare, but could result in a terribly
 * inaccurate result, so we need to get a fresh timebase value.
 *
 * The byte at _COMM_PAGE_USER_TIMEBASE selects one of three paths:
 *	USER_TIMEBASE_NONE	branch to _mach_absolute_time_kernel
 *	USER_TIMEBASE_NOSPEC	read CNTVCTSS_EL0, with no isb beforehand
 *	anything else		isb, then read CNTVCT_EL0
 *
 * Register use: x3 = address of the timebase offset, w2 = mode byte (x1 and x2
 * are then clobbered by CALC_MACH_ABSOLUTE_TIME), x0 = result.
 */
	.text
	.align 2
	.globl _mach_absolute_time
_mach_absolute_time:
	// The four movk instructions together write all four 16-bit fields of x3,
	// so x3 is fully defined afterwards without a leading movz.
	movk	x3, #(((_COMM_PAGE_TIMEBASE_OFFSET) >> 48) & 0x000000000000FFFF), lsl #48
	movk	x3, #(((_COMM_PAGE_TIMEBASE_OFFSET) >> 32) & 0x000000000000FFFF), lsl #32
	movk	x3, #(((_COMM_PAGE_TIMEBASE_OFFSET) >> 16) & 0x000000000000FFFF), lsl #16
	movk	x3, #((_COMM_PAGE_TIMEBASE_OFFSET) & 0x000000000000FFFF)
	ldrb	w2, [x3, #((_COMM_PAGE_USER_TIMEBASE) - (_COMM_PAGE_TIMEBASE_OFFSET))]	// Mode byte; the w-register write zeroes the upper half of x2

	cmp	x2, #USER_TIMEBASE_NONE		// Are userspace reads supported?
	b.eq	_mach_absolute_time_kernel	// If not, go to the kernel
	cmp	x2, #USER_TIMEBASE_NOSPEC
	b.eq	L_mach_absolute_time_user_nospec
	// Fallthrough to USER_TIMEBASE_SPEC case below

	isb					// Prevent speculation on CNTVCT across calls
	CALC_MACH_ABSOLUTE_TIME	CNTVCT_EL0	// (see ARMV7C.b section B8.1.2, ARMv8 section D6.1.2)
	ret

L_mach_absolute_time_user_nospec:
	// CNTVCTSS_EL0 is spelled by its generic system-register encoding here,
	// presumably so that assemblers which do not know the name still accept it.
	// NOTE(review): this path omits the isb; presumably this register does not
	// need one -- confirm against the architecture manual.
#define CNTVCTSS_EL0    S3_3_c14_c0_6
	CALC_MACH_ABSOLUTE_TIME	CNTVCTSS_EL0
	ret



	.text
	// Kernel path for mach_absolute_time: select the MACH_ARM_TRAP_ABSTIME
	// trap in w16, issue the supervisor call, and return to the caller.
	// _mach_absolute_time branches here when userspace timebase reads are
	// not supported.
	.globl	_mach_absolute_time_kernel
	.p2align 2
_mach_absolute_time_kernel:
	mov	w16, #MACH_ARM_TRAP_ABSTIME	// w16: trap selector
	svc	#SWI_SYSCALL
	ret	x30				// plain "ret" spelled with its default operand

	.text
	// Kernel path for continuous time: select the MACH_ARM_TRAP_CONTTIME
	// trap in w16, issue the supervisor call, and return to the caller.
	.globl	_mach_continuous_time_kernel
	.p2align 2
_mach_continuous_time_kernel:
	mov	w16, #MACH_ARM_TRAP_CONTTIME	// w16: trap selector
	svc	#SWI_SYSCALL
	ret	x30				// plain "ret" spelled with its default operand

#else
#error Unsupported architecture
#endif