/*
 * Copyright (c) 2010-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 * 
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <i386/asm.h>
#include <assym.s>
#include <debug.h>
#include "dwarf_unwind.h"
#include <i386/eflags.h>
#include <i386/rtclock_asm.h>
#include <i386/trap.h>
#define _ARCH_I386_ASM_HELP_H_	/* Prevent inclusion of user header */
#include <mach/i386/syscall_sw.h>
#include <i386/postcode.h>
#include <i386/proc_reg.h>
#include <mach/exception_types.h>

#if DEBUG
#define	DEBUG_IDT64 		1	
#endif

/*
 * This is the low-level trap and interrupt handling code associated with
 * the IDT. It also includes system call handlers for sysenter/syscall.
 * The IDT itself is defined in mp_desc.c.
 *
 * Code here is structured as follows:
 *
 * stubs 	Code called directly from an IDT vector.
 *		All entry points have the "idt64_" prefix and they are built
 *		using macros expanded by the inclusion of idt_table.h.
 *		This code performs vector-dependent identification and jumps
 * 		into the dispatch code.
 *
 * dispatch	The dispatch code is responsible for saving the thread state
 *		(which is either 64-bit or 32-bit) and then jumping to the
 *		class handler identified by the stub.
 *
 * returns	Code to restore state and return to the previous context.
 *
 * handlers	There are several classes of handlers:
 *   interrupt	- asynchronous events typically from external devices
 *   trap	- synchronous events due to thread execution
 *   syscall	- synchronous system call request
 *   fatal	- fatal traps
 */
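/*
 * A rough C-style sketch of that flow (names here are illustrative only,
 * not actual kernel symbols):
 *
 *	stub(vector):	push(HNDL_index); push(vector); goto L_dispatch;
 *	dispatch:	save_thread_state();	// 64-bit or 32-bit flavor
 *			idt64_hndl_table1[HNDL_index](saved_state);
 *	return:		restore_thread_state();	// then iretq/sysretq/sysexitl
 */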
/*
 * Indices of handlers for each exception type.
 */
#define	HNDL_ALLINTRS		0
#define	HNDL_ALLTRAPS		1
#define	HNDL_SYSENTER		2
#define	HNDL_SYSCALL		3
#define	HNDL_UNIX_SCALL		4
#define	HNDL_MACH_SCALL		5
#define	HNDL_MDEP_SCALL		6
#define	HNDL_DOUBLE_FAULT	7
#define	HNDL_MACHINE_CHECK	8

	
/* Begin double-mapped descriptor section */
	
.section	__HIB, __desc
.globl EXT(idt64_hndl_table0)
EXT(idt64_hndl_table0):
/* 0x00 */	.quad EXT(ks_dispatch)
/* 0x08 */	.quad EXT(ks_64bit_return)
/* 0x10 */	.quad 0 /* Populated with CPU shadow displacement*/
/* 0x18 */	.quad EXT(ks_32bit_return)
#define	TBL0_OFF_DISP_USER_WITH_POPRAX	0x20
/* 0x20 */	.quad EXT(ks_dispatch_user_with_pop_rax)
#define	TBL0_OFF_DISP_KERN_WITH_POPRAX	0x28
/* 0x28 */	.quad EXT(ks_dispatch_kernel_with_pop_rax)
#define	TBL0_OFF_PTR_KERNEL_STACK_MASK	0x30
/* 0x30 */	.quad 0 /* &kernel_stack_mask */

EXT(idt64_hndl_table1):
	.quad	EXT(hndl_allintrs)
	.quad	EXT(hndl_alltraps)
	.quad	EXT(hndl_sysenter)
	.quad	EXT(hndl_syscall)
	.quad	EXT(hndl_unix_scall)
	.quad	EXT(hndl_mach_scall)
	.quad	EXT(hndl_mdep_scall)
	.quad	EXT(hndl_double_fault)
	.quad	EXT(hndl_machine_check)
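/*
 * The entries above are indexed by the HNDL_* values defined earlier
 * (HNDL_ALLINTRS == 0 selects hndl_allintrs, and so on); the common dispatch
 * code reaches them via "jmp *(%rax, %rdx, 8)" with the HNDL_* index in %rdx.
 */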
.text


/* The wrapper for all non-special traps/interrupts */
/* Everything before the pushq of the handler index and vector is there
 * only to output the interrupt number to the postcode display
 */
#if DEBUG_IDT64
#define IDT_ENTRY_WRAPPER(n, f)			 \
	push	%rax				;\
	POSTCODE2(0x6400+n)			;\
	pop	%rax				;\
	pushq	$(f)				;\
	pushq	$(n)				;\
	jmp L_dispatch
#else
#define IDT_ENTRY_WRAPPER(n, f)			 \
	pushq	$(f)				;\
	pushq	$(n)				;\
	jmp L_dispatch
#endif
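/*
 * After one of these wrappers runs, L_dispatch sees a frame of the form
 * below (a sketch, using the ISF64 layout implied by the offsets elsewhere
 * in this file):
 *
 *	 0(%rsp)	trap/vector number (n)
 *	 8(%rsp)	handler index (f, one of the HNDL_* values)
 *	16(%rsp)	error code (pushed by the CPU, or $0 by TRAP/INTERRUPT)
 *	24(%rsp)	RIP, CS, RFLAGS, RSP, SS pushed by the CPU
 */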

/* A trap that comes with an error code already on the stack */
#define TRAP_ERR(n, f)				 \
	Entry(f)				;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

/* A normal trap */
#define TRAP(n, f)				 \
	Entry(f)				;\
	pushq	$0          			;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLTRAPS)

#define USER_TRAP TRAP

/* An interrupt */
#define INTERRUPT(n)			 	\
	Entry(_intr_ ## n)			;\
	pushq	$0          			;\
	IDT_ENTRY_WRAPPER(n, HNDL_ALLINTRS)

/* A trap with a special-case handler, hence we don't need to define anything */
#define TRAP_SPC(n, f)
#define TRAP_IST1(n, f)
#define TRAP_IST2(n, f)
#define USER_TRAP_SPC(n, f)

/* Begin double-mapped text section */
.section __HIB, __text
/* Generate all the stubs */
#include "idt_table.h"

Entry(idt64_page_fault)
	pushq	$(HNDL_ALLTRAPS)
#if !(DEVELOPMENT || DEBUG)
	pushq	$(T_PAGE_FAULT)
	jmp	L_dispatch
#else
	pushq	$(T_PAGE_FAULT)

	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	testb	$3, 8+8+8+ISF64_CS(%rsp)	/* Coming from userspace? */
	jz	L_pfkern		/* No? (relatively uncommon), goto L_pfkern */

	/*
	 * We faulted from the user; if the fault address is the user's %rip itself,
	 * abort the attempt to save the cacheline, since reading it would only add
	 * another page fault's worth of overhead when we recover, below.
	 */
	movq	8+8+8+ISF64_RIP(%rsp), %rbx
	movq	%cr2, %rcx
	cmpq	%rbx, %rcx

	/* note that the next 3 instructions do not affect RFLAGS */
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax	/* Offset of per-CPU shadow */

	jne	L_dispatch_from_user_with_rbx_rcx_pushes
	jmp	abort_rip_cacheline_read

L_pfkern:
	/*
	 * Kernel page fault
	 * If the fault occurred while reading the user's code cache line, abort the cache line read;
	 * otherwise, treat this as a regular kernel fault
	 */
	movq	8+8+8+ISF64_RIP(%rsp), %rbx
	leaq	rip_cacheline_read(%rip), %rcx
	cmpq	%rcx, %rbx
	jb	regular_kernel_page_fault
	leaq	rip_cacheline_read_end(%rip), %rcx
	cmpq	%rcx, %rbx
	jbe	L_pf_on_clread	/* Did we hit a #PF within the cacheline read? */

regular_kernel_page_fault:
	/* No, regular kernel #PF */
	popq	%rcx
	popq	%rbx
	jmp	L_dispatch_from_kernel_no_push_rax

L_pf_on_clread:
	/*
	 * We faulted while trying to read user instruction memory at the parent fault's %rip; abort that action by
	 * changing the return address on the stack, restoring cr2 to its previous value, peeling off the pushes we
	 * added on entry to the page fault handler, then performing an iretq
	 */
	popq	%rcx
	movq	%rcx, %cr2
	popq	%rbx
	leaq	abort_rip_cacheline_read(%rip), %rax
	movq	%rax, 8+ISF64_RIP(%rsp)
	popq	%rax
	addq	$24, %rsp	/* pop the 2 pushes + the error code */
	iretq			/* Resume previous trap/fault processing */
#endif /* !(DEVELOPMENT || DEBUG) */

/*
 * #DB handler, which runs on IST1, will treat as spurious any #DB received while executing in the
 * kernel while not on the kernel's gsbase.
 */
Entry(idt64_debug)
	/* Synthesize common interrupt stack frame */
	push	$0			/* error code */
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_DEBUG)
	/* Spill prior to RDMSR */
	push	%rax
	push	%rcx
	push	%rdx
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr					/* Check contents of GSBASE MSR */
	test	$0x80000000, %edx		/* MSB set? Already swapped to kernel's */
	jnz	1f

	/*
	 * If we're not already swapped to the kernel's gsbase AND this #DB originated from kernel space,
	 * it must have happened within the very small window on entry or exit before or after (respectively)
	 * swapgs occurred.  In those cases, consider the #DB spurious and immediately return.
	 */
	testb	$3, 8+8+8+ISF64_CS(%rsp)
	jnz	2f
	pop	%rdx
	pop	%rcx
	pop	%rax
	addq	$0x18, %rsp	/* Remove synthesized interrupt stack frame */
	jmp	EXT(ret64_iret)
2:
	swapgs					/* direct from user */
1:
	pop	%rdx

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax /* Offset of per-CPU shadow */

	mov	%gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov	%rax, %cr3

	pop	%rcx

	/* Note that %rax will be popped from the stack in ks_dispatch, below */

	leaq    EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*(%rax)

/*
 * Legacy interrupt-gate system call handlers.
 * These are entered via a software interrupt (INT). The system call number
 * in %rax is saved to the error code slot in the stack frame; we then branch
 * to the common state-saving code.
 */

#ifndef UNIX_INT
#error NO UNIX INT!!!
#endif
Entry(idt64_unix_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_UNIX_SCALL)
	pushq	$(UNIX_INT)
	jmp	L_u64bit_entry_check
	
Entry(idt64_mach_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_MACH_SCALL)
	pushq	$(MACH_INT)
	jmp	L_u64bit_entry_check
	
Entry(idt64_mdep_scall)
	pushq	%rax			/* save system call number */
	pushq	$(HNDL_MDEP_SCALL)
	pushq	$(MACHDEP_INT)
	jmp	L_u64bit_entry_check

/*
 * For GP/NP/SS faults, we use the IST1 stack.
 * For faults from user-space, we have to copy the machine state to the
 * PCB stack and then dispatch as normal.
 * For faults in kernel-space, we need to scrub for kernel exit faults and
 * treat these as user-space faults. But for all other kernel-space faults
 * we continue to run on the IST1 stack as we dispatch to handle the fault
 * as fatal.
 */
Entry(idt64_segnp)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_SEGMENT_NOT_PRESENT)
	jmp	L_check_for_kern_flt

Entry(idt64_gen_prot)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_GENERAL_PROTECTION)
	jmp	L_check_for_kern_flt

Entry(idt64_stack_fault)
	pushq	$(HNDL_ALLTRAPS)
	pushq	$(T_STACK_FAULT)
	jmp	L_check_for_kern_flt

L_check_for_kern_flt:
	/*
	 * If we took a #GP or #SS from the kernel, check if we took them
	 * from either ret32_iret or ret64_iret.  If we did, we need to
	 * jump into L_dispatch at the swapgs so that the code in L_dispatch
	 * can proceed with the correct GSbase.
	 */
	pushq	%rax
	testb	$3, 8+ISF64_CS(%rsp)
	jnz	L_dispatch_from_user_no_push_rax		/* Fault from user, go straight to dispatch */

	/* Check if the fault occurred in the 32-bit segment restoration window (which executes with user gsb) */
	leaq	L_32bit_seg_restore_begin(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	jb	L_not_32bit_segrestores
	leaq	L_32bit_seg_restore_done(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	jae	L_not_32bit_segrestores
	jmp	1f
L_not_32bit_segrestores:
	leaq	EXT(ret32_iret)(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	je	1f
	leaq	EXT(ret64_iret)(%rip), %rax
	cmpq	%rax, 8+ISF64_RIP(%rsp)
	je	1f
	jmp	L_dispatch_from_kernel_no_push_rax
	/*
	 * We hit the fault on iretq, so check the original return %cs.  If
	 * it's a user %cs, fix up the stack and then jump to dispatch.
	 *
	 * With this type of fault, the stack is laid out as follows:
	 *
	 * 
	 * orig %ss      saved_rsp+32
	 * orig %rsp     saved_rsp+24
	 * orig %rflags  saved_rsp+16
	 * orig %cs      saved_rsp+8
	 * orig %rip     saved_rsp
         * ^^^^^^^^^ (maybe on another stack, since we switched to IST1)
	 * %ss           +64            -8
	 * saved_rsp     +56           -16
	 * %rflags       +48           -24
	 * %cs           +40           -32
	 * %rip          +32           -40
	 * error code    +24           -48
	 * handler       +16           -56
	 * trap number   +8            -64
	 * <saved %rax>  <== %rsp      -72
	 */
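	/*
	 * A note on the "-24" offsets used below (a sketch of the arithmetic,
	 * assuming the ISF64 layout used throughout this file): %rbx is loaded
	 * with the RSP at the time of the faulting iretq, which points at the
	 * original frame's RIP slot.  Since ISF64_RIP is 24 bytes into an ISF,
	 * ISF64_CS-24(%rbx) addresses the original CS, ISF64_RSP-24(%rbx) the
	 * original RSP, and so on.
	 */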
1:
	pushq	%rbx
	movq	16+ISF64_RSP(%rsp), %rbx
	movq	ISF64_CS-24(%rbx), %rax
	testb	$3, %al					/* If the original return destination was to user */
	jnz	2f
	popq	%rbx
	jmp	L_dispatch_from_kernel_no_push_rax	/* Fault occurred when trying to return to kernel */
2:
	/*
	 * Fix the stack so the original trap frame is current, then jump to dispatch
	 */

	movq	%rax, 16+ISF64_CS(%rsp)

	movq	ISF64_RSP-24(%rbx), %rax
	movq	%rax, 16+ISF64_RSP(%rsp)

	movq	ISF64_RIP-24(%rbx), %rax
	movq	%rax, 16+ISF64_RIP(%rsp)

	movq	ISF64_SS-24(%rbx), %rax
	movq	%rax, 16+ISF64_SS(%rsp)

	movq	ISF64_RFLAGS-24(%rbx), %rax
	movq	%rax, 16+ISF64_RFLAGS(%rsp)

	popq	%rbx
	jmp	L_dispatch_from_user_no_push_rax


/*
 * Fatal exception handlers:
 */
Entry(idt64_db_task_dbl_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch

Entry(idt64_db_task_stk_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_STACK_FAULT)
	jmp	L_dispatch

Entry(idt64_mc)
	push	$(0)			/* Error */
	pushq	$(HNDL_MACHINE_CHECK)
	pushq	$(T_MACHINE_CHECK)
	jmp	L_dispatch

/*
 * NMI
 * This may or may not be fatal, but extreme care is required because the NMI
 * may arrive while control is already inside another trampoline.
 *
 * We get here on the IST2 stack, which is used exclusively for NMIs.
 * Machine checks, double faults and similar use IST1.
 */
Entry(idt64_nmi)
	push	%rax
	push	%rcx
	push	%rdx
	testb	$3, ISF64_CS(%rsp)
	jz	1f

	/* From user-space: copy interrupt state to user PCB */
	swapgs

	leaq    EXT(idt64_hndl_table0)(%rip), %rax
	mov     16(%rax), %rax /* Offset of per-CPU shadow */
	mov     %gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov     %rax, %cr3			/* note that SMAP is enabled in L_common_dispatch (on Broadwell+) */

	mov	%gs:CPU_UBER_ISF, %rcx		/* PCB stack addr */
	add	$(ISF64_SIZE), %rcx		/* adjust to base of ISF */

	leaq    TBL0_OFF_DISP_USER_WITH_POPRAX+EXT(idt64_hndl_table0)(%rip), %rax		/* ks_dispatch_user_with_pop_rax */
	jmp	4f						/* Copy state to PCB */

1:
	/*
	 * From kernel-space:
	 * Determine whether the kernel or user GS base is currently in use.
	 * If it's the user's, swap to the kernel's and set the high 32 bits of
	 * the saved CS slot to 1 so that we know to swapgs back at IRET.
	 */
	mov	$(MSR_IA32_GS_BASE), %ecx
	rdmsr					/* read kernel gsbase */
	test	$0x80000000, %edx		/* test MSB of address */
	jnz	2f
	swapgs					/* so swap */
	movl	$1, ISF64_CS+4(%rsp)		/* and set flag in CS slot */
2:

	leaq    EXT(idt64_hndl_table0)(%rip), %rax
	mov     16(%rax), %rax /* Offset of per-CPU shadow */
	mov	%cr3, %rdx
	mov     %gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov     %rax, %cr3 /* Unconditionally switch to primary kernel pagetables */

	/*
	 * Determine whether we're on the kernel or interrupt stack
	 * when the NMI hit.
	 */
	mov	ISF64_RSP(%rsp), %rcx
	mov	%gs:CPU_KERNEL_STACK, %rax
	xor	%rcx, %rax
	movq	TBL0_OFF_PTR_KERNEL_STACK_MASK+EXT(idt64_hndl_table0)(%rip), %rdx
	mov	(%rdx), %rdx		/* Load kernel_stack_mask */
	and	%rdx, %rax
	test	%rax, %rax		/* are we on the kernel stack? */
	jz	3f			/* yes */
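	/*
	 * In C terms, the test above is roughly (a sketch):
	 *	on_kernel_stack = ((nmi_rsp ^ cpu_kernel_stack) & kernel_stack_mask) == 0;
	 * i.e. the interrupted RSP and this CPU's kernel stack differ only in
	 * their low, intra-stack bits.
	 */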

	mov	%gs:CPU_INT_STACK_TOP, %rax
	cmp	%rcx, %rax		/* are we on the interrupt stack? */
	jb	5f			/* no */
	leaq	-INTSTACK_SIZE(%rax), %rax
	cmp	%rcx, %rax
	jb	3f			/* yes */
5:
	mov    %gs:CPU_KERNEL_STACK, %rcx
3:
	/* 16-byte-align kernel/interrupt stack for state push */
	and	$0xFFFFFFFFFFFFFFF0, %rcx

	leaq    TBL0_OFF_DISP_KERN_WITH_POPRAX+EXT(idt64_hndl_table0)(%rip), %rax		/* ks_dispatch_kernel_with_pop_rax */
4:
	/*
	 * Copy state from NMI stack (RSP) to the save area (RCX) which is
	 * the PCB for user or kernel/interrupt stack from kernel.
	 * ISF64_ERR(RSP)    saved RAX
	 * ISF64_TRAPFN(RSP) saved RCX
	 * ISF64_TRAPNO(RSP) saved RDX
	 */
	xchg	%rsp, %rcx			/* set for pushes */
	push	ISF64_SS(%rcx)
	push	ISF64_RSP(%rcx)
	push	ISF64_RFLAGS(%rcx)
	push	ISF64_CS(%rcx)
	push	ISF64_RIP(%rcx)
	/* Synthesize common interrupt stack frame */
	push	$(0)				/* error code 0 */
	push	$(HNDL_ALLINTRS)		/* trapfn allintrs */
	push	$(T_NMI)			/* trapno T_NMI */
	push	ISF64_ERR(%rcx)			/* saved %rax is popped in ks_dispatch_{kernel|user}_with_pop_rax */
	mov	ISF64_TRAPNO(%rcx), %rdx
	mov	ISF64_TRAPFN(%rcx), %rcx

	jmp	*(%rax)		/* ks_dispatch_{kernel|user}_with_pop_rax */

Entry(idt64_double_fault)
	pushq	$(HNDL_DOUBLE_FAULT)
	pushq	$(T_DOUBLE_FAULT)
	jmp	L_dispatch

Entry(hi64_syscall)
Entry(idt64_syscall)
	swapgs
     /* Free RAX for use as a temporary by stashing its contents in R11[32:63].
      * The system call number is defined to be a 32-bit quantity, as is
      * RFLAGS (which SYSCALL has placed in R11), so R11's upper half is free.
      */
	shlq	$32, %rax
	or 	%rax, %r11
.globl EXT(dblsyscall_patch_point)
EXT(dblsyscall_patch_point):
//	movabsq	$0x12345678ABCDEFFFULL, %rax
     /* Load the displacement of the double-mapped per-CPU data shadow
      * into RAX
      */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax
	mov     %rsp, %gs:CPU_UBER_TMP(%rax)  /* save user stack */
	mov     %gs:CPU_ESTACK(%rax), %rsp  /* switch stack to per-cpu estack */
	sub	$(ISF64_SIZE), %rsp

	/*
	 * Synthesize an ISF frame on the exception stack
	 */
	movl	$(USER_DS), ISF64_SS(%rsp)
	mov	%rcx, ISF64_RIP(%rsp)		/* rip */

	mov	%gs:CPU_UBER_TMP(%rax), %rcx
	mov	%rcx, ISF64_RSP(%rsp)		/* user stack --changed */

	mov	%r11, %rax
	shrq	$32, %rax		/* Restore RAX */
	mov	%r11d, %r11d		/* Clear r11[32:63] */

	mov	%r11, ISF64_RFLAGS(%rsp)	/* rflags */
	movl	$(SYSCALL_CS), ISF64_CS(%rsp)	/* cs - a pseudo-segment */
	mov	%rax, ISF64_ERR(%rsp)		/* err/rax - syscall code */
	movq	$(HNDL_SYSCALL), ISF64_TRAPFN(%rsp)
	movq	$(T_SYSCALL), ISF64_TRAPNO(%rsp)	/* trapno */
	swapgs
	jmp	L_dispatch			/* this can only be 64-bit */

Entry(hi64_sysenter)
Entry(idt64_sysenter)
	/* Synthesize an interrupt stack frame onto the
	 * exception stack.
	 */
	push	$(USER_DS)		/* ss */
	push	%rcx			/* uesp */
	pushf				/* flags */
	/*
	 * Clear, among others, the Nested Task (NT) flags bit;
	 * this is zeroed by INT, but not by SYSENTER.
	 */
	push	$0
	popf
	push	$(SYSENTER_CS)		/* cs */ 
L_sysenter_continue:
	push	%rdx			/* eip */
	push	%rax			/* err/eax - syscall code */
	pushq	$(HNDL_SYSENTER)
	pushq	$(T_SYSENTER)
	orl	$(EFL_IF), ISF64_RFLAGS(%rsp)
	jmp	L_u64bit_entry_check

#if DEVELOPMENT || DEBUG
do_cacheline_stash:
	/*
	 * Copy the cache line that includes the user's EIP/RIP into the shadow cpu structure
	 * for later extraction/sanity-checking in user_trap().
	 */

	pushq	%rbx
	pushq	%rcx
L_dispatch_from_user_with_rbx_rcx_pushes:
	movq	8+8+8+ISF64_RIP(%rsp), %rbx
	andq	$-64, %rbx	/* Round address to cacheline boundary */
	pushf
	/*
	 * Set EFLAGS.AC to suspend SMAP enforcement, if SMAP is enabled (note that STAC/CLAC are
	 * present on Broadwell and later only, so we use generic flag manipulation here without
	 * checking whether the CPU supports SMAP first)
	 */
	orq	$(1 << 18), (%rsp)
	popf
	/*
	 * Note that we only check for a faulting read on the first read: if the first read
	 * succeeds, the rest of the cache line should also be readable, because we are running
	 * with interrupts disabled here and a TLB invalidation cannot sneak in and pull the rug out.
	 */
	movq	%cr2, %rcx	/* stash the original %cr2 in case the first cacheline read triggers a #PF */
				/* This value of %cr2 is restored in the page fault handler if it detects */
				/* that the fault occurred on the next instruction, so the original #PF can */
				/* continue to be handled without issue. */
rip_cacheline_read:
	mov	(%rbx), %rcx
	/* Note that CPU_RTIMES in the shadow cpu struct was just a convenient place to stash the cacheline */
	mov	%rcx, %gs:CPU_RTIMES(%rax)
	movq    %cr2, %rcx
	mov	8(%rbx), %rcx
	mov	%rcx, %gs:8+CPU_RTIMES(%rax)
	movq    %cr2, %rcx
	mov	16(%rbx), %rcx
	mov	%rcx, %gs:16+CPU_RTIMES(%rax)
	movq    %cr2, %rcx
	mov	24(%rbx), %rcx
	mov	%rcx, %gs:24+CPU_RTIMES(%rax)
	movq    %cr2, %rcx
	mov	32(%rbx), %rcx
	mov	%rcx, %gs:32+CPU_RTIMES(%rax)
	movq    %cr2, %rcx
	mov	40(%rbx), %rcx
	mov	%rcx, %gs:40+CPU_RTIMES(%rax)
	movq    %cr2, %rcx
	mov	48(%rbx), %rcx
	mov	%rcx, %gs:48+CPU_RTIMES(%rax)
	movq    %cr2, %rcx
rip_cacheline_read_end:
	mov	56(%rbx), %rcx
	mov	%rcx, %gs:56+CPU_RTIMES(%rax)

	pushf
	andq	$~(1 << 18), (%rsp) 	/* reenable SMAP */
	popf

	jmp	cacheline_read_cleanup_stack

abort_rip_cacheline_read:
	pushf
	andq	$~(1 << 18), (%rsp) 	/* reenable SMAP */
	popf
abort_rip_cacheline_read_no_smap_reenable:
	movl	$0xdeadc0de, %ecx			/* Write a sentinel so higher-level code knows this was aborted */
	shlq	$32, %rcx
	movl	$0xbeefcafe, %ebx
	orq	%rbx, %rcx
	movq	%rcx, %gs:CPU_RTIMES(%rax)
	movq	%rcx, %gs:8+CPU_RTIMES(%rax)

cacheline_read_cleanup_stack:
	popq	%rcx
	popq	%rbx
	jmp	L_dispatch_kgsb
#endif /* if DEVELOPMENT || DEBUG */
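/*
 * In C terms, do_cacheline_stash above is approximately the following sketch
 * ("shadow_rtimes" stands in for the %gs-relative CPU_RTIMES area of the
 * per-CPU shadow and is not a real symbol):
 *
 *	uint64_t *line = (uint64_t *)(user_rip & ~63ULL);
 *	for (int i = 0; i < 8; i++)
 *		shadow_rtimes[i] = line[i];	// the first read may #PF; see above
 */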

/*
 * Common dispatch point.
 * Determine what mode has been interrupted and save state accordingly.
 * Here with:
 *	rsp	from user-space:   interrupt state in PCB, or
 *		from kernel-space: interrupt state in kernel or interrupt stack
 *	GSBASE	from user-space:   pthread area, or
 *		from kernel-space: cpu_data
 */

L_dispatch:
	pushq	%rax
	testb	$3, 8+ISF64_CS(%rsp)
	jz	1f
L_dispatch_from_user_no_push_rax:
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax	/* Offset of per-CPU shadow */

#if DEVELOPMENT || DEBUG
	/* Stash the cacheline for #UD, #PF, and #GP */
	cmpl	$(T_INVALID_OPCODE), 8+ISF64_TRAPNO(%rsp)
	je	do_cacheline_stash
	cmpl	$(T_PAGE_FAULT), 8+ISF64_TRAPNO(%rsp)
	je	do_cacheline_stash
	cmpl	$(T_GENERAL_PROTECTION), 8+ISF64_TRAPNO(%rsp)
	je	do_cacheline_stash
#endif

L_dispatch_kgsb:
	mov	%gs:CPU_SHADOWTASK_CR3(%rax), %rax
	mov	%rax, %cr3
#if	DEBUG
	mov	%rax, %gs:CPU_ENTRY_CR3
#endif
L_dispatch_from_kernel_no_push_rax:
1:
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	/* The text/data relationship here must be preserved in the doublemap, and the contents must be remapped */
	/* Indirect branch to non-doublemapped trampolines */
	jmp *(%rax)		/* idt64_hndl_table0[0] == ks_dispatch */
/* User return: register restoration and address space switch sequence */
Entry(ks_64bit_return)

	mov	R64_R14(%r15), %r14
	mov	R64_R13(%r15), %r13
	mov	R64_R12(%r15), %r12
	mov	R64_R11(%r15), %r11
	mov	R64_R10(%r15), %r10
	mov	R64_R9(%r15),  %r9
	mov	R64_R8(%r15),  %r8
	mov	R64_RSI(%r15), %rsi
	mov	R64_RDI(%r15), %rdi
	mov	R64_RBP(%r15), %rbp
	mov	R64_RDX(%r15), %rdx
	mov	R64_RCX(%r15), %rcx
	mov	R64_RBX(%r15), %rbx
	mov	R64_RAX(%r15), %rax
	/* Switch to per-CPU exception stack */
	mov	%gs:CPU_ESTACK, %rsp

	/* Synthesize interrupt stack frame from PCB savearea to exception stack */
	push	R64_SS(%r15)
	push	R64_RSP(%r15)
	push	R64_RFLAGS(%r15)
	push	R64_CS(%r15)
	push	R64_RIP(%r15)

	cmpw	$(KERNEL64_CS), 8(%rsp)
	jne	1f			/* Returning to user (%r15 will be restored after the segment checks) */
	mov	R64_R15(%r15), %r15
	jmp	L_64b_kernel_return	/* Returning to kernel */

1:
	push	%rax				/* [A] */
	movl	%gs:CPU_NEED_SEGCHK, %eax
	push	%rax				/* [B] */

	/* Returning to user */
	cmpl	$0, %gs:CPU_CURTASK_HAS_LDT	/* If the current task has an LDT, check and restore segment regs */
	jne	L_64b_segops_island

	/*
	 * Restore %r15, since we're now done accessing saved state
	 * and (%r15) won't be accessible after the %cr3 load anyway.
	 * Note that %r15 is restored below for the segment-restore
	 * case, just after we no longer need to access register state
	 * relative to %r15.
	 */
	mov	R64_R15(%r15), %r15

	/*
	 * Note that this %cr3 sequence is duplicated here to save
	 * [at least] a load and comparison that would be required if
	 * this block were shared.
	 */
	/* Discover user cr3/ASID */
	mov	%gs:CPU_UCR3, %rax
#if	DEBUG
	mov	%rax, %gs:CPU_EXIT_CR3
#endif
	mov	%rax, %cr3
	/* Continue execution on the shared/doublemapped trampoline */
	swapgs

L_chk_sysret:
	pop	%rax	/* Matched to [B], above (segchk required) */

	/*
	 * At this point, the stack contains:
	 *
	 * +--------------+
	 * |  Return SS   | +40
	 * |  Return RSP  | +32
	 * |  Return RFL  | +24
	 * |  Return CS   | +16
	 * |  Return RIP  | +8
	 * |  Saved RAX   |  <-- rsp
	 * +--------------+
	 */
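	/*
	 * A return %cs of SYSCALL_CS can only have been planted by the
	 * idt64_syscall entry above, so it selects the SYSRET exit path;
	 * every other entry returns via IRET.
	 */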

	cmpw	$(SYSCALL_CS), 16(%rsp) /* test for exit via SYSRET */
	je      L_sysret

	testl	$(MTHR_SEGCHK), %eax
	jnz	L_verw_island_2

	pop	%rax		/* Matched to [A], above */

L_64b_kernel_return:
.globl EXT(ret64_iret)
EXT(ret64_iret):
        iretq			/* return from interrupt */


L_sysret:
	testl	$(MTHR_SEGCHK), %eax
	jnz	L_verw_island_3

	pop	%rax		/* Matched to [A], above */
	/*
	 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
	 * 	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	pop	%rcx
	add	$8, %rsp
	pop	%r11
	pop	%rsp
	sysretq			/* return from system call */


L_verw_island_2:

	pop	%rax		/* Matched to [A], above */
	verw	32(%rsp)	/* verw operates on the %ss value already on the stack */
	jmp	EXT(ret64_iret)
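/*
 * Note: on CPUs that enumerate MD_CLEAR, VERW on a valid selector (the saved
 * %ss here) also flushes microarchitectural buffers, which appears to be why
 * these islands are taken only when MTHR_SEGCHK is set in CPU_NEED_SEGCHK.
 */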


L_verw_island_3:

	pop	%rax		/* Matched to [A], above */

	/*
	 * Here to restore rcx/r11/rsp and perform the sysret back to user-space.
	 * 	rcx	user rip
	 *	r11	user rflags
	 *	rsp	user stack pointer
	 */
	pop	%rcx
	add	$8, %rsp
	pop	%r11
	verw	8(%rsp)		/* verw operates on the %ss value already on the stack */
	pop	%rsp
	sysretq			/* return from system call */


L_64b_segops_island:

	/* Validate CS/DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
	/* Exempt "known good" statically configured selectors, e.g. USER64_CS and 0 */
	cmpw	$(USER64_CS), R64_CS(%r15)
	jz 	11f
	larw	R64_CS(%r15), %ax
	jnz	L_64_reset_cs
	/* Ensure that the segment referenced by CS in the saved state is a code segment (bit 11 == 1) */
	testw	$0x800, %ax
	jz	L_64_reset_cs		/* Update stored %cs with known-good selector if ZF == 1 */
	jmp	11f
L_64_reset_cs:
	movl	$(USER64_CS), R64_CS(%r15)
11:
	cmpw	$0, R64_DS(%r15)
	jz 	22f
	larw	R64_DS(%r15), %ax
	jz	22f
	movl	$0, R64_DS(%r15)
22:
	cmpw	$0, R64_ES(%r15)
	jz 	33f
	larw	R64_ES(%r15), %ax
	jz	33f
	movl	$0, R64_ES(%r15)
33:
	cmpw	$0, R64_FS(%r15)
	jz 	44f
	larw	R64_FS(%r15), %ax
	jz	44f
	movl	$0, R64_FS(%r15)
44:
	cmpw	$0, R64_GS(%r15)
	jz	55f
	larw	R64_GS(%r15), %ax
	jz	55f
	movl	$0, R64_GS(%r15)
55:
	/*
	 * Pack the segment registers into %rax, since (%r15) will not
	 * be accessible after the %cr3 switch.
	 * Only restore %gs if cthread_self is zero (this is indicated
	 * to the code below with a sentinel value of 0xFFFF).
	 */
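	/*
	 * The packing sequence below produces (a sketch):
	 *	%rax = (gs_or_0xFFFF << 48) | (fs << 32) | (es << 16) | ds
	 * which the post-%cr3-switch code unpacks 16 bits at a time, %ds first.
	 */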
	mov	%gs:CPU_ACTIVE_THREAD, %rax	/* Get the active thread */
	cmpq	$0, TH_CTH_SELF(%rax)
	je	L_restore_gs
	movw	$0xFFFF, %ax
	jmp	1f
L_restore_gs:
	movw	R64_GS(%r15), %ax
1:
	shlq	$16, %rax
	movw	R64_FS(%r15), %ax
	shlq	$16, %rax
	movw	R64_ES(%r15), %ax
	shlq	$16, %rax
	movw	R64_DS(%r15), %ax

	/*
	 * Restore %r15, since we're done accessing saved state
	 * and (%r15) won't be accessible after the %cr3 switch.
	 */
	mov	R64_R15(%r15), %r15

	/* Discover user cr3/ASID */
	push	%rax
	mov	%gs:CPU_UCR3, %rax
#if	DEBUG
	mov	%rax, %gs:CPU_EXIT_CR3
#endif
	mov	%rax, %cr3
	/* Continue execution on the shared/doublemapped trampoline */
	pop	%rax
	swapgs

	/*
	 * Returning to user; restore segment registers that might be used
	 * by compatibility-mode code in a 64-bit user process.
	 *
	 * Note that if we take a fault here, it's OK that we haven't yet
	 * popped %rax from the stack, because %rsp will be reset to
	 * the value pushed onto the exception stack (above).
	 */
	movw	%ax, %ds
	shrq	$16, %rax

	movw	%ax, %es
	shrq	$16, %rax

	movw	%ax, %fs
	shrq	$16, %rax

	/*
	 * 0xFFFF is the sentinel set above that indicates we should
	 * not restore %gs (because GS.base was already set elsewhere
	 * (e.g.: in act_machine_set_pcb or machine_thread_set_tsd_base))
	 */
	cmpw	$0xFFFF, %ax
	je	L_chk_sysret
	movw	%ax, %gs		/* Restore %gs to user-set value */
	jmp	L_chk_sysret


L_u64bit_entry_check:
	/*
	 * Check that we're not a confused 64-bit user, i.e. a 64-bit task
	 * entering through one of the legacy 32-bit gates; reject it if so.
	 */
	pushq	%rax
	swapgs
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	mov	16(%rax), %rax

	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP(%rax)
	jne	L_64bit_entry_reject
	jmp	L_dispatch_kgsb

L_64bit_entry_reject:
	/*
	 * Here for a 64-bit user attempting an invalid kernel entry.
	 */
	movq	$(HNDL_ALLTRAPS), 8+ISF64_TRAPFN(%rsp)
	movq	$(T_INVALID_OPCODE), 8+ISF64_TRAPNO(%rsp)
	jmp 	L_dispatch_kgsb

Entry(ks_32bit_return)

	/* Validate CS/DS/ES/FS/GS segment selectors with the Load Access Rights instruction prior to restoration */
	/* Exempt "known good" statically configured selectors, e.g. USER_CS, USER_DS and 0 */
	cmpw	$(USER_CS), R32_CS(%r15)
	jz 	11f
	larw	R32_CS(%r15), %ax
	jnz	L_32_reset_cs
	/* Ensure that the segment referenced by CS in the saved state is a code segment (bit 11 == 1) */
	testw	$0x800, %ax
	jz	L_32_reset_cs		/* Update stored %cs with known-good selector if ZF == 1 */
	jmp	11f
L_32_reset_cs:
	movl	$(USER_CS), R32_CS(%r15)
11:
	cmpw	$(USER_DS), R32_DS(%r15)
	jz	22f
	cmpw	$0, R32_DS(%r15)
	jz 	22f
	larw	R32_DS(%r15), %ax
	jz	22f
	movl	$(USER_DS), R32_DS(%r15)
22:
	cmpw	$(USER_DS), R32_ES(%r15)
	jz	33f
	cmpw	$0, R32_ES(%r15)
	jz 	33f
	larw	R32_ES(%r15), %ax
	jz	33f
	movl	$(USER_DS), R32_ES(%r15)
33:
	cmpw	$(USER_DS), R32_FS(%r15)
	jz	44f
	cmpw	$0, R32_FS(%r15)
	jz 	44f
	larw	R32_FS(%r15), %ax
	jz	44f
	movl	$(USER_DS), R32_FS(%r15)
44:
	cmpw	$(USER_CTHREAD), R32_GS(%r15)
	jz	55f
	cmpw	$0, R32_GS(%r15)
	jz 	55f
	larw	R32_GS(%r15), %ax
	jz	55f
	movl	$(USER_CTHREAD), R32_GS(%r15)
55:

	/*
	 * Restore general 32-bit registers
	 */
	movl	R32_EAX(%r15), %eax
	movl	R32_EBX(%r15), %ebx
	movl	R32_ECX(%r15), %ecx
	movl	R32_EDX(%r15), %edx
	movl	R32_EBP(%r15), %ebp
	movl	R32_ESI(%r15), %esi
	movl	R32_EDI(%r15), %edi
	movl	R32_DS(%r15), %r8d
	movl	R32_ES(%r15), %r9d
	movl	R32_FS(%r15), %r10d
	movl	R32_GS(%r15), %r11d

	/* Switch to the per-cpu (doublemapped) exception stack */
	mov	%gs:CPU_ESTACK, %rsp

	/* Now transfer the ISF to the exception stack in preparation for iret, below */
	movl	R32_SS(%r15), %r12d
	push	%r12
	movl	R32_UESP(%r15), %r12d
	push	%r12
	movl	R32_EFLAGS(%r15), %r12d
	push	%r12
	movl	R32_CS(%r15), %r12d
	push	%r12
	movl	R32_EIP(%r15), %r12d
	push	%r12

	movl	%gs:CPU_NEED_SEGCHK, %r14d	/* %r14 will be zeroed just before we return */

	/*
	 * Finally, switch to the user pagetables.  After this, all %gs-relative
	 * accesses MUST be to cpu shadow data ONLY.  Note that after we restore %gs
	 * (after the swapgs), no %gs-relative accesses should be performed.
	 */
	/* Discover user cr3/ASID */
	mov	%gs:CPU_UCR3, %r13
#if	DEBUG
	mov	%r13, %gs:CPU_EXIT_CR3
#endif
	mov	%r13, %cr3

	swapgs

	/*
	 * Restore segment registers. A #GP taken here will push state onto IST1,
	 * not the exception stack.  Note that the placement of the labels here
	 * corresponds to the fault address-detection logic (so do not change them
	 * without also changing that code).
	 */
L_32bit_seg_restore_begin:
	mov	%r8, %ds
	mov	%r9, %es
	mov	%r10, %fs
	mov	%r11, %gs
L_32bit_seg_restore_done:

	/* Zero 64-bit-exclusive GPRs to prevent data leaks */
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r15, %r15

	/*
	 * At this point, the stack contains:
	 *
	 * +--------------+
	 * |  Return SS   | +32
	 * |  Return RSP  | +24
	 * |  Return RFL  | +16
	 * |  Return CS   | +8
	 * |  Return RIP  | <-- rsp
	 * +--------------+
	 */

	cmpw	$(SYSENTER_CS), 8(%rsp)		/* test for sysexit */
	je      L_rtu_via_sysexit

	testl	$(MTHR_SEGCHK), %r14d
	jnz	L_verw_island

L_after_verw:
	xor	%r14, %r14

.globl EXT(ret32_iret)
EXT(ret32_iret):
	iretq				/* return from interrupt */

L_verw_island:
	verw	32(%rsp)
	jmp	L_after_verw

L_verw_island_1:
	verw	16(%rsp)
	jmp	L_after_verw_1

L_rtu_via_sysexit:
	pop	%rdx			/* user return eip */
	pop	%rcx			/* pop and toss cs */
	andl	$(~EFL_IF), (%rsp)	/* clear interrupts enable, sti below */

	/*
	 * %ss is now at 16(%rsp)
	 */
	testl	$(MTHR_SEGCHK), %r14d
	jnz	L_verw_island_1
L_after_verw_1:
	xor	%r14, %r14

	popf				/* flags - carry denotes failure */
	pop	%rcx			/* user return esp */


	sti				/* interrupts enabled after sysexit */
	sysexitl			/* 32-bit sysexit */

/* End of double-mapped TEXT */
.text

Entry(ks_dispatch)
	popq	%rax
	cmpw	$(KERNEL64_CS), ISF64_CS(%rsp)
	je	EXT(ks_dispatch_kernel)

	mov 	%rax, %gs:CPU_UBER_TMP
	mov 	%gs:CPU_UBER_ISF, %rax
	add 	$(ISF64_SIZE), %rax

	xchg	%rsp, %rax
/* Memory-to-memory moves (ain't x86 wonderful):
 * Transfer the exception frame from the per-CPU exception stack to the
 * 'PCB' stack programmed at context switch (cswitch).
 */
	push	ISF64_SS(%rax)
	push	ISF64_RSP(%rax)
	push	ISF64_RFLAGS(%rax)
	push	ISF64_CS(%rax)
	push	ISF64_RIP(%rax)
	push	ISF64_ERR(%rax)
	push	ISF64_TRAPFN(%rax)
	push 	ISF64_TRAPNO(%rax)
	mov	%gs:CPU_UBER_TMP, %rax
	jmp	EXT(ks_dispatch_user)

Entry(ks_dispatch_user_with_pop_rax)
	pop	%rax
	jmp	EXT(ks_dispatch_user)

Entry(ks_dispatch_user)
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP
	je	L_dispatch_U32		/* 32-bit user task */

L_dispatch_U64:
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	jmp	L_dispatch_64bit

Entry(ks_dispatch_kernel_with_pop_rax)
	pop	%rax
	jmp	EXT(ks_dispatch_kernel)

Entry(ks_dispatch_kernel)
	subq	$(ISS64_OFFSET), %rsp
	mov	%r15, R64_R15(%rsp)
	mov	%rsp, %r15

/*
 * Here for 64-bit user task or kernel
 */
L_dispatch_64bit:
	movl	$(SS_64), SS_FLAVOR(%r15)

	/*
	 * Save segment regs if a 64-bit task has
	 * installed customized segments in the LDT
	 */
	cmpl	$0, %gs:CPU_CURTASK_HAS_LDT
	je	L_skip_save_extra_segregs

	mov	%ds, R64_DS(%r15)
	mov	%es, R64_ES(%r15)

L_skip_save_extra_segregs:
	mov	%fs, R64_FS(%r15)
	mov	%gs, R64_GS(%r15)


	/* Save general-purpose registers */
	mov	%rax, R64_RAX(%r15)
	mov	%rbx, R64_RBX(%r15)
	mov	%rcx, R64_RCX(%r15)
	mov	%rdx, R64_RDX(%r15)
	mov	%rbp, R64_RBP(%r15)
	mov	%rdi, R64_RDI(%r15)
	mov	%rsi, R64_RSI(%r15)
	mov	%r8,  R64_R8(%r15)
	mov	%r9,  R64_R9(%r15)
	mov	%r10, R64_R10(%r15)
	mov	%r11, R64_R11(%r15)
	mov	%r12, R64_R12(%r15)
	mov	%r13, R64_R13(%r15)
	mov	%r14, R64_R14(%r15)

	/* Zero unused GPRs. BX/DX/SI are clobbered elsewhere across the exception handler, and are skipped. */
	xor	%ecx, %ecx
	xor	%edi, %edi
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r14, %r14

	/* cr2 is significant only for page-faults */
	xor	%rax, %rax
	cmpl	$T_PAGE_FAULT, R64_TRAPNO(%r15)
	jne	1f
	mov	%cr2, %rax
1:
	mov	%rax, R64_CR2(%r15)

L_dispatch_U64_after_fault:
	mov	R64_TRAPNO(%r15), %ebx	/* %ebx := trapno for later */
	mov	R64_TRAPFN(%r15), %rdx	/* %rdx := trapfn for later */
	mov	R64_CS(%r15), %esi	/* %esi := cs for later */

	jmp	L_common_dispatch

L_dispatch_U32: /* 32-bit user task */
	subq	$(ISS64_OFFSET), %rsp
	mov	%rsp, %r15
	mov	%gs:CPU_KERNEL_STACK, %rsp
	movl	$(SS_32), SS_FLAVOR(%r15)

	/*
	 * Save segment regs
	 */
	mov	%ds, R32_DS(%r15)
	mov	%es, R32_ES(%r15)
	mov	%fs, R32_FS(%r15)
	mov	%gs, R32_GS(%r15)

	/*
	 * Save general 32-bit registers
	 */
	mov	%eax, R32_EAX(%r15)
	mov	%ebx, R32_EBX(%r15)
	mov	%ecx, R32_ECX(%r15)
	mov	%edx, R32_EDX(%r15)
	mov	%ebp, R32_EBP(%r15)
	mov	%esi, R32_ESI(%r15)
	mov	%edi, R32_EDI(%r15)

	/* Unconditionally save cr2; only meaningful on page faults */
	xor	%eax, %eax
	cmpl	$T_PAGE_FAULT, R64_TRAPNO(%r15)
	jne	1f
	mov	%cr2, %rax
1:
	mov	%eax, R32_CR2(%r15)
	/* Zero unused GPRs. BX/DX/SI/R15 are clobbered elsewhere across the exception handler, and are skipped. */
	xor	%ecx, %ecx
	xor	%edi, %edi
	xor	%r8, %r8
	xor	%r9, %r9
	xor	%r10, %r10
	xor	%r11, %r11
	xor	%r12, %r12
	xor	%r13, %r13
	xor	%r14, %r14

	/*
	 * Copy registers already saved in the machine state 
	 * (in the interrupt stack frame) into the compat save area.
	 */
	mov	R64_RIP(%r15), %eax
	mov	%eax, R32_EIP(%r15)
	mov	R64_RFLAGS(%r15), %eax
	mov	%eax, R32_EFLAGS(%r15)
	mov	R64_RSP(%r15), %eax
	mov	%eax, R32_UESP(%r15)
	mov	R64_SS(%r15), %eax
	mov	%eax, R32_SS(%r15)
L_dispatch_U32_after_fault:
	mov	R64_CS(%r15), %esi		/* %esi := %cs for later */
	mov	%esi, R32_CS(%r15)
	mov	R64_TRAPNO(%r15), %ebx		/* %ebx := trapno for later */
	mov	%ebx, R32_TRAPNO(%r15)
	mov	R64_ERR(%r15), %eax
	mov	%eax, R32_ERR(%r15)
	mov	R64_TRAPFN(%r15), %rdx		/* %rdx := trapfn for later */

L_common_dispatch:
	cld		/* Ensure the direction flag is clear in the kernel */
	cmpl    $0, EXT(pmap_smap_enabled)(%rip)
	je	1f
	clac		/* Clear EFLAGS.AC if SMAP is present/enabled */
1:
	/*
	 * We mark the kernel's cr3 as "active" for TLB coherency evaluation
	 * For threads with a mapped pagezero (some WINE games) on non-SMAP platforms,
	 * we switch to the kernel's address space on entry. Also, 
	 * if the global no_shared_cr3 is TRUE we do switch to the kernel's cr3
	 * so that illicit accesses to userspace can be trapped.
	 */
	mov	%gs:CPU_KERNEL_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	test	$3, %esi			/* CS: user/kernel? */
	jz	2f				/* skip CR3 reload if from kernel */
	xor	%ebp, %ebp
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	11f
	cmpl	$0, EXT(no_shared_cr3)(%rip)
	je	2f
11:
	xor	%eax, %eax
	movw	%gs:CPU_KERNEL_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3			/* load kernel cr3 */
	jmp	4f
2:
	/* Deferred processing of pending kernel address space TLB invalidations */
	mov     %gs:CPU_ACTIVE_CR3+4, %rcx
	shr     $32, %rcx
	testl   %ecx, %ecx
	jz      4f
	movl    $0, %gs:CPU_TLB_INVALID
	cmpb	$0, EXT(invpcid_enabled)(%rip)
	jz	L_cr4_island
	movl	$2, %ecx
	invpcid %gs:CPU_IP_DESC, %rcx
4:
L_set_act:
	mov	%gs:CPU_ACTIVE_THREAD, %rcx	/* Get the active thread */
	testq	%rcx, %rcx
	je	L_intcnt
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap */
	cmpq	$0, TH_PCB_IDS(%rcx)	/* Is there a debug register state? */
	jnz	L_dr7_island
L_intcnt:
	incl	%gs:hwIntCnt(,%ebx,4)		// Bump the trap/intr count
	/* Dispatch the designated handler */
	cmp	EXT(dblmap_base)(%rip), %rsp
	jb	66f
	cmp	EXT(dblmap_max)(%rip), %rsp
	jge	66f
	subq	EXT(dblmap_dist)(%rip), %rsp
	subq	EXT(dblmap_dist)(%rip), %r15
66:
	leaq	EXT(idt64_hndl_table1)(%rip), %rax
	jmp	*(%rax, %rdx, 8)
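/*
 * The %rsp/%r15 adjustment above rebases pointers that still refer to the
 * double-mapped alias range [dblmap_base, dblmap_max) onto the primary kernel
 * mapping (by subtracting dblmap_dist) before the C handlers run; the indirect
 * jump then selects the handler from idt64_hndl_table1 using the HNDL_* index
 * left in %rdx by the dispatch code.
 */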

L_cr4_island:
	mov	%cr4, %rcx      /* RMWW CR4, for lack of an alternative*/
	and	$(~CR4_PGE), %rcx
	mov	%rcx, %cr4
	or	$(CR4_PGE), %rcx
	mov	%rcx, %cr4
	jmp	L_set_act
L_dr7_island:
	xor	%ecx, %ecx		/* If so, reset DR7 (the control) */
	mov	%rcx, %dr7
	jmp	L_intcnt
/*
 * Control is passed here to return to user.
 */ 
Entry(return_to_user)
	TIME_TRAP_UEXIT

Entry(ret_to_user)
	mov	%gs:CPU_ACTIVE_THREAD, %rdx
	cmpq	$0, TH_PCB_IDS(%rdx)	/* Is there a debug register context? */
	jnz	L_dr_restore_island
L_post_dr_restore:
	/*
	 * We now mark the task's address space as active for TLB coherency.
	 * Handle special cases such as pagezero-less tasks here.
	 */
	mov	%gs:CPU_TASK_CR3, %rcx
	mov	%rcx, %gs:CPU_ACTIVE_CR3
	cmpl	$0, %gs:CPU_PAGEZERO_MAPPED
	jnz	L_cr3_switch_island
	movl	EXT(no_shared_cr3)(%rip), %eax
	test	%eax, %eax		/* -no_shared_cr3 */
	jnz	L_cr3_switch_island

L_cr3_switch_return:
	mov	%gs:CPU_DR7, %rax	/* Is there a debug control register?*/
	cmp	$0, %rax
	je	4f
	mov	%rax, %dr7		/* Set DR7 */
	movq	$0, %gs:CPU_DR7
4:
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	jne	L_32bit_return

	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*8(%rax)			/* idt64_hndl_table0[1] == ks_64bit_return */


L_32bit_return:
#if DEBUG_IDT64
	cmpl	$(SS_32), SS_FLAVOR(%r15)	/* 32-bit state? */
	je	1f
	cli
	POSTCODE2(0x6432)
	CCALL1(panic_idt64, %r15)
1:
#endif /* DEBUG_IDT64 */

	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp	*0x18(%rax)			/* idt64_hndl_table0[3] == ks_32bit_return */


L_dr_restore_island:
	movq    TH_PCB_IDS(%rdx),%rax   /* Obtain this thread's debug state */
	cmpl	$(TASK_MAP_32BIT), %gs:CPU_TASK_MAP /* Are we a 32-bit task? */
	jne	1f
	movl	DS_DR0(%rax), %ecx	/* If so, load the 32 bit DRs */
	movq	%rcx, %dr0
	movl	DS_DR1(%rax), %ecx
	movq	%rcx, %dr1
	movl	DS_DR2(%rax), %ecx
	movq	%rcx, %dr2
	movl	DS_DR3(%rax), %ecx
	movq	%rcx, %dr3
	movl	DS_DR7(%rax), %ecx
	movq 	%rcx, %gs:CPU_DR7
	jmp 	2f
1:
	mov	DS64_DR0(%rax), %rcx	/* Load the full width DRs*/
	mov	%rcx, %dr0
	mov	DS64_DR1(%rax), %rcx
	mov	%rcx, %dr1
	mov	DS64_DR2(%rax), %rcx
	mov	%rcx, %dr2
	mov	DS64_DR3(%rax), %rcx
	mov	%rcx, %dr3
	mov	DS64_DR7(%rax), %rcx
	mov 	%rcx, %gs:CPU_DR7
2:
	jmp	L_post_dr_restore
L_cr3_switch_island:
	xor	%eax, %eax
	movw	%gs:CPU_ACTIVE_PCID, %ax
	or	%rax, %rcx
	mov	%rcx, %cr3
	jmp	L_cr3_switch_return

ret_to_kernel:
#if DEBUG_IDT64
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* 64-bit state? */
	je	1f
	cli
	POSTCODE2(0x6464)
	CCALL1(panic_idt64, %r15)
	hlt
1:
	cmpw	$(KERNEL64_CS), R64_CS(%r15)
	je	2f
	CCALL1(panic_idt64, %r15)
	hlt
2:
#endif
	/*
	 * Restore general 64-bit registers.
	 * Here on fault stack and PCB address in R15.
	 */
	leaq	EXT(idt64_hndl_table0)(%rip), %rax
	jmp *8(%rax)				/* idt64_hndl_table0[1] == ks_64bit_return */

/* All 'exceptions' enter hndl_alltraps, with:
 *	r15	x86_saved_state_t address
 *	rsp	kernel stack if user-space, otherwise interrupt or kernel stack
 *	esi	cs at trap
 * 
 * The rest of the state is set up as:	
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_alltraps)
	mov	%esi, %eax
	testb	$3, %al
	jz	trap_from_kernel

	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling trap/exception */
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	CCALL1(user_trap, %r15)			/* call user trap routine */
	/* user_trap() unmasks interrupts */
	cli					/* hold off intrs - critical section */
	xorl	%ecx, %ecx			/* don't check if we're in the PFZ */


Entry(return_from_trap)
	movq	%gs:CPU_ACTIVE_THREAD,%r15	/* Get current thread */
	movl	$-1, TH_IOTIER_OVERRIDE(%r15)	/* Reset IO tier override to -1 before returning to userspace */
	movq	TH_PCB_ISS(%r15), %r15		/* PCB stack */
	movl	%gs:CPU_PENDING_AST,%eax
	testl	%eax,%eax
	je	EXT(return_to_user)		/* branch if no AST */

L_return_from_trap_with_ast:
	testl	%ecx, %ecx		/* see if we need to check for an EIP in the PFZ */
	je	2f			/* no, go handle the AST */
	cmpl	$(SS_64), SS_FLAVOR(%r15)	/* are we a 64-bit task? */
	je	1f
					/* no... 32-bit user mode */
	movl	R32_EIP(%r15), %edi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz32)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R32_EBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
1:
	movq	R64_RIP(%r15), %rdi
	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(commpage_is_in_pfz64)
	testl	%eax, %eax
	je	2f			/* not in the PFZ... go service AST */
	movl	%eax, R64_RBX(%r15)	/* let the PFZ know we've pended an AST */
	jmp	EXT(return_to_user)
2:

	xorq	%rbp, %rbp		/* clear framepointer */
	CCALL(ast_taken_user)		/* handle all ASTs (enables interrupts, may return via continuation) */

	cli
	mov	%rsp, %r15		/* AST changes stack, saved state */
	xorl	%ecx, %ecx		/* don't check if we're in the PFZ */
	jmp	EXT(return_from_trap)	/* and check again (rare) */

/*
 * Trap from kernel mode.  No need to switch stacks.
 * Interrupts must be off here; we will restore them to their state at the
 * time of the trap as soon as it is safe to do so without recursing on
 * preemption.
 */
trap_from_kernel:

UNWIND_PROLOGUE	
	
	movq	%r15, %rdi		/* saved state addr */

UNWIND_DIRECTIVES	

	pushq   R64_RIP(%r15)           /* Simulate a CALL from fault point */
	pushq   %rbp                    /* Extend framepointer chain */
	movq    %rsp, %rbp
	CCALLWITHSP(kernel_trap)	/* to kernel trap routine */
	popq    %rbp
	addq    $8, %rsp
	mov	%rsp, %r15		/* DTrace slides stack/saved-state */
	cli

	movl	%gs:CPU_PENDING_AST,%eax	/* get pending asts */
	testl	$(AST_URGENT),%eax		/* any urgent preemption? */
	je	ret_to_kernel			/* no, nothing to do */
	cmpl	$(T_PREEMPT),R64_TRAPNO(%r15)
	je	ret_to_kernel			/* T_PREEMPT handled in kernel_trap() */
	testl	$(EFL_IF),R64_RFLAGS(%r15)	/* interrupts disabled? */
	je	ret_to_kernel
	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel
	movq	%gs:CPU_KERNEL_STACK,%rax
	movq	%rsp,%rcx
	xorq	%rax,%rcx
	andq	EXT(kernel_stack_mask)(%rip),%rcx
	testq	%rcx,%rcx		/* are we on the kernel stack? */
	jne	ret_to_kernel		/* no, skip it */

	CCALL(ast_taken_kernel)         /* take the AST */

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel

UNWIND_EPILOGUE
	
/*
 * All interrupts on all tasks enter here with:
 *	r15	 x86_saved_state_t
 *	rsp	 kernel or interrupt stack
 *	esi	 cs at trap
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */
Entry(hndl_allintrs)

UNWIND_PROLOGUE	
	
	/*
	 * test whether already on interrupt stack
	 */
	movq	%gs:CPU_INT_STACK_TOP,%rcx
	cmpq	%rsp,%rcx
	jb	1f
	leaq	-INTSTACK_SIZE(%rcx),%rdx
	cmpq	%rsp,%rdx
	jb	int_from_intstack
1:
	xchgq	%rcx,%rsp		/* switch to interrupt stack */

	mov	%cr0,%rax		/* get cr0 */
	orl	$(CR0_TS),%eax		/* or in TS bit */
	mov	%rax,%cr0		/* set cr0 */

	pushq	%rcx			/* save pointer to old stack */
	pushq	%gs:CPU_INT_STATE	/* save previous intr state */
	movq	%r15,%gs:CPU_INT_STATE	/* set intr state */

UNWIND_DIRECTIVES	
	
	CCALL1(recount_enter_intel_interrupt, %r15) /* update time and PMCs */

	/* Check for active vtimers in the current task */
	mov	%gs:CPU_ACTIVE_THREAD, %rcx
	mov	TH_TASK(%rcx), %rbx
	TASK_VTIMER_CHECK(%rbx, %rcx)

	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL

	CCALL1(interrupt, %r15)		/* call generic interrupt routine */
	
UNWIND_EPILOGUE

.globl	EXT(return_to_iret)
LEXT(return_to_iret)			/* (label for kdb_kintr and hardclock) */

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL

	CCALL(recount_leave_intel_interrupt) /* update time and PMCs */

	popq	%gs:CPU_INT_STATE 	/* reset/clear intr state pointer */
	popq	%rsp			/* switch back to old stack */

	movq	%gs:CPU_ACTIVE_THREAD,%rax
	movq	TH_PCB_FPS(%rax),%rax	/* get pcb's ifps */
	cmpq	$0,%rax			/* Is there a context */
	je	1f			/* Branch if not */
	movl	FP_VALID(%rax),%eax	/* Load fp_valid */
	cmpl	$0,%eax			/* Check if valid */
	jne	1f			/* Branch if valid */
	clts				/* Clear TS */
	jmp	2f
1:
	mov	%cr0,%rax		/* get cr0 */
	orl	$(CR0_TS),%eax		/* or in TS bit */
	mov	%rax,%cr0		/* set cr0 */
2:
	/* Load interrupted code segment into %eax */
	movl	R64_CS(%r15), %eax	/* assume 64-bit state */
	cmpl	$(SS_32), SS_FLAVOR(%r15) /* 32-bit? */
#if DEBUG_IDT64
	jne	5f
	movl	R32_CS(%r15),%eax	/* 32-bit user mode */
	jmp	3f
5:
	cmpl    $(SS_64),SS_FLAVOR(%r15)
	je	3f
	POSTCODE2(0x6431)
	CCALL1(panic_idt64, %r15)
	hlt
#else
	je	4f
#endif
3:
	testb	$3,%al			/* user mode, */
	jnz	ast_from_interrupt_user	/* go handle potential ASTs */
	/*
	 * We only want to handle preemption requests if
	 * the interrupt occurred in kernel context
	 * and preemption isn't disabled.
	 */
	movl	%gs:CPU_PENDING_AST,%eax	
	testl	$(AST_URGENT),%eax		/* any urgent requests? */
	je	ret_to_kernel			/* no, nothing to do */

	cmpl	$0,%gs:CPU_PREEMPTION_LEVEL	/* preemption disabled? */
	jne	ret_to_kernel			/* yes, skip it */

	/*
	 * Take an AST from kernel space.  We don't need (and don't want)
	 * to do as much as the case where the interrupt came from user
	 * space.
	 */
	CCALL(ast_taken_kernel)

	mov	%rsp, %r15		/* AST changes stack, saved state */
	jmp	ret_to_kernel
4:
	movl	R32_CS(%r15),%eax	/* 32-bit user mode */
	jmp	3b


/*
 * Nested interrupt: simple path, no preemption etc. on the way out
 */
int_from_intstack:
	incl	%gs:CPU_PREEMPTION_LEVEL
	incl	%gs:CPU_INTERRUPT_LEVEL
	incl	%gs:CPU_NESTED_ISTACK

	push	%gs:CPU_INT_STATE
	mov	%r15, %gs:CPU_INT_STATE

	CCALL1(interrupt, %r15)

	pop	%gs:CPU_INT_STATE

	decl	%gs:CPU_INTERRUPT_LEVEL
	decl	%gs:CPU_PREEMPTION_LEVEL
	decl	%gs:CPU_NESTED_ISTACK

	jmp	ret_to_kernel

/*
 *	Take an AST from an interrupted user
 */
ast_from_interrupt_user:
	movl	%gs:CPU_PENDING_AST,%eax
	testl	%eax,%eax		/* pending ASTs? */
	je	EXT(ret_to_user)	/* no, nothing to do */

	TIME_TRAP_UENTRY

	movl	$1, %ecx		/* check if we're in the PFZ */
	jmp	L_return_from_trap_with_ast	/* return */


/* Syscall dispatch routines! */

/*
 *
 * 32bit Tasks
 * System call entries via INTR_GATE or sysenter:
 *
 *	r15	 x86_saved_state32_t
 *	rsp	 kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_sysenter)
	/*
	 * We can be here either for a mach syscall or a unix syscall,
	 * as indicated by the sign of the code:
	 */
	movl	R32_EAX(%r15),%eax
	testl	%eax,%eax
	js	EXT(hndl_mach_scall)		/* < 0 => mach */
						/* > 0 => unix */
	
Entry(hndl_unix_scall)

        TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
	incl	TH_SYSCALLS_UNIX(%rcx)		/* increment call count   */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(unix_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
	incl	TH_SYSCALLS_MACH(%rcx)		/* increment call count   */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(mach_call_munger, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mdep_scall)
	TIME_TRAP_UENTRY

	/* Check for active vtimers in the current task */
	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	sti

	CCALL1(machdep_syscall, %r15)
	/*
	 * always returns through thread_exception_return
	 */

/*
 * 64bit Tasks
 * System call entries via syscall only:
 *
 *	r15	 x86_saved_state64_t
 *	rsp	 kernel stack
 *
 *	both rsp and r15 are 16-byte aligned
 *	interrupts disabled
 *	direction flag cleared
 */

Entry(hndl_syscall)
	TIME_TRAP_UENTRY

	movq	%gs:CPU_ACTIVE_THREAD,%rcx	/* get current thread     */
	movl	$-1, TH_IOTIER_OVERRIDE(%rcx)	/* Reset IO tier override to -1 before handling syscall */
	movq	TH_TASK(%rcx),%rbx		/* point to current task  */

	/* Check for active vtimers in the current task */
	TASK_VTIMER_CHECK(%rbx,%rcx)

	/*
	 * We can be here for a mach, unix, machdep or diag syscall,
	 * as indicated by the syscall class:
	 */
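	/*
	 * In C terms (a sketch): class = rax & SYSCALL_CLASS_MASK, and each
	 * comparison below matches it against
	 * (SYSCALL_CLASS_x << SYSCALL_CLASS_SHIFT).
	 */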
	movl	R64_RAX(%r15), %eax		/* syscall number/class */
	movl	%eax, %edx
	andl	$(SYSCALL_CLASS_MASK), %edx	/* syscall class */
	cmpl	$(SYSCALL_CLASS_MACH<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mach_scall64)
	cmpl	$(SYSCALL_CLASS_UNIX<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_unix_scall64)
	cmpl	$(SYSCALL_CLASS_MDEP<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_mdep_scall64)
	cmpl	$(SYSCALL_CLASS_DIAG<<SYSCALL_CLASS_SHIFT), %edx
	je	EXT(hndl_diag_scall64)

	/* Syscall class unknown */
	sti
	CCALL3(i386_exception, $(EXC_SYSCALL), %rax, $1)
	/* no return */


Entry(hndl_unix_scall64)
	incl	TH_SYSCALLS_UNIX(%rcx)		/* increment call count   */
	sti

	CCALL1(unix_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */


Entry(hndl_mach_scall64)
	incl	TH_SYSCALLS_MACH(%rcx)		/* increment call count   */
	sti

	CCALL1(mach_call_munger64, %r15)
	/*
	 * always returns through thread_exception_return
	 */



Entry(hndl_mdep_scall64)
	sti

	CCALL1(machdep_syscall64, %r15)
	/*
	 * always returns through thread_exception_return
	 */

Entry(hndl_diag_scall64)
	CCALL1(diagCall64, %r15)	// Call diagnostics
	test	%eax, %eax		// What kind of return is this?
	je	1f			// - branch if bad (zero)
	jmp	EXT(return_to_user)	// Normal return, do not check asts...
1:
	sti
	CCALL3(i386_exception, $EXC_SYSCALL, $0x6000, $1)
	/* no return */
/* TODO assert at all 'C' entry points that we're never operating on the fault stack's alias mapping */
Entry(hndl_machine_check)
	/* Adjust SP and savearea to their canonical, non-aliased addresses */
	CCALL1(panic_machine_check64, %r15)
	hlt

Entry(hndl_double_fault)
	CCALL1(panic_double_fault64, %r15)
	hlt