/*
* Copyright (c) 2000-2017 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
*/
/*
* Mach Operating System
* Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
* All Rights Reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie Mellon
* the rights to redistribute these changes.
*/
#include <mach/boolean.h>
#include <mach/thread_switch.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/counter.h>
#include <kern/ipc_kobject.h>
#include <kern/processor.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/spl.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/policy_internal.h>
#include <mach/policy.h>
#include <kern/syscall_subr.h>
#include <mach/mach_host_server.h>
#include <mach/mach_syscalls.h>
#include <sys/kdebug.h>
#include <kern/ast.h>
/*
* Forward declarations of the file-local priority-depression helpers.
* They are called by swtch_pri() and thread_switch() before their
* definitions appear further down in this file.
*/
static void thread_depress_abstime(uint64_t interval);
static void thread_depress_ms(mach_msg_timeout_t interval);
/*
 * pfz_exit:
 *
 * Called from the commpage to take a delayed preemption when a thread
 * exits the "Preemption Free Zone" (PFZ).
 *
 * args is unused.  Always returns KERN_SUCCESS.
 */
kern_return_t
pfz_exit(__unused struct pfz_exit_args *args)
{
	/*
	 * Nothing special to do for now: the pending ASTs are picked up
	 * on kernel exit.
	 */
	kern_return_t kr = KERN_SUCCESS;

	return kr;
}
/*
* swtch and swtch_pri both attempt to context switch (logic in
* thread_block no-ops the context switch if nothing would happen).
* A boolean is returned that indicates whether there is anything
* else runnable. That's no excuse to spin, though.
*/
static void
swtch_continue(void)
{
processor_t myprocessor;
boolean_t result;
disable_preemption();
myprocessor = current_processor();
result = SCHED(thread_should_yield)(myprocessor, current_thread());
enable_preemption();
ml_delay_on_yield();
thread_syscall_return(result);
/*NOTREACHED*/
}
/*
 * swtch:
 *
 * Attempt a context switch.  args is unused.
 *
 * Returns FALSE immediately when the scheduler reports that the current
 * thread has no reason to yield.  Otherwise the thread yields and the call
 * finishes in swtch_continue(), which supplies the result.
 *
 * NOTE(review): the early-return path re-enables preemption with
 * mp_enable_preemption() while the yield path uses enable_preemption();
 * both are kept exactly as found -- confirm they are interchangeable.
 */
boolean_t
swtch(__unused struct swtch_args *args)
{
	boolean_t       should_yield;

	disable_preemption();
	should_yield = SCHED(thread_should_yield)(current_processor(), current_thread());
	if (!should_yield) {
		mp_enable_preemption();
		return FALSE;
	}
	enable_preemption();

	/* Control resumes in swtch_continue(), not here. */
	thread_yield_with_continuation((thread_continue_t)swtch_continue, NULL);
}
static void
swtch_pri_continue(void)
{
processor_t myprocessor;
boolean_t result;
thread_depress_abort(current_thread());
disable_preemption();
myprocessor = current_processor();
result = SCHED(thread_should_yield)(myprocessor, current_thread());
mp_enable_preemption();
ml_delay_on_yield();
thread_syscall_return(result);
/*NOTREACHED*/
}
/*
 * swtch_pri:
 *
 * Like swtch(), but the thread's priority is depressed (for
 * thread_depress_time) before it yields.  args is unused.
 *
 * Returns FALSE immediately when the scheduler reports that the current
 * thread has no reason to yield.  Otherwise the call finishes in
 * swtch_pri_continue(), which aborts the depression and supplies the
 * result.
 */
boolean_t
swtch_pri(__unused struct swtch_pri_args *args)
{
	boolean_t       should_yield;

	disable_preemption();
	should_yield = SCHED(thread_should_yield)(current_processor(), current_thread());
	if (!should_yield) {
		mp_enable_preemption();
		return FALSE;
	}
	enable_preemption();

	/* The depression only takes effect once we block with AST_YIELD. */
	thread_depress_abstime(thread_depress_time);

	/* Control resumes in swtch_pri_continue(), not here. */
	thread_yield_with_continuation((thread_continue_t)swtch_pri_continue, NULL);
}
/*
 * thread_switch_continue:
 *
 * Continuation used by thread_switch().  `parameter` carries the
 * SWITCH_OPTION_* value of the original call, packed into a pointer;
 * `ret` is unused.
 *
 * For the two depress options the priority depression is aborted here.
 * The call then completes with KERN_SUCCESS via thread_syscall_return().
 * Does not return.
 */
static void
thread_switch_continue(void *parameter, __unused int ret)
{
	thread_t        self = current_thread();

	switch ((int)(intptr_t)parameter) {
	case SWITCH_OPTION_DEPRESS:
	case SWITCH_OPTION_OSLOCK_DEPRESS:
		thread_depress_abort(self);
		break;
	default:
		/* No depression was requested for the other options. */
		break;
	}

	ml_delay_on_yield();

	thread_syscall_return(KERN_SUCCESS);
	/*NOTREACHED*/
}
/*
* thread_switch:
*
* Context switch. User may supply thread hint.
*
* Arguments (all read from *args):
*   thread_name  - port name of a thread to hand off to, or MACH_PORT_NULL
*                  for no hint.
*   option       - one of the SWITCH_OPTION_* values handled by the switch
*                  statement below.
*   option_time  - for the wait options, the timeout passed to
*                  assert_wait_timeout() together with scale_factor; for
*                  the depress options, the interval passed to
*                  thread_depress_ms().
*
* Returns KERN_INVALID_ARGUMENT for an unrecognized option, and
* KERN_SUCCESS directly when a non-wait request finds that yielding would
* not be beneficial. On every other path the thread switches away and the
* call completes in thread_switch_continue().
*/
kern_return_t
thread_switch(
struct thread_switch_args *args)
{
thread_t thread = THREAD_NULL;
thread_t self = current_thread();
mach_port_name_t thread_name = args->thread_name;
int option = args->option;
mach_msg_timeout_t option_time = args->option_time;
/* Scale handed to assert_wait_timeout(); only DISPATCH_CONTENTION changes it. */
uint32_t scale_factor = NSEC_PER_MSEC;
boolean_t depress_option = FALSE;
boolean_t wait_option = FALSE;
wait_interrupt_t interruptible = THREAD_ABORTSAFE;
port_intrans_options_t ptt_options = PORT_INTRANS_THREAD_NOT_CURRENT_THREAD;
/*
* Validate and process option.
*
* OSLock boosting only applies to other threads
* in your same task (even if you have a port for
* a thread in another task)
*/
switch (option) {
case SWITCH_OPTION_NONE:
break;
case SWITCH_OPTION_WAIT:
wait_option = TRUE;
break;
case SWITCH_OPTION_DEPRESS:
depress_option = TRUE;
break;
case SWITCH_OPTION_DISPATCH_CONTENTION:
scale_factor = NSEC_PER_USEC;
wait_option = TRUE;
interruptible |= THREAD_WAIT_NOREPORT;
break;
case SWITCH_OPTION_OSLOCK_DEPRESS:
depress_option = TRUE;
interruptible |= THREAD_WAIT_NOREPORT;
ptt_options |= PORT_INTRANS_THREAD_IN_CURRENT_TASK;
break;
case SWITCH_OPTION_OSLOCK_WAIT:
wait_option = TRUE;
interruptible |= THREAD_WAIT_NOREPORT;
ptt_options |= PORT_INTRANS_THREAD_IN_CURRENT_TASK;
break;
default:
return KERN_INVALID_ARGUMENT;
}
/*
* Translate the port name if supplied.
*/
if (thread_name != MACH_PORT_NULL) {
thread = port_name_to_thread(thread_name, ptt_options);
}
if (option == SWITCH_OPTION_OSLOCK_DEPRESS || option == SWITCH_OPTION_OSLOCK_WAIT) {
if (thread != THREAD_NULL) {
/*
* Attempt to kick the lock owner up to our same IO throttling tier.
* If the thread is currently blocked in throttle_lowpri_io(),
* it will immediately break out.
*
* TODO: SFI break out?
*/
int new_policy = proc_get_effective_thread_policy(self, TASK_POLICY_IO);
set_thread_iotier_override(thread, new_policy);
}
}
/*
* Try to handoff if supplied.
*/
if (thread != THREAD_NULL) {
spl_t s = splsched();
/* This may return a different thread if the target is pushing on something */
thread_t pulled_thread = thread_run_queue_remove_for_handoff(thread);
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
thread_tid(thread), thread->state,
pulled_thread ? TRUE : FALSE, 0, 0);
if (pulled_thread != THREAD_NULL) {
/* We can't be dropping the last ref here */
thread_deallocate_safe(thread);
/*
* Arm the wait or the depression before switching. The wait event
* is the address of assert_wait_timeout() itself.
*/
if (wait_option) {
assert_wait_timeout((event_t)assert_wait_timeout, interruptible,
option_time, scale_factor);
} else if (depress_option) {
thread_depress_ms(option_time);
}
/* Does not come back here; the call finishes in thread_switch_continue(). */
thread_run(self, thread_switch_continue, (void *)(intptr_t)option, pulled_thread);
__builtin_unreachable();
}
/*
* No thread could be pulled for handoff: restore the interrupt level
* and drop the thread reference (presumably the one returned by
* port_name_to_thread() -- confirm).
*/
splx(s);
thread_deallocate(thread);
}
/* No handoff took place: fall back to a plain wait or yield. */
if (wait_option) {
assert_wait_timeout((event_t)assert_wait_timeout, interruptible, option_time, scale_factor);
} else {
/* Preemption is held off while the current processor is consulted. */
disable_preemption();
bool should_yield = SCHED(thread_should_yield)(current_processor(), current_thread());
enable_preemption();
if (should_yield == false) {
/* Early-return if yielding to the scheduler will not be beneficial */
return KERN_SUCCESS;
}
if (depress_option) {
thread_depress_ms(option_time);
}
}
/* Does not come back here; the call finishes in thread_switch_continue(). */
thread_yield_with_continuation(thread_switch_continue, (void *)(intptr_t)option);
__builtin_unreachable();
}
/*
 * thread_yield_with_continuation:
 *
 * Block the current thread with reason AST_YIELD and resume in
 * `continuation`, which receives `parameter`.
 *
 * `continuation` must be non-NULL (asserted).  This function does not
 * return to its caller.
 */
void
thread_yield_with_continuation(thread_continue_t continuation, void *parameter)
{
	assert(continuation);

	thread_block_reason(continuation, parameter, AST_YIELD);
	__builtin_unreachable();
}
/*
 * thread_handoff_internal:
 *
 * Try to hand the processor off to `thread`; if no thread can be pulled
 * for handoff (or `thread` is THREAD_NULL), block instead.
 *
 * This function is called after an assert_wait(), therefore it must not
 * cause another wait until after the thread_run() or thread_block().
 *
 * Calling convention for thread ref deallocation:
 *
 * 1) If no continuation is provided, then the thread ref is consumed
 *    (thread_handoff_deallocate convention).
 *
 * 2) If a continuation is provided with option
 *    THREAD_HANDOFF_SETRUN_NEEDED, then the thread ref is always consumed.
 *
 * 3) If a continuation is provided with option THREAD_HANDOFF_NONE, then
 *    the thread ref is not consumed and it is up to the continuation to
 *    deallocate the thread reference.
 *
 * Returns the result of thread_run() when a handoff takes place, and the
 * result of thread_block_parameter() otherwise.
 */
static wait_result_t
thread_handoff_internal(thread_t thread, thread_continue_t continuation,
    void *parameter, thread_handoff_option_t option)
{
	thread_t        self = current_thread();

	/* No handoff target supplied: just block. */
	if (thread == THREAD_NULL) {
		return thread_block_parameter(continuation, parameter);
	}

	spl_t s = splsched();

	thread_t handoff_target = thread_prepare_for_handoff(thread, option);

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_THREAD_SWITCH) | DBG_FUNC_NONE,
	    thread_tid(thread), thread->state,
	    handoff_target ? TRUE : FALSE, 0, 0);

	/* Deallocate the thread ref if the convention above says so. */
	if ((option & THREAD_HANDOFF_SETRUN_NEEDED) || continuation == NULL) {
		/* Use the safe version of thread deallocate */
		thread_deallocate_safe(thread);
	}

	/* Nothing could be pulled: restore the interrupt level and block. */
	if (handoff_target == THREAD_NULL) {
		splx(s);
		return thread_block_parameter(continuation, parameter);
	}

	int run_result = thread_run(self, continuation, parameter, handoff_target);

	splx(s);
	return run_result;
}
/*
 * thread_handoff_parameter:
 *
 * Handoff variant for callers that supply a continuation.  The work is
 * done by thread_handoff_internal(); if that call ever returns, the
 * function panics with a message blaming a NULL continuation.
 *
 * Does not return to its caller.
 */
void
thread_handoff_parameter(thread_t thread, thread_continue_t continuation,
    void *parameter, thread_handoff_option_t option)
{
	(void)thread_handoff_internal(thread, continuation, parameter, option);

	/* Reached only if the handoff/block above came back to us. */
	panic("NULL continuation passed to %s", __func__);
	__builtin_unreachable();
}
/*
 * thread_handoff_deallocate:
 *
 * Handoff variant without a continuation: forwards to
 * thread_handoff_internal() with a NULL continuation and parameter, so the
 * reference on `thread` is consumed (see the convention documented on
 * thread_handoff_internal()).
 *
 * Returns the wait result produced by thread_handoff_internal().
 */
wait_result_t
thread_handoff_deallocate(thread_t thread, thread_handoff_option_t option)
{
	wait_result_t wres;

	wres = thread_handoff_internal(thread, NULL, NULL, option);
	return wres;
}
/*
* Thread depression
*
* This mechanism drops a thread to priority 0 in order for it to yield to
* all other runnable threads on the system. It can be canceled or timed out,
* whereupon the thread goes back to where it was.
*
* Note that TH_SFLAG_DEPRESS and TH_SFLAG_POLLDEPRESS are never set at the
* same time. DEPRESS always defers to POLLDEPRESS.
*
* DEPRESS only lasts across a single thread_block call, and never returns
* to userspace.
* POLLDEPRESS can be active anywhere up until thread termination.
*/
/*
 * thread_depress_timer_setup:
 *
 * Allocate the depression timer for `self`, store it in
 * self->depress_timer, and register thread_depress_expire() as its
 * callback with `self` as the callback parameter.
 *
 * The allocation is requested with Z_ZERO | Z_WAITOK | Z_NOFAIL and its
 * result is not checked for failure.
 */
void
thread_depress_timer_setup(thread_t self)
{
	struct timer_call *timer = kalloc_type(struct timer_call,
	    Z_ZERO | Z_WAITOK | Z_NOFAIL);

	self->depress_timer = timer;
	timer_call_setup(timer, thread_depress_expire, self);
}
/*
 * thread_depress_abstime:
 *
 * Depress the current thread's priority to the lowest possible for the
 * specified interval, with an interval of zero resulting in no timeout
 * being scheduled.
 *
 * interval - depression length; it is converted to a deadline with
 *            clock_absolutetime_interval_to_deadline().
 *
 * Nothing is changed if the thread is already poll-depressed
 * (TH_SFLAG_POLLDEPRESS).  The thread must not already carry
 * TH_SFLAG_DEPRESS (asserted).
 *
 * Must block with AST_YIELD afterwards to take effect.
 *
 * File-local: `static` is spelled out here to agree with the forward
 * declaration at the top of the file.
 */
static void
thread_depress_abstime(uint64_t interval)
{
	thread_t self = current_thread();

	spl_t s = splsched();
	thread_lock(self);

	assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);

	if ((self->sched_flags & TH_SFLAG_POLLDEPRESS) == 0) {
		self->sched_flags |= TH_SFLAG_DEPRESS;
		thread_recompute_sched_pri(self, SETPRI_LAZY);

		if (interval != 0) {
			uint64_t deadline;

			clock_absolutetime_interval_to_deadline(interval, &deadline);

			/*
			 * Count the timer as active only when timer_call_enter()
			 * returns false (presumably: it was not already pending).
			 */
			if (!timer_call_enter(self->depress_timer, deadline, TIMER_CALL_USER_CRITICAL)) {
				self->depress_timer_active++;
			}
		}
	}

	thread_unlock(self);
	splx(s);
}
/*
 * thread_depress_ms:
 *
 * Millisecond front end to thread_depress_abstime(): converts `interval`
 * (scaled by NSEC_PER_MSEC) to an absolute-time interval and depresses the
 * current thread for that long.
 *
 * File-local: `static` is spelled out here to agree with the forward
 * declaration at the top of the file.
 */
static void
thread_depress_ms(mach_msg_timeout_t interval)
{
	uint64_t abstime;

	clock_interval_to_absolutetime_interval(interval, NSEC_PER_MSEC, &abstime);
	thread_depress_abstime(abstime);
}
/*
 * thread_depress_expire:
 *
 * Priority depression expiration.  This is the timer callback that
 * thread_depress_timer_setup() registers.
 *
 * p0 - the thread whose depression timer fired.
 * p1 - unused.
 *
 * Drops one count from depress_timer_active; when the count reaches zero
 * the depression flags are cleared and the scheduled priority is
 * recomputed.
 */
void
thread_depress_expire(void *p0, __unused void *p1)
{
	thread_t        thread = (thread_t)p0;
	spl_t           s;

	s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	thread->depress_timer_active--;
	if (thread->depress_timer_active == 0) {
		thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

		/* Timestamp the change only when every TH_RUN bit is set. */
		if ((thread->state & TH_RUN) == TH_RUN) {
			thread->last_basepri_change_time = mach_absolute_time();
		}

		thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
	}

	thread_unlock(thread);
	splx(s);
}
/*
 * thread_depress_abort:
 *
 * Prematurely abort priority depression if there is one.
 *
 * Takes the thread lock at splsched.  Returns KERN_NOT_DEPRESSED when the
 * thread is poll-depressed (which is deliberately left in place) or not
 * depressed at all; otherwise returns what thread_depress_abort_locked()
 * reports.
 */
kern_return_t
thread_depress_abort(thread_t thread)
{
	kern_return_t   kr;
	spl_t           s;

	s = splsched();
	thread_lock(thread);

	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	/*
	 * User-triggered depress-aborts should not get out
	 * of the poll-depress, but they should cancel a regular depress.
	 */
	if ((thread->sched_flags & TH_SFLAG_POLLDEPRESS) != 0) {
		kr = KERN_NOT_DEPRESSED;
	} else {
		kr = thread_depress_abort_locked(thread);
	}

	thread_unlock(thread);
	splx(s);

	return kr;
}
/*
 * thread_depress_abort_locked:
 *
 * Prematurely abort priority depression or poll depression if one is
 * active.  Called with the thread locked.
 *
 * Returns KERN_NOT_DEPRESSED when neither depression flag is set.
 * Otherwise clears the flags, recomputes the scheduled priority, cancels
 * the depression timer and returns KERN_SUCCESS.
 */
kern_return_t
thread_depress_abort_locked(thread_t thread)
{
	/* Neither DEPRESS nor POLLDEPRESS is set: nothing to abort. */
	if (!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK)) {
		return KERN_NOT_DEPRESSED;
	}

	assert((thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	thread->sched_flags &= ~TH_SFLAG_DEPRESSED_MASK;

	/* Timestamp the change only when every TH_RUN bit is set. */
	if ((thread->state & TH_RUN) == TH_RUN) {
		thread->last_basepri_change_time = mach_absolute_time();
	}

	thread_recompute_sched_pri(thread, SETPRI_LAZY);

	/* Drop the active count only when the cancel reports success. */
	if (timer_call_cancel(thread->depress_timer)) {
		thread->depress_timer_active--;
	}

	return KERN_SUCCESS;
}
/*
 * thread_poll_yield:
 *
 * Invoked as part of a polling operation like a no-timeout port receive.
 *
 * Forces a fixpri thread to yield if it is detected polling without
 * blocking for too long: once its accumulated computation reaches
 * max_poll_computation, the thread is poll-depressed until
 * (accumulated computation >> sched_poll_yield_shift) from now.
 *
 * `self` must be the current thread and must not carry TH_SFLAG_DEPRESS
 * (both asserted).  Threads whose sched_mode is not TH_MODE_FIXED are
 * left alone.
 */
void
thread_poll_yield(thread_t self)
{
	assert(self == current_thread());
	assert((self->sched_flags & TH_SFLAG_DEPRESS) == 0);

	if (self->sched_mode != TH_MODE_FIXED) {
		return;
	}

	spl_t s = splsched();

	uint64_t now = mach_absolute_time();
	uint64_t computed = now -
	    self->computation_epoch + self->computation_metered;

	/* Below the polling budget: nothing to do. */
	if (computed < max_poll_computation) {
		splx(s);
		return;
	}

	thread_lock(self);

	/* Restart the computation accounting from this instant. */
	self->computation_epoch = now;
	self->computation_interrupt_epoch = recount_current_thread_interrupt_time_mach();
	self->computation_metered = 0;

	uint64_t yield_expiration = now +
	    (computed >> sched_poll_yield_shift);

	if (!timer_call_enter(self->depress_timer, yield_expiration,
	    TIMER_CALL_USER_CRITICAL)) {
		self->depress_timer_active++;
	}

	self->sched_flags |= TH_SFLAG_POLLDEPRESS;
	thread_recompute_sched_pri(self, SETPRI_DEFAULT);

	thread_unlock(self);
	splx(s);
}
/*
 * thread_yield_internal:
 *
 * Kernel-internal interface to yield for a specified period.
 *
 * ms - depression interval, passed to thread_depress_ms().
 *
 * Returns without doing anything when the scheduler reports that the
 * current thread has no reason to yield.  Otherwise the thread is
 * depressed, blocks with AST_YIELD (no continuation), and has its
 * depression aborted once it runs again.
 *
 * WARNING: Will still yield to priority 0 even if the thread is holding a
 * contended lock!
 */
void
thread_yield_internal(mach_msg_timeout_t ms)
{
	thread_t        self = current_thread();

	assert((self->sched_flags & TH_SFLAG_DEPRESSED_MASK) != TH_SFLAG_DEPRESSED_MASK);

	/* Preemption is held off while the current processor is consulted. */
	disable_preemption();
	if (!SCHED(thread_should_yield)(current_processor(), self)) {
		mp_enable_preemption();
		return;
	}
	enable_preemption();

	thread_depress_ms(ms);

	thread_block_reason(THREAD_CONTINUE_NULL, NULL, AST_YIELD);

	/* Back on a processor: lift whatever depression is still in force. */
	thread_depress_abort(self);
}
/*
 * thread_yield_to_preemption:
 *
 * This yields to a possible non-urgent preemption pending on the current
 * processor.
 *
 * This is useful when doing a long computation in the kernel without
 * returning to userspace.
 *
 * As opposed to other yielding mechanisms, this does not drop the priority
 * of the current thread.
 *
 * Takes no arguments and returns nothing.  When no AST_PREEMPT bit is
 * pending this is a no-op.
 */
void
thread_yield_to_preemption(void)
{
	/*
	 * ast_pending() should ideally be called with interrupts disabled, but
	 * the check here is fine because csw_check() will do the right thing.
	 */
	ast_t *pending_ast = ast_pending();

	if (*pending_ast & AST_PREEMPT) {
		thread_t self = current_thread();

		spl_t s = splsched();

		processor_t p = current_processor();

		/* Re-evaluate under the thread lock and post whatever it reports. */
		thread_lock(self);
		ast_t ast = csw_check(self, p, AST_YIELD);
		ast_on(ast);
		thread_unlock(self);

		/* Block only when the re-check confirmed something to yield to. */
		if (ast != AST_NONE) {
			(void)thread_block_reason(THREAD_CONTINUE_NULL, NULL, ast);
		}

		splx(s);
	}
}