/*
* Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
*/
/*
* @APPLE_FREE_COPYRIGHT@
*/
/*
* File: timer.c
* Purpose: Routines for handling the machine-independent timer.
*/
#include <mach/mach_types.h>
#include <kern/timer_queue.h>
#include <kern/timer_call.h>
#include <kern/clock.h>
#include <kern/thread.h>
#include <kern/processor.h>
#include <kern/macro_help.h>
#include <kern/spl.h>
#include <kern/pms.h>
#include <machine/commpage.h>
#include <machine/machine_routines.h>
#include <sys/kdebug.h>
#include <i386/cpu_data.h>
#include <i386/cpu_topology.h>
#include <i386/cpu_threads.h>
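/* Count of timer interrupts that found no expired timer, running-timer, or PM work. */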
uint32_t spurious_timers;
/*
* Event timer interrupt.
*
* XXX a drawback of this implementation is that events serviced earlier must not set deadlines
* that occur before the entire chain completes.
*
* XXX a better implementation would use a set of generic callouts and iterate over them
*/
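/*
 * timer_intr() services, in order: expired entries on this CPU's rtclock
 * timer queue, the per-processor running timers, and any due
 * power-management deadline, then reprograms the next hardware "pop" via
 * timer_resync_deadlines(). If nothing was due, the interrupt is counted
 * as spurious.
 */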
void
timer_intr(int user_mode, uint64_t rip)
{
uint64_t orig_abstime, abstime;
rtclock_timer_t *mytimer;
cpu_data_t *pp;
uint64_t pmdeadline;
uint64_t min_deadline = EndOfAllTime;
uint64_t run_deadline = EndOfAllTime;
bool timer_processed = false;
pp = current_cpu_datap();
SCHED_STATS_INC(timer_pop_count);
orig_abstime = abstime = mach_absolute_time();
/*
* Has a pending clock timer expired?
*/
mytimer = &pp->rtclock_timer;
timer_processed = (mytimer->deadline <= abstime ||
abstime >= mytimer->queue.earliest_soft_deadline);
if (timer_processed) {
uint64_t rtclock_deadline = MAX(mytimer->deadline, mytimer->when_set);
/*
* When opportunistically processing coalesced timers, don't factor
* their latency into the trace event.
*/
if (abstime > rtclock_deadline) {
TCOAL_DEBUG(0xEEEE0000, abstime,
mytimer->queue.earliest_soft_deadline,
abstime - mytimer->queue.earliest_soft_deadline, 0, 0);
} else {
min_deadline = rtclock_deadline;
}
mytimer->has_expired = TRUE;
mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
mytimer->has_expired = FALSE;
/*
* Get a more up-to-date current time after expiring the timer queue.
*/
abstime = mach_absolute_time();
mytimer->when_set = abstime;
}
/*
* Has a per-CPU running timer expired?
*/
run_deadline = running_timers_expire(pp->cpu_processor, abstime);
if (run_deadline != EndOfAllTime) {
if (run_deadline < min_deadline) {
min_deadline = run_deadline;
}
timer_processed = true;
abstime = mach_absolute_time();
}
/*
* Log the timer latency *before* the power management events.
*/
if (__probable(timer_processed)) {
/*
* Log the maximum interrupt service latency experienced by a timer.
*/
int64_t latency = min_deadline == EndOfAllTime ? 0 :
(int64_t)(abstime - min_deadline);
/*
* Log the interrupt service latency as a negative value, which the
* latency tool expects; a non-PM event is expected next.
* The requested deadline may be earlier than the time it was set,
* so the MAX() against when_set above avoids reporting bogus latencies.
*/
KDBG_RELEASE(DECR_TRAP_LATENCY, -latency,
user_mode != 0 ? rip : VM_KERNEL_UNSLIDE(rip), user_mode);
}
/*
* Is it time for a power-management state change?
*/
if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) {
KDBG_RELEASE(DECR_PM_DEADLINE | DBG_FUNC_START);
pmCPUDeadline(pp);
KDBG_RELEASE(DECR_PM_DEADLINE | DBG_FUNC_END);
timer_processed = true;
/*
* XXX Nothing below needs an updated abstime, so omit the update.
*/
}
/*
* Schedule the next deadline.
*/
x86_lcpu()->rtcDeadline = EndOfAllTime;
timer_resync_deadlines();
if (__improbable(!timer_processed)) {
spurious_timers++;
}
}
/*
* Set the clock deadline.
*/
void
timer_set_deadline(uint64_t deadline)
{
rtclock_timer_t *mytimer;
spl_t s;
cpu_data_t *pp;
s = splclock(); /* no interruptions */
pp = current_cpu_datap();
mytimer = &pp->rtclock_timer; /* Point to the timer itself */
mytimer->deadline = deadline; /* Set new expiration time */
mytimer->when_set = mach_absolute_time();
timer_resync_deadlines();
splx(s);
}
/*
* Re-evaluate the outstanding deadlines and select the most proximate.
*
* Should be called at splclock.
*/
void
timer_resync_deadlines(void)
{
uint64_t deadline = EndOfAllTime;
uint64_t pmdeadline;
rtclock_timer_t *mytimer;
spl_t s = splclock();
cpu_data_t *pp;
uint32_t decr;
pp = current_cpu_datap();
if (!pp->cpu_running) {
/* There's really nothing to do if this processor is down */
splx(s);
return;
}
/*
* If we have a clock timer set, pick that.
*/
mytimer = &pp->rtclock_timer;
if (!mytimer->has_expired &&
0 < mytimer->deadline && mytimer->deadline < EndOfAllTime) {
deadline = mytimer->deadline;
}
/*
* If we have a power management deadline, see if that's earlier.
*/
pmdeadline = pmCPUGetDeadline(pp);
if (0 < pmdeadline && pmdeadline < deadline) {
deadline = pmdeadline;
}
uint64_t run_deadline = running_timers_deadline(pp->cpu_processor);
if (run_deadline < deadline) {
deadline = run_deadline;
}
/*
* Go and set the "pop" event.
*/
decr = (uint32_t) setPop(deadline);
/* Record non-PM deadline for latency tool */
if (decr != 0 && deadline != pmdeadline) {
uint64_t queue_count = 0;
if (deadline != run_deadline) {
/*
* For a non-quantum timer, put the queue count
* in the tracepoint.
*/
queue_count = mytimer->queue.count;
}
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
DECR_SET_DEADLINE | DBG_FUNC_NONE,
decr, 2,
deadline,
queue_count, 0);
}
splx(s);
}
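/*
 * Expire this CPU's rtclock timer queue outside the timer interrupt path
 * and re-arm the hardware deadline. The unused argument suggests this is
 * intended as a cross-CPU callout handler (e.g. dispatched via
 * timer_call_cpu() or timer_call_nosync_cpu() below).
 */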
void
timer_queue_expire_local(
__unused void *arg)
{
rtclock_timer_t *mytimer;
uint64_t abstime;
cpu_data_t *pp;
pp = current_cpu_datap();
mytimer = &pp->rtclock_timer;
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
DECR_TIMER_EXPIRE_LOCAL | DBG_FUNC_START,
mytimer->deadline, 0, 0, 0, 0);
abstime = mach_absolute_time();
mytimer->has_expired = TRUE;
mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
mytimer->has_expired = FALSE;
mytimer->when_set = mach_absolute_time();
timer_resync_deadlines();
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
DECR_TIMER_EXPIRE_LOCAL | DBG_FUNC_END,
mytimer->deadline, 0, 0, 0, 0);
}
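/*
 * As timer_queue_expire_local(), but the TRUE option passed to
 * timer_queue_expire_with_options() appears to request a rescan of the
 * queue, presumably to re-sort entries whose soft deadlines have been
 * skewed past timer_resort_threshold(). Must be called with interrupts
 * disabled.
 */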
void
timer_queue_expire_rescan(
__unused void *arg)
{
rtclock_timer_t *mytimer;
uint64_t abstime;
cpu_data_t *pp;
assert(ml_get_interrupts_enabled() == FALSE);
pp = current_cpu_datap();
mytimer = &pp->rtclock_timer;
abstime = mach_absolute_time();
mytimer->has_expired = TRUE;
mytimer->deadline = timer_queue_expire_with_options(&mytimer->queue, abstime, TRUE);
mytimer->has_expired = FALSE;
mytimer->when_set = mach_absolute_time();
timer_resync_deadlines();
}
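/*
 * Skew threshold (50 ms; absolute time and nanoseconds coincide on x86)
 * beyond which timer_resort_threshold() reports that a coalesced timer
 * entry should be re-sorted within its queue.
 */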
#define TIMER_RESORT_THRESHOLD_ABSTIME (50 * NSEC_PER_MSEC)
#if TCOAL_PRIO_STATS
int32_t nc_tcl, rt_tcl, bg_tcl, kt_tcl, fp_tcl, ts_tcl, qos_tcl;
#define TCOAL_PRIO_STAT(x) (x++)
#else
#define TCOAL_PRIO_STAT(x)
#endif
boolean_t
timer_resort_threshold(uint64_t skew)
{
if (skew >= TIMER_RESORT_THRESHOLD_ABSTIME) {
return TRUE;
} else {
return FALSE;
}
}
/*
* Return the local timer queue for a running processor;
* otherwise, return the boot processor's timer queue.
*/
mpqueue_head_t *
timer_queue_assign(
uint64_t deadline)
{
cpu_data_t *cdp = current_cpu_datap();
mpqueue_head_t *queue;
if (cdp->cpu_running) {
queue = &cdp->rtclock_timer.queue;
if (deadline < cdp->rtclock_timer.deadline) {
timer_set_deadline(deadline);
}
} else {
queue = &cpu_datap(master_cpu)->rtclock_timer.queue;
}
return queue;
}
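/*
 * Called when a timer with the given deadline is removed from a queue.
 * If the queue is this CPU's rtclock timer queue and the cancelled
 * deadline was earlier than the queue's new earliest deadline, push the
 * hardware deadline out to new_deadline.
 */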
void
timer_queue_cancel(
mpqueue_head_t *queue,
uint64_t deadline,
uint64_t new_deadline)
{
if (queue == &current_cpu_datap()->rtclock_timer.queue) {
if (deadline < new_deadline) {
timer_set_deadline(new_deadline);
}
}
}
/*
* timer_queue_migrate_cpu() is called from the Power-Management kext
* when a logical processor goes idle (in a deep C-state) with a distant
* deadline so that its timer queue can be moved to another processor.
* This target processor should be the least idle (most busy) --
* currently this is the primary processor for the calling thread's package.
* Locking restrictions demand that the target cpu must be the boot cpu.
*/
uint32_t
timer_queue_migrate_cpu(int target_cpu)
{
cpu_data_t *target_cdp = cpu_datap(target_cpu);
cpu_data_t *cdp = current_cpu_datap();
int ntimers_moved;
assert(!ml_get_interrupts_enabled());
assert(target_cpu != cdp->cpu_number);
assert(target_cpu == master_cpu);
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
DECR_TIMER_MIGRATE | DBG_FUNC_START,
target_cpu,
cdp->rtclock_timer.deadline, (cdp->rtclock_timer.deadline >> 32),
0, 0);
/*
* Move timer requests from the local queue to the target processor's.
* The return value is the number of requests moved. If this is 0,
* it indicates that the first (i.e. earliest) timer is earlier than
* the earliest for the target processor. Since this would force a
* resync, the move of this and all later requests is aborted.
*/
ntimers_moved = timer_queue_migrate(&cdp->rtclock_timer.queue,
&target_cdp->rtclock_timer.queue);
/*
* If any timers were moved, clear the local deadline and cancel the pending pop.
*/
if (ntimers_moved > 0) {
cdp->rtclock_timer.deadline = EndOfAllTime;
setPop(EndOfAllTime);
}
KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
DECR_TIMER_MIGRATE | DBG_FUNC_END,
target_cpu, ntimers_moved, 0, 0, 0);
return ntimers_moved;
}
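/* Return the rtclock timer queue belonging to the given CPU. */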
mpqueue_head_t *
timer_queue_cpu(int cpu)
{
return &cpu_datap(cpu)->rtclock_timer.queue;
}
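/* Run fn(arg) on the given CPU and wait for it to complete. */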
void
timer_call_cpu(int cpu, void (*fn)(void *), void *arg)
{
mp_cpus_call(cpu_to_cpumask(cpu), SYNC, fn, arg);
}
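/* As timer_call_cpu(), but don't wait for the target CPU to run fn. */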
void
timer_call_nosync_cpu(int cpu, void (*fn)(void *), void *arg)
{
/* XXX Needs error checking and retry */
mp_cpus_call(cpu_to_cpumask(cpu), NOSYNC, fn, arg);
}
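/*
 * Default timer coalescing parameters: a shift and a maximum window for
 * each thread class (realtime, background, kernel, fixed-priority,
 * timeshare), plus per-latency-QoS-tier scales, maxima, and rate-limiting
 * flags. Elsewhere in the timer coalescing code, a non-negative shift n
 * appears to allow a slop of (time-to-deadline >> n) and a negative shift
 * -n a slop of (time-to-deadline << n), each capped by the corresponding
 * _ns_max value; e.g. the background shift of -5 would permit up to 32x
 * the time-to-deadline, capped at 100 ms, while the realtime maximum of 0
 * effectively disables coalescing for realtime threads.
 */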
static timer_coalescing_priority_params_ns_t tcoal_prio_params_init =
{
.idle_entry_timer_processing_hdeadline_threshold_ns = 5000ULL * NSEC_PER_USEC,
.interrupt_timer_coalescing_ilat_threshold_ns = 30ULL * NSEC_PER_USEC,
.timer_resort_threshold_ns = 50 * NSEC_PER_MSEC,
.timer_coalesce_rt_shift = 0,
.timer_coalesce_bg_shift = -5,
.timer_coalesce_kt_shift = 3,
.timer_coalesce_fp_shift = 3,
.timer_coalesce_ts_shift = 3,
.timer_coalesce_rt_ns_max = 0ULL,
.timer_coalesce_bg_ns_max = 100 * NSEC_PER_MSEC,
.timer_coalesce_kt_ns_max = 1 * NSEC_PER_MSEC,
.timer_coalesce_fp_ns_max = 1 * NSEC_PER_MSEC,
.timer_coalesce_ts_ns_max = 1 * NSEC_PER_MSEC,
.latency_qos_scale = {3, 2, 1, -2, -15, -15},
.latency_qos_ns_max = {1 * NSEC_PER_MSEC, 5 * NSEC_PER_MSEC, 20 * NSEC_PER_MSEC,
75 * NSEC_PER_MSEC, 10000 * NSEC_PER_MSEC, 10000 * NSEC_PER_MSEC},
.latency_tier_rate_limited = {FALSE, FALSE, FALSE, FALSE, TRUE, TRUE},
};
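/*
 * Hand the default coalescing parameters above to the machine-independent
 * timer code, which presumably converts the nanosecond values to absolute
 * time at initialization.
 */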
timer_coalescing_priority_params_ns_t *
timer_call_get_priority_params(void)
{
return &tcoal_prio_params_init;
}