/*
 * Copyright (c) 2024 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/timeout.h>
#include <kern/clock.h>
#include <kern/monotonic.h>
#include <kern/recount.h>
#include <kern/debug.h>
#include <kern/backtrace.h>
#include <kern/trap_telemetry.h>
#include <machine/machine_routines.h>


/* Snapshot of the timeout that initiated a panic, kept for debugger inspection. */
kern_timeout_t panic_timeout;
/* Set once a timeout has started the panic flow, to suppress nested timeout panics. */
boolean_t kern_timeout_panic_initiated = false;

#if defined(__x86_64__)
/* x86_64 has no separate non-serializing "speculative" timebase read; use ml_get_timebase(). */
#define ml_get_speculative_timebase ml_get_timebase
#endif

#if DEVELOPMENT || DEBUG
TUNABLE_DT_WRITEABLE(int, timeouts_are_fatal, "machine-timeouts", "timeouts-are-fatal",
    "timeouts_are_fatal", 1, TUNABLE_DT_CHECK_CHOSEN);
#endif
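/*
 * For example, booting a DEVELOPMENT/DEBUG kernel with the boot-arg
 * "timeouts_are_fatal=0" (or setting the "timeouts-are-fatal" device tree
 * property) makes kern_timeout_try_panic() report latency violations as trap
 * telemetry instead of panicking; the default of 1 keeps timeouts fatal.
 */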

/*
 * (Re)arm a timeout measurement: snapshot the current timebase and, when
 * requested by the flags, the current CPU's cycle/instruction counts and
 * accumulated interrupt time. Unlike kern_timeout_start(), no backtrace
 * is captured.
 */
void
kern_timeout_restart(kern_timeout_t *to, timeout_flags_t flags)
{
#if CONFIG_CPU_COUNTERS
	if (__improbable(!(flags & TF_SAMPLE_PMC))) {
		to->start_cycles = 0;
		to->start_instrs = 0;
	} else {
		mt_cur_cpu_cycles_instrs_speculative(&to->start_cycles, &to->start_instrs);
	}
#endif /* CONFIG_CPU_COUNTERS */

	if (flags & TF_SAMPLE_INTERRUPT_TIME) {
		to->int_mt = recount_current_processor_interrupt_duration_mach();
	} else {
		to->int_mt = 0;
	}

	to->start_mt = (flags & TF_NONSPEC_TIMEBASE) ? ml_get_timebase() : ml_get_speculative_timebase();
}

/*
 * Begin a timeout measurement, optionally capturing a backtrace of the start
 * point before the timebase and counters are sampled.
 */
void
kern_timeout_start(kern_timeout_t *to, timeout_flags_t flags)
{
	if (flags & TF_BACKTRACE) {
		(void) backtrace(&to->bt[0], TO_BT_FRAMES, NULL, NULL);
	}
	kern_timeout_restart(to, flags);
}

/*
 * Finish a timeout measurement: record the ending timebase and, when
 * requested, convert the interrupt-time snapshot into the interrupt duration
 * accrued over the window.
 */
void
kern_timeout_end(kern_timeout_t *to, timeout_flags_t flags)
{
	to->end_mt = (flags & TF_NONSPEC_TIMEBASE) ? ml_get_timebase() : ml_get_speculative_timebase();
	if (flags & TF_SAMPLE_INTERRUPT_TIME) {
		to->int_mt = recount_current_processor_interrupt_duration_mach() - to->int_mt;
	}
}

/*
 * Zero out the timeout's start state so that processing this timeout cannot
 * itself trigger another timeout later in the same code path.
 */
void
kern_timeout_override(kern_timeout_t *to)
{
	to->start_mt = 0;
#if CONFIG_CPU_COUNTERS
	to->start_cycles = 0;
	to->start_instrs = 0;
#endif /* CONFIG_CPU_COUNTERS */
}
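
/*
 * Illustrative sketch (not taken from this file): a typical caller brackets a
 * critical section with kern_timeout_start()/kern_timeout_end(), compares the
 * measured duration against its own threshold, and calls
 * kern_timeout_try_panic() on a violation. The flags, timeout type, threshold,
 * and panic prefix below are placeholders chosen only for the example.
 *
 *	kern_timeout_t to;
 *	uint64_t threshold = ...;	// caller-specific limit, in mach time units
 *
 *	kern_timeout_start(&to, TF_SAMPLE_PMC | TF_SAMPLE_INTERRUPT_TIME);
 *	// ... section being bounded ...
 *	kern_timeout_end(&to, TF_SAMPLE_INTERRUPT_TIME);
 *	if (kern_timeout_gross_duration(&to) >= threshold) {
 *		kern_timeout_try_panic(KERN_TIMEOUT_PREEMPTION, 0, &to,
 *		    "example section exceeded its timeout:", threshold);
 *	}
 */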

#if CONFIG_CPU_COUNTERS
/*
 * Report the cycles and instructions retired on the current CPU since the
 * timeout was started, or zeroes if the PMCs were not sampled at the start.
 */
void
kern_timeout_cycles_instrs(kern_timeout_t *to, uint64_t *cycles, uint64_t *instrs)
{
	uint64_t now_cycles, now_instrs;

	if (__improbable(to->start_cycles == 0)) {
		*cycles = 0;
		*instrs = 0;
	} else {
		mt_cur_cpu_cycles_instrs_speculative(&now_cycles, &now_instrs);
		*cycles = now_cycles - to->start_cycles;
		*instrs = now_instrs - to->start_instrs;
	}
}

/*
 * Compute cycles-per-instruction over the timeout window as a whole part plus
 * two decimal digits of fraction.
 */
void
kern_timeout_cpi(kern_timeout_t *to, uint64_t *cpi_whole, uint64_t *cpi_fractional)
{
	uint64_t cycles, instrs;

	kern_timeout_cycles_instrs(to, &cycles, &instrs);
	if (__improbable(instrs == 0)) {
		/* Avoid dividing by zero when the PMCs were not sampled. */
		*cpi_whole = 0;
		*cpi_fractional = 0;
		return;
	}
	*cpi_whole = cycles / instrs;
	*cpi_fractional = ((cycles * 100) / instrs) % 100;
}
#else /* !CONFIG_CPU_COUNTERS */
void
kern_timeout_cycles_instrs(kern_timeout_t __unused *to, uint64_t *cycles, uint64_t *instrs)
{
	*cycles = 0;
	*instrs = 0;
}

void
kern_timeout_cpi(kern_timeout_t __unused *to, uint64_t *cpi_whole, uint64_t *cpi_fractional)
{
	*cpi_whole = 0;
	*cpi_fractional = 0;
}
#endif /* CONFIG_CPU_COUNTERS */
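
/*
 * Worked example of the fixed-point split in the CONFIG_CPU_COUNTERS version
 * of kern_timeout_cpi() above: with cycles = 1234 and instrs = 500,
 * cpi_whole = 1234 / 500 = 2 and
 * cpi_fractional = ((1234 * 100) / 500) % 100 = 246 % 100 = 46,
 * i.e. a CPI of 2.46 (the true ratio, 2.468, truncated to two digits).
 */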

__enum_closed_decl(timeout_mode_t, uint32_t, {
	TIMEOUT_TELEMETRY,
	TIMEOUT_PANIC
});

/*
 * This interface is a "try panic": we won't invoke a nested panic if an
 * earlier timeout already initiated the panic that is in progress.
 */
void
kern_timeout_try_panic(kern_timeout_type_t __unused type, uint64_t __unused payload, kern_timeout_t *to, const char *prefix, uint64_t threshold)
{
	char cpi[80];
	char duration[80];
	const uint64_t gross_duration = kern_timeout_gross_duration(to);
	const uint64_t net_duration = kern_timeout_net_duration(to);
	uint64_t gross_ns, net_ns, threshold_ns;
	uint64_t gross_us, net_us, threshold_us;
	uint64_t gross_ms, net_ms, threshold_ms;
	uint64_t start_mt, end_mt;
	uint64_t __unused average_freq = 0;
	uint64_t __unused cpi_whole = 0;
#ifdef __arm64__
	const char __unused core_type = ml_get_current_core_type();
#else
	const char __unused core_type = '-';
#endif /* __arm64__ */

	/*
	 * We can recursively try to panic due to a timeout in the panic flow,
	 * so if that happens, just bail out here.
	 */
	if (kern_timeout_panic_initiated) {
		return;
	}

	absolutetime_to_nanoseconds(gross_duration, &gross_ns);
	absolutetime_to_nanoseconds(net_duration, &net_ns);
	absolutetime_to_nanoseconds(threshold, &threshold_ns);
	kern_timeout_mach_times(to, &start_mt, &end_mt);

	cpi[0] = 0;

#if CONFIG_CPU_COUNTERS
	uint64_t cycles;
	uint64_t instrs;

	/*
	 * We sample these values a bit late, but sampling them is expensive,
	 * so we accept a slight loss of accuracy in the reported values
	 * (which aren't very stable anyway).
	 */
	kern_timeout_cycles_instrs(to, &cycles, &instrs);
	if (cycles > 0 && instrs > 0) {
		cpi_whole = cycles / instrs;
		/* cycles per microsecond of gross duration, i.e. average frequency in MHz */
		average_freq = cycles / (gross_ns / 1000);
	}
#endif /* CONFIG_CPU_COUNTERS */

#if DEVELOPMENT || DEBUG
	timeout_mode_t mode = timeouts_are_fatal ? TIMEOUT_PANIC : TIMEOUT_TELEMETRY;
	if (mode == TIMEOUT_PANIC) {

#if CONFIG_CPU_COUNTERS && !defined(HAS_FEAT_XS)
		/*
		 * POLICY: if CPI > 100 and we are on a SoC that does not support
		 * FEAT_XS, it's likely the stall was caused by a long TLBI. This
		 * isn't an actionable radar condition for preemption or interrupt
		 * disabled timeouts, so do nothing.
		 */
		if ((type == KERN_TIMEOUT_PREEMPTION || type == KERN_TIMEOUT_INTERRUPT) &&
		    cpi_whole > 100) {
			return;
		}
#endif /* CONFIG_CPU_COUNTERS && !HAS_FEAT_XS */

#if ML_IO_TIMEOUTS_ENABLED
		/*
		 * POLICY: check the MMIO override window to see if we are still
		 * within it. If we are, abandon the attempt to panic, since
		 * the timeout is almost certainly due to a known issue causing
		 * a stall that got entangled with this core. We don't emit
		 * telemetry in this case because the MMIO overrides have their
		 * own telemetry mechanism.
		 */
		if (ml_io_check_for_mmio_overrides(start_mt)) {
			return;
		}
#endif /* ML_IO_TIMEOUTS_ENABLED */
	}

	if (mode == TIMEOUT_TELEMETRY) {
		trap_telemetry_type_t trap_type;
		switch (type) {
		case KERN_TIMEOUT_PREEMPTION:
			trap_type = TRAP_TELEMETRY_TYPE_PREEMPTION_TIMEOUT;
			break;
		case KERN_TIMEOUT_INTERRUPT:
			trap_type = TRAP_TELEMETRY_TYPE_INTERRUPT_TIMEOUT;
			break;
		case KERN_TIMEOUT_MMIO:
			trap_type = TRAP_TELEMETRY_TYPE_MMIO_TIMEOUT;
			break;
		case KERN_TIMEOUT_LOCK:
			trap_type = TRAP_TELEMETRY_TYPE_LOCK_TIMEOUT;
			break;
		default:
			panic("unknown timeout type\n");
		}
		trap_telemetry_report_latency_violation(
			trap_type,
			(trap_telemetry_latency_s) {
			.violation_cpi = cpi_whole,
			.violation_freq = average_freq,
			.violation_cpu_type = core_type,
			.violation_duration = net_ns,
			.violation_threshold = threshold_ns,
			.violation_payload = payload
		});
		return;
	}
#endif /* DEVELOPMENT || DEBUG */

	kern_timeout_panic_initiated = true;
	panic_timeout = *to;

	gross_us = gross_ns / 1000;
	net_us = net_ns / 1000;
	threshold_us = threshold_ns / 1000;
	gross_ms = gross_us / 1000;
	net_ms = net_us / 1000;
	threshold_ms = threshold_us / 1000;

#if CONFIG_CPU_COUNTERS
	if (cycles > 0 && instrs > 0) {
		uint64_t cpi_fractional;

		cpi_fractional = ((cycles * 100) / instrs) % 100;

		snprintf(cpi, sizeof(cpi), ", freq %llu MHz, type = %c, CPI = %llu.%02llu [%llu, %llu]",
		    average_freq, core_type, cpi_whole, cpi_fractional, cycles, instrs);
	}
#endif /* CONFIG_CPU_COUNTERS */
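
	/*
	 * Worked example of the duration strings below: for
	 * gross_ns = 12,345,678 the conversions above yield gross_us = 12345
	 * and gross_ms = 12, so with a millisecond-scale threshold the gross
	 * duration prints as "12.345 ms" (gross_ms, then gross_us % 1000 with
	 * "%03llu"); sub-millisecond thresholds print plain microseconds.
	 */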

	if (gross_ns > net_ns) {
		if (threshold_ms > 0) {
			snprintf(duration, sizeof(duration), "gross %llu.%03llu ms, net %llu.%03llu ms >= %llu.%03llu ms",
			    gross_ms, gross_us % 1000, net_ms, net_us % 1000, threshold_ms, threshold_us % 1000);
		} else {
			snprintf(duration, sizeof(duration), "gross %llu us, net %llu us >= %llu us",
			    gross_us, net_us, threshold_us);
		}
	} else {
		if (threshold_ms > 0) {
			snprintf(duration, sizeof(duration), "%llu.%03llu ms >= %llu.%03llu ms",
			    gross_ms, gross_us % 1000, threshold_ms, threshold_us % 1000);
		} else {
			snprintf(duration, sizeof(duration), "%llu us >= %llu us",
			    gross_us, threshold_us);
		}
	}

	panic_plain("%s %s (start: %llu, end: %llu)%s", prefix, duration, start_mt, end_mt, cpi);
}