This is xnu-11215.1.10. See this file in:
/* Copyright (c) 2018-2021 Apple Inc.  All rights reserved. */

#include <CoreFoundation/CoreFoundation.h>
#include <darwintest.h>
#include <darwintest_utils.h>
#include <dispatch/dispatch.h>
#include <ktrace/ktrace.h>
#include <kperf/kperf.h>
#include <kern/debug.h>
#include <notify.h>
#include <sys/kdebug.h>
#include <sys/sysctl.h>
#include <TargetConditionals.h>

#include "ktrace/ktrace_helpers.h"

enum telemetry_pmi {
	TELEMETRY_PMI_NONE,
	TELEMETRY_PMI_INSTRS,
	TELEMETRY_PMI_CYCLES,
};
#define TELEMETRY_CMD_PMI_SETUP 3

T_GLOBAL_META(T_META_NAMESPACE("xnu.stackshot.microstackshot"),
    T_META_RADAR_COMPONENT_NAME("xnu"),
    T_META_RADAR_COMPONENT_VERSION("stackshot"),
    T_META_OWNER("mwidmann"),
    T_META_CHECK_LEAKS(false),
    T_META_ASROOT(true));

extern int __telemetry(uint64_t cmd, uint64_t deadline, uint64_t interval,
    uint64_t leeway, uint64_t arg4, uint64_t arg5);

/*
 * Data Analytics (da) also has a microstackshot configuration -- set a PMI
 * cycle interval of 0 to force it to disable microstackshot on PMI.
 */

static void
set_da_microstackshot_period(CFNumberRef num)
{
	CFPreferencesSetValue(CFSTR("microstackshotPMICycleInterval"), num,
	    CFSTR("com.apple.da"),
#if TARGET_OS_IPHONE
	    CFSTR("mobile"),
#else // TARGET_OS_IPHONE
	    CFSTR("root"),
#endif // !TARGET_OS_IPHONE
	    kCFPreferencesCurrentHost);

	notify_post("com.apple.da.tasking_changed");
}

static void
disable_da_microstackshots(void)
{
	int64_t zero = 0;
	CFNumberRef num = CFNumberCreate(NULL, kCFNumberSInt64Type, &zero);
	set_da_microstackshot_period(num);
	T_LOG("notified da of tasking change, sleeping");
#if TARGET_OS_WATCH
	sleep(8);
#else /* TARGET_OS_WATCH */
	sleep(3);
#endif /* !TARGET_OS_WATCH */
}

/*
 * Unset the preference to allow da to reset its configuration.
 */
static void
reenable_da_microstackshots(void)
{
	set_da_microstackshot_period(NULL);
}

/*
 * Clean up the test's configuration and allow da to activate again.
 */
static void
telemetry_cleanup(void)
{
	(void)__telemetry(TELEMETRY_CMD_PMI_SETUP, TELEMETRY_PMI_NONE, 0, 0, 0, 0);
	reenable_da_microstackshots();
}

/*
 * Make sure da hasn't configured the microstackshots -- otherwise the PMI
 * setup command will return EBUSY.
 */
static void
telemetry_init(void)
{
	disable_da_microstackshots();
	T_LOG("installing cleanup handler");
	T_ATEND(telemetry_cleanup);
}

volatile static bool spinning = true;

static void *
thread_spin(__unused void *arg)
{
	while (spinning) {
	}
	return NULL;
}

static bool
query_pmi_params(unsigned int *pmi_counter, uint64_t *pmi_period)
{
	bool pmi_support = true;
	size_t sysctl_size = sizeof(pmi_counter);
	int ret = sysctlbyname(
			"kern.microstackshot.pmi_sample_counter",
			pmi_counter, &sysctl_size, NULL, 0);
	if (ret == -1 && errno == ENOENT) {
		pmi_support = false;
		T_LOG("no PMI support");
	} else {
		T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "query PMI counter");
	}
	if (pmi_support) {
		sysctl_size = sizeof(*pmi_period);
		T_QUIET; T_ASSERT_POSIX_SUCCESS(sysctlbyname(
				"kern.microstackshot.pmi_sample_period",
				pmi_period, &sysctl_size, NULL, 0),
				"query PMI period");
	}
	return pmi_support;
}

#define MT_MICROSTACKSHOT KDBG_EVENTID(DBG_MONOTONIC, 2, 1)
#define MS_RECORD MACHDBG_CODE(DBG_MACH_STACKSHOT, \
	        MICROSTACKSHOT_RECORD)
#if defined(__arm64__)
#define INSTRS_PERIOD (100ULL * 1000 * 1000)
#else /* defined(__arm64__) */
#define INSTRS_PERIOD (1ULL * 1000 * 1000 * 1000)
#endif /* defined(__arm64__) */
#define SLEEP_SECS 10

T_DECL(pmi_sampling, "attempt to configure microstackshots on PMI",
		T_META_REQUIRES_SYSCTL_EQ("kern.monotonic.supported", 1), T_META_TAG_VM_NOT_ELIGIBLE)
{
	start_controlling_ktrace();

	T_SETUPBEGIN;
	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "session create");

	__block int pmi_events = 0;
	__block int microstackshot_record_events = 0;
	__block int pmi_records = 0;
	__block int io_records = 0;
	__block int interrupt_records = 0;
	__block int timer_arm_records = 0;
	__block int unknown_records = 0;
	__block int empty_records = 0;

	ktrace_events_single(s, MT_MICROSTACKSHOT, ^(__unused struct trace_point *tp) {
		pmi_events++;
	});
	ktrace_events_single_paired(s, MS_RECORD,
	    ^(struct trace_point *start, __unused struct trace_point *end) {
		if (start->arg1 & kPMIRecord) {
		        pmi_records++;
		}
		if (start->arg1 & kIORecord) {
		        io_records++;
		}
		if (start->arg1 & kInterruptRecord) {
		        interrupt_records++;
		}
		if (start->arg1 & kTimerArmingRecord) {
		        timer_arm_records++;
		}

		if (start->arg2 == end->arg2) {
			/*
			 * The buffer didn't grow for this record -- there was
			 * an error.
			 */
			empty_records++;
		}

		const uint8_t any_record = kPMIRecord | kIORecord | kInterruptRecord |
		kTimerArmingRecord;
		if ((start->arg1 & any_record) == 0) {
		        unknown_records++;
		}

		microstackshot_record_events++;
	});

	ktrace_set_completion_handler(s, ^{
		ktrace_session_destroy(s);
		T_EXPECT_GT(pmi_events, 0, "saw non-zero PMIs (%g/sec)",
				pmi_events / (double)SLEEP_SECS);
		T_EXPECT_GT(pmi_records, 0, "saw non-zero PMI record events (%g/sec)",
				pmi_records / (double)SLEEP_SECS);
		T_LOG("saw %d unknown record events", unknown_records);
		T_EXPECT_GT(microstackshot_record_events, 0,
				"saw non-zero microstackshot record events (%d -- %g/sec)",
				microstackshot_record_events,
				microstackshot_record_events / (double)SLEEP_SECS);
		T_EXPECT_NE(empty_records, microstackshot_record_events,
				"saw non-empty records (%d empty)", empty_records);

		if (interrupt_records > 0) {
			T_LOG("saw %g interrupt records per second",
					interrupt_records / (double)SLEEP_SECS);
		} else {
			T_LOG("saw no interrupt records");
		}
		if (io_records > 0) {
			T_LOG("saw %g I/O records per second",
					io_records / (double)SLEEP_SECS);
		} else {
			T_LOG("saw no I/O records");
		}
		if (timer_arm_records > 0) {
			T_LOG("saw %g timer arming records per second",
					timer_arm_records / (double)SLEEP_SECS);
		} else {
			T_LOG("saw no timer arming records");
		}

		T_END;
	});

	T_SETUPEND;

	telemetry_init();

	/*
	 * Start sampling via telemetry on the instructions PMI.
	 */
	int ret = __telemetry(TELEMETRY_CMD_PMI_SETUP, TELEMETRY_PMI_INSTRS,
			INSTRS_PERIOD, 0, 0, 0);
	T_ASSERT_POSIX_SUCCESS(ret,
			"telemetry syscall succeeded, started microstackshots");

	unsigned int pmi_counter = 0;
	uint64_t pmi_period = 0;
	bool pmi_support = query_pmi_params(&pmi_counter, &pmi_period);
	T_QUIET; T_ASSERT_TRUE(pmi_support, "PMI should be supported");

	T_LOG("PMI counter: %u", pmi_counter);
	T_LOG("PMI period: %llu", pmi_period);
#if defined(__arm64__)
	const unsigned int instrs_counter = 1;
#else
	const unsigned int instrs_counter = 0;
#endif // defined(__arm64__)
	T_QUIET; T_ASSERT_EQ(pmi_counter, instrs_counter,
			"PMI on instructions retired");
	T_QUIET; T_ASSERT_EQ(pmi_period, INSTRS_PERIOD, "PMI period is set");

	pthread_t thread;
	int error = pthread_create(&thread, NULL, thread_spin, NULL);
	T_ASSERT_POSIX_ZERO(error, "started thread to spin");

	error = ktrace_start(s, dispatch_get_main_queue());
	T_ASSERT_POSIX_ZERO(error, "started tracing");

	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, SLEEP_SECS * NSEC_PER_SEC),
			dispatch_get_main_queue(), ^{
		spinning = false;
		ktrace_end(s, 0);
		(void)pthread_join(thread, NULL);
		T_LOG("ending trace session after %d seconds", SLEEP_SECS);
	});

	dispatch_main();
}

T_DECL(error_handling,
		"ensure that error conditions for the telemetry syscall are observed",
		T_META_REQUIRES_SYSCTL_EQ("kern.monotonic.supported", 1), T_META_TAG_VM_NOT_ELIGIBLE)
{
	telemetry_init();

	int ret = __telemetry(TELEMETRY_CMD_PMI_SETUP, TELEMETRY_PMI_INSTRS,
	    1, 0, 0, 0);
	T_EXPECT_EQ(ret, -1, "telemetry shouldn't allow PMI every instruction");

	ret = __telemetry(TELEMETRY_CMD_PMI_SETUP, TELEMETRY_PMI_INSTRS,
	    1000 * 1000, 0, 0, 0);
	T_EXPECT_EQ(ret, -1,
	    "telemetry shouldn't allow PMI every million instructions");

	ret = __telemetry(TELEMETRY_CMD_PMI_SETUP, TELEMETRY_PMI_CYCLES,
	    1, 0, 0, 0);
	T_EXPECT_EQ(ret, -1, "telemetry shouldn't allow PMI every cycle");

	ret = __telemetry(TELEMETRY_CMD_PMI_SETUP, TELEMETRY_PMI_CYCLES,
	    1000 * 1000, 0, 0, 0);
	T_EXPECT_EQ(ret, -1,
	    "telemetry shouldn't allow PMI every million cycles");

	ret = __telemetry(TELEMETRY_CMD_PMI_SETUP, TELEMETRY_PMI_CYCLES,
	    UINT64_MAX, 0, 0, 0);
	T_EXPECT_EQ(ret, -1, "telemetry shouldn't allow PMI every UINT64_MAX cycles");
}

#define START_EVENT (0xfeedfad0)
#define STOP_EVENT (0xfeedfac0)

T_DECL(excessive_sampling,
		"ensure that microstackshots are not being sampled too frequently",
		T_META_REQUIRES_SYSCTL_EQ("kern.monotonic.supported", 1), T_META_TAG_VM_NOT_ELIGIBLE)
{
	unsigned int interrupt_sample_rate = 0;
	size_t sysctl_size = sizeof(interrupt_sample_rate);
	T_QUIET;
	T_ASSERT_POSIX_SUCCESS(sysctlbyname(
			"kern.microstackshot.interrupt_sample_rate",
			&interrupt_sample_rate, &sysctl_size, NULL, 0),
			"query interrupt sample rate");
	unsigned int pmi_counter = 0;
	uint64_t pmi_period = 0;
	(void)query_pmi_params(&pmi_counter, &pmi_period);

	T_LOG("interrupt sample rate: %uHz", interrupt_sample_rate);
	T_LOG("PMI counter: %u", pmi_counter);
	T_LOG("PMI period: %llu", pmi_period);

	start_controlling_ktrace();

	T_SETUPBEGIN;
	ktrace_session_t s = ktrace_session_create();
	T_QUIET; T_WITH_ERRNO; T_ASSERT_NOTNULL(s, "session create");

	__block int microstackshot_record_events = 0;
	__block int pmi_records = 0;
	__block int io_records = 0;
	__block int interrupt_records = 0;
	__block int timer_arm_records = 0;
	__block int unknown_records = 0;
	__block int empty_records = 0;
	__block uint64_t first_timestamp_ns = 0;
	__block uint64_t last_timestamp_ns = 0;

	ktrace_events_single_paired(s, MS_RECORD,
			^(struct trace_point *start, __unused struct trace_point *end) {
		if (start->arg1 & kPMIRecord) {
			pmi_records++;
		}
		if (start->arg1 & kIORecord) {
			io_records++;
		}
		if (start->arg1 & kInterruptRecord) {
			interrupt_records++;
		}
		if (start->arg1 & kTimerArmingRecord) {
			timer_arm_records++;
		}

		if (start->arg2 == end->arg2) {
			/*
			 * The buffer didn't grow for this record -- there was
			 * an error.
			 */
			empty_records++;
		}

		const uint8_t any_record = kPMIRecord | kIORecord | kInterruptRecord |
				kTimerArmingRecord;
		if ((start->arg1 & any_record) == 0) {
			unknown_records++;
		}

		microstackshot_record_events++;
	});

	ktrace_events_single(s, START_EVENT, ^(struct trace_point *tp) {
		int error = ktrace_convert_timestamp_to_nanoseconds(s,
				tp->timestamp, &first_timestamp_ns);
		T_QUIET;
		T_ASSERT_POSIX_ZERO(error, "converted timestamp to nanoseconds");
	});

	ktrace_events_single(s, STOP_EVENT, ^(struct trace_point *tp) {
		int error = ktrace_convert_timestamp_to_nanoseconds(s,
				tp->timestamp, &last_timestamp_ns);
		T_QUIET;
		T_ASSERT_POSIX_ZERO(error, "converted timestamp to nanoseconds");
		ktrace_end(s, 1);
	});

	ktrace_set_completion_handler(s, ^{
		ktrace_session_destroy(s);

		uint64_t duration_ns = last_timestamp_ns - first_timestamp_ns;
		double duration_secs = (double)duration_ns / 1e9;

		T_LOG("test lasted %g seconds", duration_secs);

		T_MAYFAIL;
		T_EXPECT_EQ(unknown_records, 0, "saw zero unknown record events");
		T_MAYFAIL;
		T_EXPECT_GT(microstackshot_record_events, 0,
				"saw non-zero microstackshot record events (%d, %gHz)",
				microstackshot_record_events,
				microstackshot_record_events / duration_secs);
		T_EXPECT_NE(empty_records, microstackshot_record_events,
				"saw non-empty records (%d empty)", empty_records);

		double record_rate_hz = microstackshot_record_events / duration_secs;
		T_LOG("record rate: %gHz", record_rate_hz);
		T_LOG("PMI record rate: %gHz", pmi_records / duration_secs);
		T_LOG("interrupt record rate: %gHz",
				interrupt_records / duration_secs);
		T_LOG("I/O record rate: %gHz", io_records / duration_secs);
		T_LOG("timer arm record rate: %gHz",
				timer_arm_records / duration_secs);

		T_EXPECT_LE(record_rate_hz, (double)(dt_ncpu() * 50),
				"found appropriate rate of microstackshots");

		T_END;
	});

	pthread_t thread;
	int error = pthread_create(&thread, NULL, thread_spin, NULL);
	T_ASSERT_POSIX_ZERO(error, "started thread to spin");

	T_SETUPEND;

	error = ktrace_start(s, dispatch_get_main_queue());
	T_ASSERT_POSIX_ZERO(error, "started tracing");
	kdebug_trace(START_EVENT, 0, 0, 0, 0);

	dispatch_after(dispatch_time(DISPATCH_TIME_NOW, SLEEP_SECS * NSEC_PER_SEC),
			dispatch_get_main_queue(), ^{
		spinning = false;
		kdebug_trace(STOP_EVENT, 0, 0, 0, 0);
		(void)pthread_join(thread, NULL);
		T_LOG("ending trace session after %d seconds", SLEEP_SECS);
	});

	dispatch_main();
}