// Copyright (c) 2024 Apple Inc. All rights reserved.
#include "sched_test_harness/sched_policy_darwintest.h"
#include "sched_test_harness/sched_edge_harness.h"
T_GLOBAL_META(T_META_NAMESPACE("xnu.scheduler"),
T_META_RADAR_COMPONENT_NAME("xnu"),
T_META_RADAR_COMPONENT_VERSION("scheduler"),
T_META_RUN_CONCURRENTLY(true),
T_META_OWNER("m_zinn"));
static mach_timebase_info_data_t timebase_info;
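
/*
 * Convert a duration in nanoseconds to mach absolute time units, caching the
 * mach timebase on first use.
 */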
uint64_t
nanos_to_abs(uint64_t nanos)
{
static mach_timebase_info_data_t timebase = {};
if (timebase.numer == 0 || timebase.denom == 0) {
kern_return_t kr;
kr = mach_timebase_info(&timebase_info);
T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");
timebase = timebase_info;
}
return nanos * timebase.denom / timebase.numer;
}

SCHED_POLICY_T_DECL(rt_migration_cluster_bound,
"Verify that cluster-bound realtime threads always choose the bound "
"cluster except when it is derecommended")
{
int ret;
init_migration_harness(dual_die);
struct thread_group *tg = create_tg(0);
test_thread_t threads[dual_die.num_psets];
for (int i = 0; i < dual_die.num_psets; i++) {
threads[i] = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
set_thread_cluster_bound(threads[i], i);
}
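/* From every CPU's vantage point, each bound thread should still choose its bound cluster */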
for (int i = 0; i < dual_die.num_psets; i++) {
set_current_processor(pset_id_to_cpu_id(i));
for (int j = 0; j < dual_die.num_psets; j++) {
ret = choose_pset_for_thread_expect(threads[j], j);
T_QUIET; T_EXPECT_TRUE(ret, "Expecting the bound cluster");
}
}
SCHED_POLICY_PASS("Cluster bound chooses bound cluster");
/* Derecommend the bound cluster */
for (int i = 0; i < dual_die.num_psets; i++) {
set_pset_derecommended(i);
int replacement_pset = -1;
for (int j = 0; j < dual_die.num_psets; j++) {
/* Find the first other cluster of the same CPU type; it is idle, so it should be chosen instead */
if ((i != j) && (dual_die.psets[i].cpu_type == dual_die.psets[j].cpu_type)) {
replacement_pset = j;
break;
}
}
ret = choose_pset_for_thread_expect(threads[i], replacement_pset);
T_QUIET; T_EXPECT_TRUE(ret, "Expecting the idle pset when the bound cluster is derecommended");
/* Restore pset conditions */
set_pset_recommended(i);
}
SCHED_POLICY_PASS("Cluster binding is soft");
}

SCHED_POLICY_T_DECL(rt_choose_processor,
"Verify the realtime spill policy")
{
test_hw_topology_t topo = dual_die;
init_migration_harness(topo);
uint64_t start = mach_absolute_time();
const uint64_t period = 0;
const uint64_t computation = nanos_to_abs(5000000ULL); /* 5ms */
const uint64_t constraint = nanos_to_abs(10000000ULL); /* 10ms */
const bool preemptible = false;
const uint8_t priority_offset = 0;
struct thread_group *tg = create_tg(0);
thread_t thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
set_thread_sched_mode(thread, TH_MODE_REALTIME);
const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(10000000ULL /* 10ms */));
set_thread_realtime(thread, period, computation, constraint, preemptible, priority_offset, deadline);
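/* Earlier-deadline threads, used to occupy CPUs so that the thread under test must spill */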
test_thread_t earlier_threads[topo.total_cpus];
for (int i = 0; i < topo.total_cpus; i++) {
earlier_threads[i] = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
set_thread_sched_mode(earlier_threads[i], TH_MODE_REALTIME);
const uint64_t early_deadline = rt_deadline_add(start, nanos_to_abs(5000000) /* 5ms */);
set_thread_realtime(earlier_threads[i], period, computation, constraint, preemptible, priority_offset, early_deadline);
}
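/* A later-deadline thread that the thread under test should preempt */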
test_thread_t later_thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
set_thread_sched_mode(later_thread, TH_MODE_REALTIME);
const uint64_t late_deadline = rt_deadline_add(start, nanos_to_abs(20000000ULL) /* 20ms */);
set_thread_realtime(later_thread, period, computation, constraint, preemptible, priority_offset, late_deadline);
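/* For each choice of preferred pset, verify choose-pset behavior as the load increases */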
for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_FIXPRI, preferred_pset_id);
sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);
/* Unloaded system. Expect to choose the preferred pset. */
int ret = choose_pset_for_thread_expect(thread, preferred_pset_id);
T_QUIET; T_EXPECT_TRUE(ret, "Expecting the preferred pset on an unloaded system");
/*
* Load the preferred pset with earlier-deadline threads. Should cause
* the thread to spill (since the die has multiple clusters of each
* performance type).
*/
for (int i = 0; i < topo.psets[preferred_pset_id].num_cpus; i++) {
int cpu_id = pset_id_to_cpu_id(preferred_pset_id) + i;
cpu_set_thread_current(cpu_id, earlier_threads[i]);
}
int chosen = choose_pset_for_thread(thread);
T_QUIET; T_EXPECT_GE(chosen, 0, "chose a valid cluster");
T_QUIET; T_EXPECT_NE(chosen, preferred_pset_id, "chose an unloaded cluster");
T_QUIET; T_EXPECT_EQ(topo.psets[chosen].cpu_type, topo.psets[preferred_pset_id].cpu_type, "chose a pset of the same performance type");
/* Replace the first earlier-deadline thread with a later-deadline thread. Should cause the thread to preempt. */
cpu_set_thread_current(pset_id_to_cpu_id(preferred_pset_id), later_thread);
chosen = choose_pset_for_thread(thread);
T_QUIET; T_EXPECT_EQ(chosen, preferred_pset_id, "preempting later-deadline thread");
/* Load all psets of the same performance type with earlier-deadline threads. With nowhere better to spill, expect the preferred pset to be chosen. */
for (int i = 0; i < topo.num_psets; i++) {
if (topo.psets[i].cpu_type != topo.psets[preferred_pset_id].cpu_type) {
continue;
}
for (int j = 0; j < topo.psets[i].num_cpus; j++) {
int cpu_id = pset_id_to_cpu_id(i) + j;
cpu_set_thread_current(cpu_id, earlier_threads[cpu_id]);
}
}
ret = choose_pset_for_thread_expect(thread, preferred_pset_id);
T_QUIET; T_EXPECT_TRUE(ret, "Expecting the preferred pset when all same-type psets are loaded");
/* Clean up */
for (int i = 0; i < topo.total_cpus; i++) {
cpu_clear_thread_current(i);
}
sched_policy_pop_metadata(/* preferred_pset_id */);
}
SCHED_POLICY_PASS("sched_rt_choose_processor selects the right pset");
}

SCHED_POLICY_T_DECL(rt_spill_order, "Verify computed realtime spill orders.")
{
init_migration_harness(dual_die);
/* Test setup: reset all edges. */
for (uint src_id = 0; src_id < dual_die.num_psets; src_id++) {
for (uint dst_id = 0; dst_id < dual_die.num_psets; dst_id++) {
sched_rt_config_set(src_id, dst_id, (sched_clutch_edge) {});
}
}
/* First test: create edges from pset 5 to psets 0-3. */
for (unsigned i = 0; i < 4; i++) {
sched_rt_config_set(5, i, (sched_clutch_edge) {
.sce_migration_allowed = 1,
.sce_steal_allowed = 0,
.sce_migration_weight = i % 3 /* create ties to test die-locality */
});
}
/* Disallow spill from 5 to 4, despite being the same perf level. */
sched_rt_config_set(5, 4, (sched_clutch_edge) {
.sce_migration_allowed = 0,
.sce_steal_allowed = 0,
.sce_migration_weight = 0
});
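/* Expect candidates ordered by ascending migration weight, with die-local pset 3 winning the weight-0 tie against pset 0; pset 4 is excluded because migration to it is disallowed. */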
rt_pset_recompute_spill_order(5);
T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 0), 3, "spso_search_order[0] == 3");
T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 1), 0, "spso_search_order[1] == 0");
T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 2), 1, "spso_search_order[2] == 1");
T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 3), 2, "spso_search_order[3] == 2");
T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(5, 4), PSET_ID_INVALID, "spso_search_order[4] == PSET_ID_INVALID");
/* Second test: create edges from 0 to psets 1, 2, 4, and 5. */
sched_rt_config_set(0, 1, (sched_clutch_edge) {
.sce_migration_allowed = 1,
.sce_steal_allowed = 0,
.sce_migration_weight = 2
});
sched_rt_config_set(0, 2, (sched_clutch_edge) {
.sce_migration_allowed = 1,
.sce_steal_allowed = 0,
.sce_migration_weight = 1
});
sched_rt_config_set(0, 4, (sched_clutch_edge) {
.sce_migration_allowed = 1,
.sce_steal_allowed = 0,
.sce_migration_weight = 0
});
sched_rt_config_set(0, 5, (sched_clutch_edge) {
.sce_migration_allowed = 1,
.sce_steal_allowed = 0,
.sce_migration_weight = 1
});
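/* Expect pset 4 first (weight 0), then 2 before 5 (tied at weight 1, die-local wins), then 1 (weight 2); pset 3 has no edge and is excluded. */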
rt_pset_recompute_spill_order(0);
T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 0), 4, "spso_search_order[0] == 4");
T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 1), 2, "spso_search_order[1] == 2");
T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 2), 5, "spso_search_order[2] == 5");
T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 3), 1, "spso_search_order[3] == 1");
T_QUIET; T_EXPECT_EQ(rt_pset_spill_search_order_at_offset(0, 4), PSET_ID_INVALID, "spso_search_order[4] == PSET_ID_INVALID");
SCHED_POLICY_PASS("Realtime spill orders are computed correctly.");
}

SCHED_POLICY_T_DECL(rt_thread_avoid_processor,
"Verify that thread_avoid_processor is correct for realtime threads")
{
int ret;
test_hw_topology_t topo = dual_die;
init_migration_harness(topo);
struct thread_group *tg = create_tg(0);
thread_t thread = create_thread(TH_BUCKET_FIXPRI, tg, BASEPRI_RTQUEUES);
/* Iterate conditions with different preferred psets and pset loads */
for (int preferred_pset_id = 0; preferred_pset_id < topo.num_psets; preferred_pset_id++) {
set_tg_sched_bucket_preferred_pset(tg, TH_BUCKET_FIXPRI, preferred_pset_id);
sched_policy_push_metadata("preferred_pset_id", preferred_pset_id);
/* Where the thread proactively wants to go */
int chosen_pset = choose_pset_for_thread(thread);
T_QUIET; T_EXPECT_EQ(preferred_pset_id, chosen_pset, "Thread should choose unloaded preferred pset %s",
sched_policy_dump_metadata());
/* Thread generally should not avoid a processor in its chosen pset */
for (int c = 0; c < topo.psets[chosen_pset].num_cpus; c++) {
int avoid_cpu_id = pset_id_to_cpu_id(chosen_pset) + c;
sched_policy_push_metadata("avoid_cpu_id", avoid_cpu_id);
ret = thread_avoid_processor_expect(thread, avoid_cpu_id, false, false);
T_QUIET; T_EXPECT_TRUE(ret, "Thread should not want to leave a processor in the pset it just chose %s",
sched_policy_dump_metadata());
sched_policy_pop_metadata();
}
/* Thread should avoid processor if not allowed to run on the pset */
for (int c = 0; c < topo.total_cpus; c++) {
sched_clutch_edge edge = sched_rt_config_get(preferred_pset_id, cpu_id_to_pset_id(c));
if (cpu_id_to_pset_id(c) != preferred_pset_id && !(edge.sce_migration_allowed || edge.sce_steal_allowed)) {
sched_policy_push_metadata("avoid_non_preferred_cpu_id", c);
ret = thread_avoid_processor_expect(thread, c, false, true);
T_QUIET; T_EXPECT_TRUE(ret, "Thread should avoid processor in non-preferred pset to get to idle "
"preferred pset %s", sched_policy_dump_metadata());
sched_policy_pop_metadata();
}
}
sched_policy_pop_metadata();
}
SCHED_POLICY_PASS("thread_avoid_processor works for realtime threads");
}
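
/*
 * Helper: create a realtime thread with a 5ms computation, a 10ms constraint,
 * and the given (absolute time) deadline.
 */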
static thread_t
create_realtime_thread_with_deadline(uint64_t deadline)
{
test_thread_t thread = create_thread(
TH_BUCKET_FIXPRI,
create_tg(0) /* realtime policies don't consider thread groups */,
BASEPRI_RTQUEUES);
set_thread_sched_mode(thread, TH_MODE_REALTIME);
set_thread_realtime(
thread,
0, /* period */
(uint32_t) nanos_to_abs(5000000ULL), /* 5ms computation */
(uint32_t) nanos_to_abs(10000000ULL), /* 10ms constraint */
false, /* preemptible */
0, /* priority offset */
deadline); /* callers pass deadlines already in absolute time units */
return thread;
}
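
/* Helper: mark every CPU as currently running a realtime thread with the given deadline */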
static void
fill_all_cpus_with_realtime_threads(uint64_t deadline)
{
for (int i = 0; i < get_hw_topology().total_cpus; i++) {
cpu_set_thread_current(i, create_realtime_thread_with_deadline(deadline));
}
}

SCHED_POLICY_T_DECL(rt_choose_thread, "Verify realtime thread selection policy and mechanism")
{
int ret;
test_hw_topology_t topo = dual_die;
init_migration_harness(topo);
const uint64_t start = mach_absolute_time();
const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(5000000)); /* start + 5ms */
const uint64_t later_deadline = rt_deadline_add(start, nanos_to_abs(6000000)); /* start + 6ms */
fill_all_cpus_with_realtime_threads(later_deadline);
/* These threads will be enqueued (one at a time) on the stealing pset's runqueue: */
test_thread_t later_deadline_thread = create_realtime_thread_with_deadline(later_deadline);
test_thread_t earlier_deadline_thread = create_realtime_thread_with_deadline(deadline);
/* And this thread will be on another runqueue: */
test_thread_t stealable_thread = create_realtime_thread_with_deadline(deadline);
/*
 * Check that sched_rt_choose_thread obeys the steal policies configured by
 * the realtime matrix. A pset should only steal if the thread's deadline
 * is earlier than that of any thread on the pset's runqueue.
 */
for (uint stealing_pset_id = 0; stealing_pset_id < topo.num_psets; stealing_pset_id++) {
sched_policy_push_metadata("stealing_pset", stealing_pset_id);
for (uint off = 1; off < topo.num_psets; off++) {
uint other_pset_id = (stealing_pset_id + off) % topo.num_psets;
sched_policy_push_metadata("other_pset", other_pset_id);
enqueue_thread(pset_target(other_pset_id), stealable_thread);
enqueue_thread(pset_target(stealing_pset_id), earlier_deadline_thread);
ret = dequeue_thread_expect(pset_target(stealing_pset_id), earlier_deadline_thread);
T_QUIET; T_ASSERT_TRUE(ret, "when deadlines are equal, prefer thread from local runqueue %s", sched_policy_dump_metadata());
enqueue_thread(pset_target(stealing_pset_id), later_deadline_thread);
if (topo.psets[other_pset_id].cpu_type == topo.psets[stealing_pset_id].cpu_type) {
T_QUIET; T_ASSERT_TRUE(sched_rt_config_get(other_pset_id, stealing_pset_id).sce_steal_allowed, "steal allowed between psets of the same type %s", sched_policy_dump_metadata());
ret = dequeue_thread_expect(pset_target(stealing_pset_id), stealable_thread);
T_QUIET; T_ASSERT_TRUE(ret, "steal because the other pset has an earlier-deadline thread %s", sched_policy_dump_metadata());
ret = dequeue_thread_expect(pset_target(stealing_pset_id), later_deadline_thread);
T_QUIET; T_ASSERT_TRUE(ret, "take thread from local runqueue because no earlier-deadline threads on other psets %s", sched_policy_dump_metadata());
} else {
T_QUIET; T_ASSERT_FALSE(sched_rt_config_get(other_pset_id, stealing_pset_id).sce_steal_allowed, "steal disallowed between psets of different types %s", sched_policy_dump_metadata());
ret = dequeue_thread_expect(pset_target(stealing_pset_id), later_deadline_thread);
T_QUIET; T_ASSERT_TRUE(ret, "take later-deadline thread because policy disallows steal %s", sched_policy_dump_metadata());
ret = dequeue_thread_expect(pset_target(other_pset_id), stealable_thread);
T_QUIET; T_ASSERT_TRUE(ret, "removed stealable thread %s", sched_policy_dump_metadata());
}
sched_policy_pop_metadata(/* other_pset */);
}
sched_policy_pop_metadata(/* stealing_pset */);
}
SCHED_POLICY_PASS("Verified realtime thread selection");
}

SCHED_POLICY_T_DECL(rt_followup_ipi, "Verify that followup IPIs are sent when there are stealable realtime threads and idle processors")
{
int ret;
test_hw_topology_t topo = dual_die;
init_migration_harness(topo);
const uint64_t start = mach_absolute_time();
const uint64_t deadline = rt_deadline_add(start, nanos_to_abs(5000000)); /* start + 5ms */
fill_all_cpus_with_realtime_threads(deadline);
/* This thread is used to load a runqueue. */
test_thread_t thread = create_realtime_thread_with_deadline(deadline);
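/*
 * For every (target, idle) CPU pair: enqueue a thread on target_cpu, idle
 * idle_cpu, and re-run thread selection on target_cpu. A deferred followup
 * IPI should be sent to idle_cpu whenever the two CPUs share a performance type.
 */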
for (int target_cpu = 0; target_cpu < topo.total_cpus; target_cpu++) {
sched_policy_push_metadata("target_cpu", target_cpu);
for (int idle_cpu = 0; idle_cpu < topo.total_cpus; idle_cpu++) {
if (target_cpu == idle_cpu) {
continue;
}
sched_policy_push_metadata("idle_cpu", idle_cpu);
enqueue_thread(cpu_target(target_cpu), thread);
test_thread_t saved_idle_thread = cpu_clear_thread_current(idle_cpu);
/* idle_cpu is now idle; simulate thread_select() on target_cpu: */
cpu_set_thread_current(target_cpu, cpu_clear_thread_current(target_cpu));
/* That should result in a deferred followup IPI, if spill is allowed between target_cpu and idle_cpu. */
if (topo.psets[cpu_id_to_pset_id(idle_cpu)].cpu_type == topo.psets[cpu_id_to_pset_id(target_cpu)].cpu_type) {
ret = ipi_expect(idle_cpu, TEST_IPI_DEFERRED);
T_QUIET; T_ASSERT_TRUE(ret, "should send a followup IPI %s", sched_policy_dump_metadata());
}
/* Clean up for the next iteration. */
ret = dequeue_thread_expect(cpu_target(target_cpu), thread);
T_QUIET; T_ASSERT_TRUE(ret, "cleaning up %s", sched_policy_dump_metadata());
cpu_set_thread_current(idle_cpu, saved_idle_thread);
sched_policy_pop_metadata(/* idle_cpu */);
}
sched_policy_pop_metadata(/* target_cpu */);
}
SCHED_POLICY_PASS("Realtime followup IPIs work");
}