#include <signal.h>
#include <spawn.h>
#include <stdlib.h>
#include <sys/sysctl.h>
#include <darwintest.h>
#include <dispatch/dispatch.h>
#include <mach-o/dyld.h>
/* internal */
#include <spawn_private.h>
#include <sys/coalition.h>
#include <sys/kern_memorystatus.h>
T_GLOBAL_META(
T_META_NAMESPACE("xnu.vm"),
T_META_RADAR_COMPONENT_NAME("xnu"),
T_META_RADAR_COMPONENT_VERSION("VM"));
#define NUM_PER_ROLE 3 /* Number of procs per role in coalition (besides leader) */
#define NUM_PROCS_IN_COALITION (NUM_PER_ROLE * (COALITION_NUM_TASKROLES - 1) + 1)
#define NUM_COALITIONS 3
#define COAL_ORDER_NUM_PIDS (NUM_PROCS_IN_COALITION + COALITION_NUM_TASKROLES - 1)
typedef struct {
pid_t pids[NUM_PROCS_IN_COALITION]; // An array of pids in this coalition. Owned by this struct.
pid_t expected_order[COAL_ORDER_NUM_PIDS]; // An array of pids in this coalition in proper sorted order.
uint64_t ids[COALITION_NUM_TYPES];
size_t leader_footprint;
} coalition_info_t;
/*
* Children pids spawned by this test that need to be cleaned up.
* Has to be a global because the T_ATEND API doesn't take any arguments.
*/
#define kMaxChildrenProcs NUM_PROCS_IN_COALITION * NUM_COALITIONS + 1
static pid_t children_pids[kMaxChildrenProcs];
static size_t num_children = 0;
/*
* Sets up a new coalition.
*/
static void init_coalition(coalition_info_t*, size_t leader_fp);
/*
* Places all procs in the coalition in the given band.
*/
static void place_coalition_in_band(const coalition_info_t *, int band);
/*
* Place the given proc in the given band.
*/
static void place_proc_in_band(pid_t pid, int band);
/*
* Cleans up any children processes.
*/
static void cleanup_children(void);
/*
* Check if we're on a kernel where we can test coalitions.
*/
static bool has_unrestrict_coalitions(void);
/*
* Unrestrict coalition syscalls.
*/
static void unrestrict_coalitions(void);
/*
* Restrict coalition syscalls
*/
static void restrict_coalitions(void);
/*
* Allocate the requested number of pages and fault them in.
* Used to achieve a desired footprint.
*/
static void *allocate_pages(int);
/*
* Get the vm page size.
*/
static int get_vmpage_size(void);
/*
* Launch a proc with a role in a coalition.
* If coalition_ids is NULL, skip adding the proc to the coalition.
*/
static pid_t
launch_proc_in_coalition(uint64_t *coalition_ids, int role, int num_pages);
static void
bufprint(char **buf, size_t *size, const char *fmt, ...)
{
va_list list;
int n_written;
va_start(list, fmt);
n_written = vsnprintf(*buf, *size, fmt, list);
va_end(list);
if (n_written > 0) {
*buf += n_written;
*size -= n_written;
}
}
static char *
pids_str(pid_t *pids, int n_pids)
{
int i;
size_t buf_len = n_pids * 8 + 2; /* For good measure */
char *buf = malloc(buf_len);
char *obuf = buf;
bufprint(&buf, &buf_len, "(");
for (i = 0; (i < n_pids) && (buf_len > 0); i++) {
if (pids[i] == -1) {
bufprint(&buf, &buf_len, "), (");
} else {
bool is_last = (i == (n_pids - 1)) || (pids[i + 1] == -1);
bufprint(&buf, &buf_len, "%d%s", pids[i], is_last ? "" : ", ");
}
}
bufprint(&buf, &buf_len, ")");
return obuf;
}
/*
* Sorts the given jetsam band with the desired order and verifies that the
* sort was done correctly.
* `expected_order` is an array of groups of PIDs separated by `-1`, where PIDs
* in each group are re-orderable. For instance, for the expected order:
* [1, 2, -1, 3, -1, 4]
* the orderings of
* 1, 2, 3, 4 and 2, 1, 3, 4 are both valid since 1 and 2 are in the same group.
*/
static void
sort_and_verify(
unsigned int prio,
memorystatus_jetsam_sort_order_t order,
pid_t *expected_order,
size_t expected_order_len)
{
size_t i, j, n_pids, group_idx;
bool in_order;
pid_t *actual_order;
pid_t *original_expected_order;
/* Bigger than we need it, but that's fine */
actual_order = malloc(sizeof(pid_t) * expected_order_len);
/* Make a copy of expected_order since we'll be overwriting it */
original_expected_order = malloc(sizeof(pid_t) * expected_order_len);
memcpy(original_expected_order, expected_order, sizeof(pid_t) * expected_order_len);
/*
* Add only the actual pids from expected_order in to tell memorystatus which
* PIDs we care about
*/
n_pids = 0;
for (i = 0; i < expected_order_len; i++) {
if (expected_order[i] != -1) {
actual_order[n_pids] = expected_order[i];
n_pids++;
}
}
int ret = memorystatus_control(MEMORYSTATUS_CMD_TEST_JETSAM_SORT, prio, order,
actual_order, n_pids * sizeof(pid_t));
T_QUIET; T_EXPECT_POSIX_SUCCESS(ret, "Band sorted and order copied out");
/* Check that the order we got was what we expected */
group_idx = 0; /* idx of pid that starts current reorderable group */
for (i = 0; i < n_pids; i++) {
/*
* Check if the current pid in actual_order is in the current group.
* If not, advance to the next group until we find it. This is essentially
* a ratcheting mechanism - we can move our search group forwards, but not
* backwards.
*/
for (j = group_idx; j < expected_order_len; j++) {
if (expected_order[j] == -1) {
/* Made it to the end of a group w/o finding the pid */
group_idx = j + 1;
continue;
} else if (expected_order[j] == actual_order[i]) {
/* Found our pid. Mark it found */
expected_order[j] = 0;
break;
}
}
}
/* Check that all pids were actually found */
in_order = true;
for (i = 0; i < expected_order_len; i++) {
if ((expected_order[i] != -1) && (expected_order[i] != 0)) {
in_order = false;
break;
}
}
T_EXPECT_TRUE(in_order, "Band in correct order when sorted in order (%d)", order);
if (!in_order) {
char *exp_str = pids_str(original_expected_order, expected_order_len);
char *actual_str = pids_str(actual_order, n_pids);
T_LOG("Out of order! Expected:\n%s\nbut got\n%s\n", exp_str, actual_str);
free(exp_str);
free(actual_str);
}
free(actual_order);
free(original_expected_order);
}
/*
* Background process that will munch some memory, signal its parent, and
* then sit in a loop.
*/
T_HELPER_DECL(coalition_member, "Mock coalition member") {
int num_pages = 0;
if (argc == 1) {
num_pages = atoi(argv[0]);
}
allocate_pages(num_pages);
if (num_pages) {
printf("%d has %d\n", getpid(), num_pages);
}
// Signal to the parent that we've touched all of our pages.
if (kill(getppid(), SIGUSR1) != 0) {
T_LOG("Unable to signal to parent process!");
exit(1);
}
while (true) {
sleep(100);
}
}
static void
random_order(int *arr, int size)
{
int i, a, b, s;
for (i = 0; i < size; i++) {
arr[i] = i;
}
for (i = 0; i < size; i++) {
a = rand() % size;
b = rand() % size;
s = arr[a];
arr[a] = arr[b];
arr[b] = s;
}
}
static void
add_coalition_to_order(pid_t *order, coalition_info_t *coal, int coal_idx)
{
int order_idx = coal_idx * (COAL_ORDER_NUM_PIDS + 1);
memcpy(&order[order_idx], &coal->expected_order, sizeof(coal->expected_order));
if (coal_idx != 0) {
order[order_idx - 1] = -1;
}
}
/*
* Test that sorting the fg bucket in coalition order works properly.
* Spawns children in the same coalition in the fg band. Each child
* has a different coalition role. Verifies that the coalition
* is sorted properly by role.
*/
#define COALS_EXPECTED_ORDER_LEN ((COAL_ORDER_NUM_PIDS * NUM_COALITIONS) + (NUM_COALITIONS - 1))
T_DECL(memorystatus_sort_coalitions_footprint, "Sort coalitions by leader footprint",
T_META_ASROOT(true),
T_META_TAG_VM_PREFERRED)
{
int i;
coalition_info_t *coalitions;
int coalition_order[NUM_COALITIONS];
pid_t *expected_order; /* Expected order of all pids in all coalitions */
if (!has_unrestrict_coalitions()) {
T_SKIP("Unable to test coalitions on this kernel.");
}
unrestrict_coalitions();
T_ATEND(cleanup_children);
T_ATEND(restrict_coalitions);
/* Initialize our coalitions */
coalitions = malloc(sizeof(coalition_info_t) * NUM_COALITIONS);
expected_order = malloc(sizeof(pid_t) * COALS_EXPECTED_ORDER_LEN);
/* Spawn the coalitions in random order */
random_order(coalition_order, NUM_COALITIONS);
/* Spawn coalitions, each with a different leader footprint */
for (i = 0; i < NUM_COALITIONS; i++) {
int coal = coalition_order[i];
init_coalition(&coalitions[coal], (NUM_COALITIONS - coal) * 50);
add_coalition_to_order(expected_order, &coalitions[coal], coal);
place_coalition_in_band(&coalitions[coal], JETSAM_PRIORITY_FOREGROUND);
}
/* Sort by leader footprint and verify coalitions are sorted by leader footprint */
sort_and_verify(JETSAM_PRIORITY_FOREGROUND, JETSAM_SORT_FOOTPRINT, expected_order, COALS_EXPECTED_ORDER_LEN);
free(coalitions);
free(expected_order);
}
T_DECL(memorystatus_sort_coalitions_lru, "Sort coalitions by leader LRU",
T_META_ASROOT(true),
T_META_TAG_VM_PREFERRED)
{
int i;
coalition_info_t *coalitions;
int coalition_order[NUM_COALITIONS];
pid_t *expected_order; /* Expected order of all pids in all coalitions */
if (!has_unrestrict_coalitions()) {
T_SKIP("Unable to test coalitions on this kernel.");
}
unrestrict_coalitions();
T_ATEND(cleanup_children);
T_ATEND(restrict_coalitions);
/* Initialize our coalitions */
coalitions = malloc(sizeof(coalition_info_t) * NUM_COALITIONS);
expected_order = malloc(sizeof(pid_t) * COALS_EXPECTED_ORDER_LEN);
/* Spawn coalitions */
for (i = 0; i < NUM_COALITIONS; i++) {
init_coalition(&coalitions[i], 0);
}
/* Add coalitions to foreground in random order*/
random_order(coalition_order, NUM_COALITIONS);
for (i = 0; i < NUM_COALITIONS; i++) {
int coal = coalition_order[i];
place_coalition_in_band(&coalitions[coal], JETSAM_PRIORITY_FOREGROUND);
add_coalition_to_order(expected_order, &coalitions[coal], i);
}
/* Sort by leader LRU and verify coalitions are sorted by leader LRU */
sort_and_verify(JETSAM_PRIORITY_FOREGROUND, JETSAM_SORT_LRU, expected_order, COALS_EXPECTED_ORDER_LEN);
free(coalitions);
free(expected_order);
}
/*
* Test that sorting the idle bucket in footprint order works properly.
*
* Spawns some children with very different footprints in the idle band,
* and then ensures that they get sorted properly.
*/
T_DECL(memorystatus_sort_footprint, "Footprint sort order",
T_META_ASROOT(true), T_META_TAG_VM_PREFERRED) {
#define kNumChildren 3
static const int kChildrenFootprints[kNumChildren] = {500, 0, 2500};
/*
* The expected sort order of the children in the order that they were launched.
* Used to construct the expected_order pid array.
* Note that procs should be sorted in descending footprint order.
*/
static const int kExpectedOrder[kNumChildren] = {2, 0, 1};
static const int kJetsamBand = JETSAM_PRIORITY_BACKGROUND;
__block pid_t pid;
sig_t res;
dispatch_source_t ds_allocated;
T_ATEND(cleanup_children);
// After we spawn the children, they'll signal that they've touched their pages.
res = signal(SIGUSR1, SIG_IGN);
T_WITH_ERRNO; T_ASSERT_NE(res, SIG_ERR, "SIG_IGN SIGUSR1");
ds_allocated = dispatch_source_create(DISPATCH_SOURCE_TYPE_SIGNAL, SIGUSR1, 0, dispatch_get_main_queue());
T_QUIET; T_ASSERT_NOTNULL(ds_allocated, "dispatch_source_create (ds_allocated)");
dispatch_source_set_event_handler(ds_allocated, ^{
if (num_children < kNumChildren) {
pid = launch_proc_in_coalition(NULL, 0, kChildrenFootprints[num_children]);
place_proc_in_band(pid, kJetsamBand);
} else {
pid_t expected_order[kNumChildren] = {0};
for (int i = 0; i < kNumChildren; i++) {
expected_order[i] = children_pids[kExpectedOrder[i]];
}
sort_and_verify(kJetsamBand, JETSAM_SORT_FOOTPRINT_NOCOAL, expected_order, kNumChildren);
T_END;
}
});
dispatch_activate(ds_allocated);
pid = launch_proc_in_coalition(NULL, 0, kChildrenFootprints[num_children]);
place_proc_in_band(pid, kJetsamBand);
dispatch_main();
#undef kNumChildren
}
static pid_t
launch_proc_in_coalition(uint64_t *coalition_ids, int role, int num_pages)
{
int ret;
posix_spawnattr_t attr;
pid_t pid;
char testpath[PATH_MAX];
uint32_t testpath_buf_size = PATH_MAX;
char num_pages_str[32] = {0};
char *argv[5] = {testpath, "-n", "coalition_member", num_pages_str, NULL};
extern char **environ;
T_QUIET; T_ASSERT_LT(num_children + 1, (size_t) kMaxChildrenProcs, "Don't create too many children.");
ret = posix_spawnattr_init(&attr);
T_QUIET; T_ASSERT_POSIX_ZERO(ret, "posix_spawnattr_init");
if (coalition_ids != NULL) {
for (int i = 0; i < COALITION_NUM_TYPES; i++) {
ret = posix_spawnattr_setcoalition_np(&attr, coalition_ids[i], i, role);
T_QUIET; T_ASSERT_POSIX_ZERO(ret, "posix_spawnattr_setcoalition_np");
}
}
ret = snprintf(num_pages_str, sizeof(num_pages_str), "%d", num_pages);
T_QUIET; T_ASSERT_LE((size_t) ret, sizeof(num_pages_str), "Don't allocate too many pages.");
ret = _NSGetExecutablePath(testpath, &testpath_buf_size);
T_QUIET; T_ASSERT_EQ(ret, 0, "_NSGetExecutablePath");
ret = posix_spawn(&pid, argv[0], NULL, &attr, argv, environ);
T_QUIET; T_ASSERT_POSIX_ZERO(ret, "posix_spawn");
ret = posix_spawnattr_destroy(&attr);
T_QUIET; T_ASSERT_POSIX_ZERO(ret, "posix_spawnattr_destroy");
children_pids[num_children++] = pid;
return pid;
}
static void
init_coalition(coalition_info_t *coalition, size_t leader_fp)
{
/* This code will need updating if we add a role */
static_assert(COALITION_NUM_TASKROLES == 4);
sigset_t set;
int ret, i, sig;
uint32_t flags = 0;
memset(coalition, 0, sizeof(coalition_info_t));
for (int i = 0; i < COALITION_NUM_TYPES; i++) {
COALITION_CREATE_FLAGS_SET_TYPE(flags, i);
ret = coalition_create(&coalition->ids[i], flags);
T_QUIET; T_ASSERT_POSIX_ZERO(ret, "coalition_create");
}
sigemptyset(&set);
ret = sigaddset(&set, SIGUSR1);
T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sigaddset(SIGUSR1)");
coalition->leader_footprint = leader_fp;
/*
* Spawn procs for each coalition role, and construct the expected
* sorted order.
*/
int n_roles[COALITION_NUM_TASKROLES] = {0};
int role_order_idx[COALITION_NUM_TASKROLES] = {
/* COALITION_TASKROLE_UNDEF */ 0,
/* COALITION_TASKROLE_LEADER */ (NUM_PER_ROLE + 1) * 3,
/* COALITION_TASKROLE_XPC */ (NUM_PER_ROLE + 1) * 2,
/* COALITION_TASKROLE_EXT */ NUM_PER_ROLE + 1
};
for (i = 1; i < COALITION_NUM_TASKROLES; i++) {
coalition->expected_order[role_order_idx[i] - 1] = -1;
}
for (size_t i = 0; i < NUM_PROCS_IN_COALITION; i++) {
int role;
size_t pages = 0;
while (true) {
role = rand() % COALITION_NUM_TASKROLES;
if ((role == COALITION_TASKROLE_LEADER) && n_roles[role]) {
continue; /* Already have a leader */
} else if (n_roles[role] == NUM_PER_ROLE) {
continue; /* Already have all of this role */
}
n_roles[role]++;
break;
}
if (role == COALITION_TASKROLE_LEADER) {
pages = leader_fp;
}
pid_t pid = launch_proc_in_coalition(coalition->ids, role, pages);
ret = sigwait(&set, &sig);
T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "sigwait");
T_QUIET; T_ASSERT_EQ(sig, SIGUSR1, "sigwait == SIGUSR1");
coalition->pids[i] = pid;
coalition->expected_order[role_order_idx[role]] = pid;
role_order_idx[role]++;
}
}
static void
place_proc_in_band(pid_t pid, int band)
{
memorystatus_priority_properties_t props = {0};
int ret;
props.priority = band;
props.user_data = 0;
ret = memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES, pid, 0, &props, sizeof(props));
T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "move proc to band");
}
static void
place_coalition_in_band(const coalition_info_t *coalition, int band)
{
for (size_t i = 0; i < NUM_PROCS_IN_COALITION; i++) {
pid_t curr = coalition->pids[i];
place_proc_in_band(curr, band);
}
}
static void
cleanup_children(void)
{
int ret, status;
for (size_t i = 0; i < num_children; i++) {
pid_t exited_pid = 0;
pid_t curr = children_pids[i];
ret = kill(curr, SIGKILL);
T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kill");
while (exited_pid == 0) {
exited_pid = waitpid(curr, &status, 0);
}
T_QUIET; T_ASSERT_POSIX_SUCCESS(exited_pid, "waitpid");
T_QUIET; T_ASSERT_TRUE(WIFSIGNALED(status), "proc was signaled.");
T_QUIET; T_ASSERT_EQ(WTERMSIG(status), SIGKILL, "proc was killed");
}
}
static bool
has_unrestrict_coalitions()
{
int ret, val;
size_t val_sz;
val = 0;
val_sz = sizeof(val);
ret = sysctlbyname("kern.unrestrict_coalitions", &val, &val_sz, NULL, 0);
return ret >= 0;
}
static void
unrestrict_coalitions()
{
int ret, val = 1;
size_t val_sz;
val_sz = sizeof(val);
ret = sysctlbyname("kern.unrestrict_coalitions", NULL, 0, &val, val_sz);
T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.unrestrict_coalitions <- 1");
}
static void
restrict_coalitions()
{
int ret, val = 0;
size_t val_sz;
val_sz = sizeof(val);
ret = sysctlbyname("kern.unrestrict_coalitions", NULL, 0, &val, val_sz);
T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "kern.unrestrict_coalitions <- 0");
}
static void *
allocate_pages(int num_pages)
{
int page_size, i;
unsigned char *buf;
page_size = get_vmpage_size();
buf = malloc((unsigned long)(num_pages * page_size));
for (i = 0; i < num_pages; i++) {
((volatile unsigned char *)buf)[i * page_size] = 1;
}
return buf;
}
static int
get_vmpage_size()
{
int vmpage_size;
size_t size = sizeof(vmpage_size);
int ret = sysctlbyname("vm.pagesize", &vmpage_size, &size, NULL, 0);
T_QUIET; T_ASSERT_POSIX_SUCCESS(ret, "failed to query vm.pagesize");
T_QUIET; T_ASSERT_GT(vmpage_size, 0, "vm.pagesize is not > 0");
return vmpage_size;
}