#!/usr/local/bin/recon
local benchrun = require 'benchrun'
local csv = require 'csv'
local os = require 'os'
local perfdata = require 'perfdata'
local sysctl = require 'sysctl'
require 'strict'
-- Registers every command-line option and flag understood by this benchmark
-- on the argument parser passed in via the `modify_argparser` hook.
local function add_benchmark_options(argparser)
  -- Worker-count selection.
  argparser:option({
    name = '--cpu-workers',
    argname = 'count',
    convert = tonumber,
    description = 'Number of threads to bring up to do faulting work',
  })
  argparser:flag({
    name = '--through-max-workers',
    description = 'Run with [1..n] CPU workers',
  })
  argparser:flag({
    name = '--through-max-workers-fast',
    description = 'Run with 1, 2, and each power of four value in [4..n] CPU workers',
  })

  -- Location of the binary to run.
  argparser:option({
    name = '--path',
    description = 'Path to fault throughput binary',
    count = 1, -- This is a required option.
  })

  -- Per-iteration run parameters.
  argparser:option({
    name = '--duration',
    argname = 'seconds',
    convert = tonumber,
    default = 30,
    description = 'How long, in seconds, to run each iteration',
  })
  argparser:option({
    name = '--variant',
    argname = 'name',
    choices = { 'separate-objects', 'share-objects' },
    default = 'separate-objects',
    description = 'Which benchmark variant to run',
  })
  argparser:option({
    name = '--first-cpu',
    argname = 'cpu-id',
    convert = tonumber,
    -- The rest of the script treats -1 as "no first CPU was requested".
    default = -1,
    description = 'Pin threads to CPUs, starting with this CPU ID; requires enable_skstb=1 boot-arg',
  })

  argparser:flag({
    name = '--verbose',
    description = 'Enable verbose logging at a performance cost',
  })
end

-- The benchmark object; parsed options are read back through `benchmark.opt`.
local benchmark = benchrun.new({
  name = 'xnu.zero_fill_fault_throughput',
  version = 1,
  arg = arg,
  modify_argparser = add_benchmark_options,
})
-- Number of logical CPUs as reported by the hw.logicalcpu_max sysctl; only
-- the first value returned by sysctl() is used.
local ncpus = sysctl('hw.logicalcpu_max')
benchmark:assert(ncpus > 0, 'invalid number of logical CPUs')

-- Without an explicit --cpu-workers value, use one worker per logical CPU.
local cpu_workers = benchmark.opt.cpu_workers or ncpus
benchmark:assert(cpu_workers > 0, 'invalid number of CPU workers')

do
  -- Anything above -2 passes the lower bound, so the option's default of -1
  -- is accepted; the value must also be below the logical CPU count.
  local first_cpu = benchmark.opt.first_cpu
  benchmark:assert(-2 < first_cpu, 'negative first CPU')
  benchmark:assert(first_cpu < ncpus, 'invalid first CPU')
end

-- Unit attached to every value recorded by this benchmark.
local page_throughput_unit = perfdata.unit.custom('pages/sec')
-- Worker-thread counts to benchmark, in run order (one run per entry).
local test_threads = {}
do
  -- Appends one thread count to the end of the run list.
  local function add(count)
    test_threads[#test_threads + 1] = count
  end

  if benchmark.opt.through_max_workers then
    -- Full sweep: every count from 1 up to cpu_workers.
    for count = 1, cpu_workers do
      add(count)
    end
  elseif benchmark.opt.through_max_workers_fast then
    -- Sparse sweep: 1, then 2, then 4, 16, 64, ... while the count does not
    -- exceed cpu_workers.
    if cpu_workers >= 1 then
      add(1)
      -- Always do a run with two threads to see what the first part of the
      -- scaling curve looks like (and to measure perf on dual core systems).
      if cpu_workers >= 2 then
        add(2)
      end
      local count = 4
      while count <= cpu_workers do
        add(count)
        count = count * 4
      end
    end
  else
    -- Single run using the requested (or default) worker count.
    add(cpu_workers)
  end
end
-- Run the fault throughput binary once per entry in test_threads and record
-- every CSV column it reports as a perfdata value.
for _, thread_count in ipairs(test_threads) do
  -- Positional entries are appended in this order: binary path, optional
  -- '-v', variant, duration, thread count, optional first CPU.
  local cmd = {
    benchmark.opt.path;
    echo = true,
    -- Pluralize "worker" unless exactly one thread is used.
    name = ('with %d CPU worker%s'):format(thread_count,
        thread_count == 1 and '' or 's'),
  }
  if benchmark.opt.verbose then
    cmd[#cmd + 1] = '-v'
  end
  cmd[#cmd + 1] = benchmark.opt.variant
  cmd[#cmd + 1] = benchmark.opt.duration
  cmd[#cmd + 1] = thread_count
  -- -1 is the option's default; the first-CPU argument is only passed when
  -- the user supplied a different value.
  if benchmark.opt.first_cpu ~= -1 then
    cmd[#cmd + 1] = benchmark.opt.first_cpu
  end

  for out in benchmark:run(cmd) do
    -- '-' is a lazy quantifier in Lua patterns, so every dash of the results
    -- banner is escaped to match it literally. Everything after the banner
    -- line is captured as the CSV payload.
    local result = out:match('%-%-%-%-%-Results%-%-%-%-%-\n(.*)')
    benchmark:assert(result, 'unable to find result data in output')

    -- NOTE(review): with `header = true` each row is presumably keyed by
    -- column name, so `k` is the metric name and `v` its textual value.
    local data = csv.openstring(result, { header = true })
    for field in data:lines() do
      for k, v in pairs(field) do
        benchmark.writer:add_value(k, page_throughput_unit, tonumber(v), {
          [perfdata.larger_better] = true,
          threads = thread_count,
          variant = benchmark.opt.variant
        })
      end
    end
  end
end

benchmark:finish()