// This file is from xnu-11215.1.10.
// clang++ -o kt-dump{,.cpp} -Wall -std=c++20
#include <assert.h>
#include <fcntl.h>
#include <mach-o/fat.h>
#include <mach-o/loader.h>
#include <removefile.h>
#include <spawn.h>
#include <sys/wait.h>
#include <unistd.h>
#include <uuid/uuid.h>

#include <cerrno>
#include <cstdarg>
#include <cstdio>
#include <cstring>
#include <exception>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <optional>
#include <set>
#include <span>
#include <string>
#include <string_view>
#include <system_error>
#include <tuple>
#include <utility>
#include <vector>
/*
* kt-dump.cpp
*
* Tool to dump the kalloc type information from a given Mach-O binary.
* Usage:
* kt-dump [-f <simple|json|struct|stats>] <mach-o>
*
* The tool will scan the given Mach-O to find the __kalloc_type section.
* It will then walk that section using the kalloc_type_view definition
* provided below, in order to dump the type names and signatures that
* have been compiled into the binary.
*
* The output "format" can be specified with the -f option. The default
* format ("simple") will output the type name and the signature,
* enclosed in square brackets. The "json" format will print a JSON
* dictionary for each kalloc_type_view entry, including the type name,
* its size and the signature. The "struct" output format will use
* __builtin_dump_struct to dump a C-like representation of the view.
* Finally, if the "stats" output format is chosen, the tool will only
* show overall information about the __kalloc_type section.
*
* The tool supports both MH_KEXT_BUNDLE and kernel cache files. If a
* FAT Mach-O is provided, it must contain an arm64 slice.
*/
/* Note: these must be kept in sync with the defs in kalloc.h/zalloc.h */
/*
 * Flag bits carried in the kt_flags field of the kalloc type views below.
 * Within this tool only KT_DATA_ONLY is tested: entries carrying it are
 * printed with the literal signature "data". The remaining values are
 * listed so that the definition stays in sync with the kernel headers.
 */
__options_decl(kalloc_type_flags_t, uint32_t, {
KT_DEFAULT = 0x0001,
KT_PRIV_ACCT = 0x0002,
KT_SHARED_ACCT = 0x0004,
KT_DATA_ONLY = 0x0008,
KT_VM = 0x0010,
KT_CHANGED = 0x0020,
KT_CHANGED2 = 0x0040,
KT_PTR_ARRAY = 0x0080,
KT_NOSHARED = 0x2000,
KT_SLID = 0x4000,
KT_PROCESSED = 0x8000,
KT_HASH = 0xffff0000,
});
/*
 * Type of the kt_version field of kalloc_type_var_view.
 * KT_V1 is the only value declared here.
 */
__options_decl(kalloc_type_version_t, uint16_t, {
KT_V1 = 0x0001,
});
/*
 * Length of the kt_zones array in kalloc_type_var_view.
 * FIXME: we need to recognize Intel, for which this is 20.
 */
#define KHEAP_NUM_ZONES 22
/*
 * Layout of the zone view that sits at the start of kalloc_type_view.
 * This tool only reads zv_name, which it resolves against the __cstring
 * section via decode_string(); the other members are kept as opaque
 * pointers so the structure keeps its size.
 */
struct zone_view {
void *zv_zone;
void *zv_stats;
const char *zv_name;
void *zv_next;
};
/*
 * Element type used to walk the __kalloc_type section.
 * The dump code reads kt_zv.zv_name and kt_signature (both resolved
 * against __cstring), kt_flags (tested for KT_DATA_ONLY) and kt_size
 * (printed in the JSON output). The section size is asserted to be a
 * multiple of sizeof(kalloc_type_view), so the layout must match the
 * kernel's definition.
 */
struct kalloc_type_view {
struct zone_view kt_zv;
const char *kt_signature;
kalloc_type_flags_t kt_flags;
uint32_t kt_size;
struct zone *kt_zshared;
struct zone *kt_zsig;
};
/*
 * Element type used to walk the __kalloc_var section.
 * The dump code reads kt_name, kt_sig_hdr and kt_sig_type (resolved
 * against __cstring), kt_flags (tested for KT_DATA_ONLY) and
 * kt_size_hdr / kt_size_type (printed in the JSON output). The section
 * size is asserted to be a multiple of sizeof(kalloc_type_var_view), so
 * the layout must match the kernel's definition.
 */
struct kalloc_type_var_view {
kalloc_type_version_t kt_version;
uint16_t kt_size_hdr;
/*
* Temporary: Needs to be 32bits cause we have many structs that use
* IONew/Delete that are larger than 32K.
*/
uint32_t kt_size_type;
struct zone_stats *kt_stats;
const char *kt_name;
struct zone_view *kt_next;
uint16_t kt_heap_start;
uint8_t kt_zones[KHEAP_NUM_ZONES];
const char *kt_sig_hdr;
const char *kt_sig_type;
kalloc_type_flags_t kt_flags;
};
/*
 * A Mach-O section header paired with a typed, read-only view of the
 * section's bytes inside the file buffer.
 *
 * T is the element type the section is interpreted as; a trailing
 * partial element (section.size not a multiple of sizeof(T)) is not
 * part of `contents`.
 */
template <typename T> struct macho_section {
	section_64 section;
	std::span<const T> contents;

	/*
	 * sec:  the section header; sec.offset / sec.size locate the bytes.
	 * data: the buffer sec.offset is relative to.
	 *
	 * `data` is a byte span, so the sub-range is requested in bytes
	 * (sec.size) and only the resulting typed view is counted in
	 * elements of T.
	 */
	macho_section(const section_64 &sec, std::span<uint8_t> data)
		: section(sec),
		contents(reinterpret_cast<const T *>(
			    data.subspan(sec.offset, sec.size).data()),
		    sec.size / sizeof(T))
	{
	}

	/* Size in bytes of one element of the section. */
	size_t
	elem_size() const
	{
		return sizeof(T);
	}

	/* Number of whole elements the section header accounts for. */
	size_t
	elem_count() const
	{
		return section.size / elem_size();
	}
};
/*
 * printf-like helper that writes `indent` (when non-empty) before the
 * formatted text. It is the callback handed to __builtin_dump_struct.
 *
 * Returns the length of the indent that was written plus the value
 * returned by vprintf.
 */
int
printf_with_indent(const char *indent, const char *format, ...)
{
	int written = 0;

	if (indent[0] != '\0') {
		std::cout << indent;
		written += strlen(indent);
	}

	va_list args;
	va_start(args, format);
	const int formatted = vprintf(format, args);
	va_end(args);

	written += formatted;
	return written;
}
/*
 * Translate a string pointer stored in a kalloc type view into a pointer
 * inside the loaded __cstring section.
 *
 * The stored value is truncated to its low 32 bits and the section's
 * `offset` field is subtracted from it; the result indexes the section
 * contents. Per the original authors this works for both single kexts
 * (MH_KEXT_BUNDLE) and kernel caches.
 *
 * Returns nullptr when `string` is null. No bounds check is made on the
 * computed index.
 */
static inline const char *
decode_string(const macho_section<char> &sec_cstring, const char *string)
{
	if (string == nullptr) {
		return nullptr;
	}

	const auto raw_value = reinterpret_cast<uintptr_t>(string);
	const auto low_bits = static_cast<uint32_t>(raw_value);
	const uint32_t index = low_bits - sec_cstring.section.offset;

	return &sec_cstring.contents[index];
}
/*
 * Output format, selected with the -f command line option.
 * The file-scope variable out_fmt holds the current choice (SIMPLE unless
 * overridden in main) and is consulted by every dump routine.
 */
static enum class out_fmt_type {
SIMPLE,
JSON,
STRUCT,
STATS
} out_fmt = out_fmt_type::SIMPLE;
class image {
const std::span<uint8_t> slice_contents;
size_t slice_mh_offs;
std::optional<macho_section<kalloc_type_view> > sec_types;
std::optional<macho_section<kalloc_type_var_view> > sec_types_var;
std::optional<macho_section<char> > sec_cstring;
uuid_t img_uuid;
std::set<std::pair<const char *, const char *> > dedup_entries;
std::set<std::tuple<const char *, const char *, const char *> > dedup_entries_var;
std::set<const char *> dedup_strings;
struct {
size_t uniq_structs = 0;
size_t uniq_structs_var = 0;
size_t names_sz = 0;
size_t sig_sz = 0;
} stats;
void
dump_types(const char *indent)
{
const char *sep = "\n";
if (out_fmt == out_fmt_type::JSON) {
std::cout << ",\n" << indent << " \"fixed\": [";
}
for (auto &ktv : sec_types->contents) {
const char *name = decode_string(*sec_cstring, ktv.kt_zv.zv_name);
const char *sig = decode_string(*sec_cstring, ktv.kt_signature);
/* Only output the equal entries (same name/signature) once */
if (!dedup_entries.insert(std::make_tuple(name, sig)).second) {
continue;
}
if (ktv.kt_flags & KT_DATA_ONLY) {
sig = "data";
}
if (dedup_strings.insert(name).second) {
stats.names_sz += strlen(name) + 1;
}
if (dedup_strings.insert(sig).second) {
stats.sig_sz += strlen(sig) + 1;
}
stats.uniq_structs++;
if (out_fmt != out_fmt_type::STRUCT) {
name += strlen("site.");
}
switch (out_fmt) {
case out_fmt_type::SIMPLE:
std::cout << indent << name << " [" << sig << "]\n";
break;
case out_fmt_type::JSON:
std::cout << sep << indent
<< " { \"name\": \"" << name << "\", "
<< "\"size\": " << ktv.kt_size << ", "
<< "\"sig\": \"" << sig << '"'
<< " }";
sep = ",\n";
break;
case out_fmt_type::STRUCT: {
/* Make a copy and fill in the pointers to the cstring section */
kalloc_type_view printable_view = ktv;
printable_view.kt_zv.zv_name = name;
printable_view.kt_signature = sig;
__builtin_dump_struct(&printable_view, &printf_with_indent, indent);
} break;
case out_fmt_type::STATS:
break;
}
}
if (out_fmt == out_fmt_type::JSON) {
std::cout << std::endl << indent << " ]";
}
}
void
dump_types_var(const char *indent)
{
const char *sep = "\n";
if (out_fmt == out_fmt_type::JSON) {
std::cout << ",\n" << indent << " \"var\": [";
}
for (auto &ktv : sec_types_var->contents) {
const char *name = decode_string(*sec_cstring, ktv.kt_name);
const char *sig_hdr = decode_string(*sec_cstring, ktv.kt_sig_hdr);
const char *sig_type = decode_string(*sec_cstring, ktv.kt_sig_type);
/* Only output the equal entries (same name/signature) once */
if (!dedup_entries_var.insert(std::make_tuple(name, sig_hdr, sig_type)).second) {
continue;
}
if (dedup_strings.insert(name).second) {
stats.names_sz += strlen(name) + 1;
}
if (sig_hdr && dedup_strings.insert(sig_hdr).second) {
stats.sig_sz += strlen(sig_hdr) + 1;
}
if (dedup_strings.insert(sig_type).second) {
stats.sig_sz += strlen(sig_type) + 1;
}
if (ktv.kt_flags & KT_DATA_ONLY) {
sig_type = "data";
if (ktv.kt_size_hdr) {
sig_hdr = "data";
}
}
stats.uniq_structs_var++;
if (out_fmt != out_fmt_type::STRUCT) {
name += strlen("site.");
}
switch (out_fmt) {
case out_fmt_type::SIMPLE:
if (sig_hdr) {
std::cout << indent << name
<< " [" << sig_hdr << ", " << sig_type << "]\n";
} else {
std::cout << indent << name
<< " [, " << sig_type << "]\n";
}
break;
case out_fmt_type::JSON:
std::cout << sep << indent
<< " { \"name\": \"" << name << "\", ";
if (sig_hdr) {
std::cout << "\"size_hdr\": " << ktv.kt_size_hdr << ", "
<< "\"sig_hdr\": \"" << sig_hdr << "\", ";
}
std::cout << "\"size_type\": " << ktv.kt_size_type << ", "
<< "\"sig_type\": \"" << sig_type << '"'
<< " }";
sep = ",\n";
break;
case out_fmt_type::STRUCT: {
/* Make a copy and fill in the pointers to the cstring section */
kalloc_type_var_view printable_view = ktv;
printable_view.kt_name = name;
printable_view.kt_sig_hdr = sig_hdr;
printable_view.kt_sig_type = sig_type;
__builtin_dump_struct(&printable_view, &printf_with_indent, indent);
} break;
case out_fmt_type::STATS:
break;
}
}
if (out_fmt == out_fmt_type::JSON) {
std::cout << std::endl << indent << " ]";
}
}
const mach_header_64 *
mh_hdr() const
{
return reinterpret_cast<const mach_header_64 *>(slice_contents.data() + slice_mh_offs);
}
public:
image(std::span<uint8_t> contents, size_t mh_offs = 0)
: slice_contents{contents}, slice_mh_offs{mh_offs}
{
auto *hdr = mh_hdr();
std::span<uint8_t> commands = contents.subspan(mh_offs + sizeof(*hdr));
assert(hdr->magic == MH_MAGIC_64);
for (size_t i = 0; i < hdr->ncmds; i++) {
auto *cmd = reinterpret_cast<const load_command *>(commands.data());
commands = commands.subspan(cmd->cmdsize);
switch (cmd->cmd) {
case LC_SEGMENT_64:
break;
case LC_UUID:
uuid_copy(img_uuid, reinterpret_cast<const uuid_command *>(cmd)->uuid);
continue;
default:
continue;
}
auto *seg_cmd = reinterpret_cast<const segment_command_64 *>(cmd);
const std::span<section_64> sections((section_64 *)(seg_cmd + 1), seg_cmd->nsects);
for (auto &sec : sections) {
std::string_view segname(sec.segname);
std::string_view sectname(sec.sectname);
if (sectname == "__kalloc_type") {
assert(!sec_types && "Multiple __kalloc_type sections?");
sec_types = macho_section<kalloc_type_view>(sec, slice_contents);
assert(sec.size % sec_types->elem_size() == 0 &&
"Check the definition of kalloc_type_view");
} else if (sectname == "__kalloc_var") {
assert(!sec_types_var && "Multiple __kalloc_var sections?");
sec_types_var = macho_section<kalloc_type_var_view>(sec, slice_contents);
assert(sec.size % sec_types_var->elem_size() == 0 &&
"Check the definition of kalloc_type_var_view");
} else if (segname == "__TEXT" && sectname == "__cstring") {
assert(!sec_cstring && "Multiple __kalloc_var sections?");
sec_cstring = macho_section<char>(sec, slice_contents);
}
}
}
}
~image() = default;
std::string
uuid() const
{
uuid_string_t to_str;
uuid_unparse_upper(img_uuid, to_str);
return std::string{to_str};
}
const char *
slice() const
{
auto *hdr = mh_hdr();
cpu_type_t cpu;
cpu_subtype_t sub;
if (hdr->magic == MH_CIGAM_64) {
cpu = OSSwapInt32(hdr->cputype);
sub = OSSwapInt32(hdr->cpusubtype & CPU_SUBTYPE_MASK);
} else {
cpu = hdr->cputype;
sub = hdr->cpusubtype & OSSwapInt32(CPU_SUBTYPE_MASK);
}
if (cpu == CPU_TYPE_ARM64) {
if (sub == CPU_SUBTYPE_ARM64E) {
return "arm64e";
}
return "arm64";
}
/* other slices unsupported for now */
return nullptr;
}
void
dump(const std::string &imgname, const char *indent = "")
{
if (out_fmt == out_fmt_type::JSON) {
std::cout << indent << "{\n"
<< indent << " \"image\": \"" << imgname << "\",\n"
<< indent << " \"slice\": \"" << slice() << "\",\n"
<< indent << " \"uuid\": \"" << uuid() << '"';
} else {
std::cout << imgname << " (" << slice() << ", " << uuid() << ")\n";
}
if (sec_types) {
dump_types(indent);
}
if (sec_types_var) {
dump_types_var(indent);
}
if (out_fmt == out_fmt_type::JSON) {
std::cout << std::endl << indent << "}";
}
if (out_fmt == out_fmt_type::STATS) {
if (auto &sec = *sec_types; sec_types) {
auto ucount = stats.uniq_structs;
auto usize = ucount * sec.elem_size();
std::cout << indent << "__kalloc_type: " << std::endl;
std::cout << indent << " total structs: " << sec.elem_count() << std::endl;
std::cout << indent << " unique structs: " << ucount << std::endl;
std::cout << indent << " total size: " << sec.section.size << std::endl;
std::cout << indent << " unique size: " << usize << std::endl;
}
if (auto &sec = *sec_types_var; sec_types_var) {
auto ucount = stats.uniq_structs_var;
auto usize = ucount * sec.elem_size();
std::cout << indent << "__kalloc_var: " << std::endl;
std::cout << indent << " total structs: " << sec.elem_count() << std::endl;
std::cout << indent << " unique structs: " << ucount << std::endl;
std::cout << indent << " total size: " << sec.section.size << std::endl;
std::cout << indent << " unique size: " << usize << std::endl;
}
std::cout << indent << "names strings: " << stats.names_sz << std::endl;
std::cout << indent << "signatures strings: " << stats.sig_sz << std::endl;
}
stats = {};
dedup_entries.clear();
dedup_entries_var.clear();
dedup_strings.clear();
}
};
static int
do_simple_macho(const std::string filename, std::span<uint8_t> contents)
{
image img{contents};
img.dump(filename);
return 0;
}
static int
do_fat_macho(const std::string filename, std::span<uint8_t> contents)
{
fat_header *fhdr = reinterpret_cast<fat_header *>(contents.data());
std::span<fat_arch> fat_archs(
reinterpret_cast<fat_arch *>(&contents[sizeof(fat_header)]),
OSSwapInt32(fhdr->nfat_arch));
const char *sep = "\n";
if (out_fmt == out_fmt_type::JSON) {
std::cout << "[";
}
for (auto &arch : fat_archs) {
image img{contents.subspan(OSSwapInt32(arch.offset), OSSwapInt32(arch.size))};
if (out_fmt == out_fmt_type::JSON) {
std::cout << sep;
} else {
std::cout << std::endl;
}
img.dump(filename, " ");
sep = ",\n";
}
if (out_fmt == out_fmt_type::JSON) {
std::cout << "\n]";
}
return 0;
}
/*
 * Dump every image of an MH_FILESET kernel cache.
 *
 * Each LC_FILESET_ENTRY load command names an embedded image and gives
 * the file offset of its Mach-O header; every such image is dumped under
 * its entry name. In JSON mode the images are wrapped in a top-level
 * array, formatted the same way as do_fat_macho() does.
 *
 * Returns 0 on success, 1 if the cpu type is not arm64 or the load
 * commands are malformed.
 */
static int
do_fileset(std::span<uint8_t> contents)
{
	const bool json = (out_fmt == out_fmt_type::JSON);
	const auto *hdr = reinterpret_cast<const mach_header_64 *>(contents.data());
	std::span<uint8_t> commands = contents.subspan(sizeof(*hdr));
	const char *sep = "\n";

	if (hdr->cputype != CPU_TYPE_ARM64) {
		std::cerr << "unsupported cpu type\n";
		return 1;
	}
	if (json) {
		std::cout << "[";
	}
	for (size_t i = 0; i < hdr->ncmds; i++) {
		/* Validate the command size before trusting it to advance */
		if (commands.size() < sizeof(load_command)) {
			std::cerr << "malformed fileset load commands\n";
			return 1;
		}
		const auto *cmd = reinterpret_cast<const load_command *>(commands.data());
		if (cmd->cmdsize < sizeof(load_command) || cmd->cmdsize > commands.size()) {
			std::cerr << "malformed fileset load commands\n";
			return 1;
		}
		commands = commands.subspan(cmd->cmdsize);
		if (cmd->cmd != LC_FILESET_ENTRY) {
			continue;
		}

		const auto *fec = reinterpret_cast<const fileset_entry_command *>(cmd);
		/* The name and the embedded header must both be inside the file */
		if (fec->cmdsize < sizeof(*fec) ||
		    fec->entry_id.offset >= fec->cmdsize ||
		    fec->fileoff > contents.size() - sizeof(mach_header_64)) {
			std::cerr << "malformed fileset entry\n";
			return 1;
		}
		const char *name = reinterpret_cast<const char *>(cmd) + fec->entry_id.offset;
		image img{contents, fec->fileoff};

		if (json) {
			std::cout << sep;
		} else {
			std::cout << std::endl;
		}
		img.dump(name, " ");
		sep = ",\n";
	}
	if (json) {
		std::cout << "\n]";
	}
	return 0;
}
/*
 * Read the whole file at `path` into `contents`.
 *
 * On success `contents` holds exactly the bytes that were read. On any
 * failure (the size cannot be determined, the file cannot be opened) an
 * error is printed to stderr and `contents` is left empty; a short read
 * truncates `contents` to what was actually read. The function does not
 * throw on a missing or unreadable file.
 */
void
read_file(std::filesystem::path &path, std::vector<uint8_t> &contents)
{
	contents.clear();

	/* The error_code overload reports a missing file without throwing */
	std::error_code ec;
	const std::uintmax_t size = std::filesystem::file_size(path, ec);
	if (ec) {
		std::cerr << "Unable to read " << path.string() << ": "
		          << ec.message() << "\n";
		return;
	}

	std::ifstream file(path, std::ifstream::binary);
	if (!file) {
		std::cerr << "Unable to open " << path.string() << "\n";
		return;
	}

	contents.resize(static_cast<size_t>(size));
	file.read(reinterpret_cast<char *>(contents.data()),
	    static_cast<std::streamsize>(contents.size()));
	/* Never expose bytes that were not actually read */
	contents.resize(static_cast<size_t>(file.gcount()));
}
/*
 * Kind of input file, as classified by recognize_file() from the first
 * bytes of the buffer. do_file() dispatches on this value; IMG4 inputs
 * are unpacked and then classified again.
 */
enum class file_kind {
UNKNOWN,
MACHO,
FAT_MACHO,
FILESET,
IMG4,
};
/*
 * Classify a file buffer by looking at its first bytes.
 *
 * Buffers shorter than a mach_header_64 are UNKNOWN. A 64-bit Mach-O
 * magic yields FILESET or MACHO depending on the header's filetype, the
 * FAT_CIGAM magic yields FAT_MACHO, and the four bytes "IM4P" at offset 8
 * yield IMG4. Everything else is UNKNOWN.
 */
static file_kind
recognize_file(const std::vector<uint8_t> &contents)
{
	if (contents.size() < sizeof(mach_header_64)) {
		return file_kind::UNKNOWN;
	}

	const uint8_t *bytes = contents.data();
	const auto *header = reinterpret_cast<const mach_header_64 *>(bytes);

	if (header->magic == MH_MAGIC_64) {
		return header->filetype == MH_FILESET ? file_kind::FILESET : file_kind::MACHO;
	}
	if (header->magic == FAT_CIGAM) {
		return file_kind::FAT_MACHO;
	}

	const bool has_im4p_tag = memcmp("IM4P", bytes + 8, 4) == 0;
	return has_im4p_tag ? file_kind::IMG4 : file_kind::UNKNOWN;
}
/*
 * Run a command (looked up through PATH) with stdin, stdout and stderr
 * redirected to /dev/null, and wait for it to finish.
 *
 * args: NULL-terminated argument vector; args[0] is the program name.
 *
 * Returns 0 only if the command was spawned, reaped, and exited with
 * status 0; returns 1 in every other case.
 */
static int
call_cmd_silent(const char *const *args)
{
	extern char **environ;
	posix_spawn_file_actions_t facts;
	pid_t pid;
	int rc;

	if (posix_spawn_file_actions_init(&facts) != 0) {
		return 1;
	}
	rc = posix_spawn_file_actions_addopen(&facts,
	    STDIN_FILENO, "/dev/null", O_RDONLY, 0777);
	if (rc == 0) {
		rc = posix_spawn_file_actions_addopen(&facts,
		    STDOUT_FILENO, "/dev/null", O_WRONLY, 0777);
	}
	if (rc == 0) {
		rc = posix_spawn_file_actions_addopen(&facts,
		    STDERR_FILENO, "/dev/null", O_WRONLY, 0777);
	}
	if (rc == 0) {
		rc = posix_spawnp(&pid, args[0], &facts, nullptr,
		    const_cast<char *const *>(args), environ);
	}
	posix_spawn_file_actions_destroy(&facts);
	if (rc != 0) {
		return 1;
	}

	/* A signal may interrupt the wait; retry so the child is reaped */
	int wstatus = 0;
	pid_t reaped;
	do {
		reaped = waitpid(pid, &wstatus, 0);
	} while (reaped == -1 && errno == EINTR);
	if (reaped != pid) {
		return 1;
	}
	if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) {
		return 1;
	}
	return 0;
}
/*
 * Dump the file whose bytes are in `contents`.
 *
 * Mach-O, FAT Mach-O and fileset inputs are handed to the matching
 * do_* routine. An IMG4 input is first unpacked with `img4utility` and
 * decompressed with `compression_tool` inside a temporary directory;
 * `contents` is then replaced with the decompressed bytes and the
 * function calls itself on the result.
 *
 * path:     path of the input file (used for display and for unpacking).
 * contents: the file's bytes; overwritten for IMG4 inputs.
 *
 * Returns 0 on success and 1 on failure.
 */
static int
do_file(const std::filesystem::path &path, std::vector<uint8_t> &contents)
{
	switch (recognize_file(contents)) {
	case file_kind::MACHO:
		return do_simple_macho(path.filename().string(), contents);
	case file_kind::FAT_MACHO:
		return do_fat_macho(path.filename().string(), contents);
	case file_kind::FILESET:
		return do_fileset(contents);
	case file_kind::IMG4:
		break;
	case file_kind::UNKNOWN:
		std::cerr << "Unsupported file type\n";
		return 1;
	}

	char tmp_tpl[] = "/tmp/kt-dump.XXXXXX";
	char *tmp_dir = mkdtemp(tmp_tpl);
	if (tmp_dir == nullptr) {
		std::cerr << "Unable to make temporary directory to unpack img4\n";
		return 1;
	}

	std::filesystem::path compressed_kc{tmp_dir};
	std::filesystem::path uncompressed_kc{tmp_dir};
	compressed_kc /= "compressed.kc";
	uncompressed_kc /= "uncompressed.kc";

	/*
	 * These arrays point into `path` and the two local paths above, so
	 * they must be rebuilt on every call: a `static` array would be
	 * initialized only once and dangle on any later invocation.
	 */
	const char *const img4args[] = {
		"img4utility",
		"--copyBinary",
		"--input",
		path.c_str(),
		"--output",
		compressed_kc.c_str(),
		nullptr,
	};
	const char *const ct_args[] = {
		"compression_tool",
		"-decode",
		"-v",
		"-v",
		"-v",
		"-i",
		compressed_kc.c_str(),
		"-o",
		uncompressed_kc.c_str(),
		nullptr,
	};

	int status = 0;
	if (call_cmd_silent(img4args)) {
		std::cerr << "Unable to unpack img4 image\n";
		status = 1;
	} else if (call_cmd_silent(ct_args)) {
		std::cerr << "Unable to decompress KC\n";
		status = 1;
	} else {
		read_file(uncompressed_kc, contents);
	}

	/* The decompressed bytes are in memory now; drop the scratch files */
	removefile_state_t s = removefile_state_alloc();
	removefile(tmp_dir, s, REMOVEFILE_RECURSIVE);
	removefile_state_free(s);

	if (status != 0) {
		return status;
	}
	return do_file(path, contents);
}
/*
 * Entry point: kt-dump [-f <simple|json|struct|stats>] <mach-o>
 *
 * Parses the optional -f format selector into the global out_fmt, reads
 * the file named by the positional argument and dumps it.
 *
 * Returns 0 on success and 1 on a usage error, an unreadable file or a
 * dump failure.
 */
int
main(int argc, char const *argv[])
{
	if (argc != 2 && argc != 4) {
		std::cout << "Usage: " << argv[0]
		          << " [-f <simple|json|struct|stats>] <mach-o>\n";
		return 1;
	}

	std::string path_arg;

	/* Parse command line args */
	for (int i = 1; i < argc; i++) {
		std::string arg(argv[i]);

		if (arg != "-f") {
			/* Read the file specified as a positional arg */
			path_arg = arg;
			continue;
		}
		if (++i == argc) {
			std::cerr << "Option " << arg << " requires an argument\n";
			return 1;
		}
		arg = argv[i];
		if (arg == "simple") {
			out_fmt = out_fmt_type::SIMPLE;
		} else if (arg == "json" || arg == "JSON") {
			out_fmt = out_fmt_type::JSON;
		} else if (arg == "struct") {
			out_fmt = out_fmt_type::STRUCT;
		} else if (arg == "stats") {
			out_fmt = out_fmt_type::STATS;
		} else {
			std::cerr << "Unknown output format: " << arg << std::endl;
			return 1;
		}
	}
	if (path_arg.length() == 0) {
		std::cerr << "no file specified\n";
		return 1;
	}

	std::filesystem::path path(path_arg);
	std::vector<uint8_t> contents;

	/*
	 * File access can throw (e.g. std::filesystem errors for a missing
	 * path); report that as an ordinary failure instead of aborting.
	 */
	try {
		read_file(path, contents);
		return do_file(path, contents);
	} catch (const std::exception &e) {
		std::cerr << "Unable to process " << path_arg << ": " << e.what() << "\n";
		return 1;
	}
}