import sys
# Exit statuses of this script: main() returns one of these values and the
# result is handed to sys.exit().
SUCCESS = 0
ERROR = 1
# Help text that main() prints (after the per-line diagnostics) when a safe
# type is found. It is three lines long; the two bullet lines start with a tab.
error_help_message = (
    "Safe types were used in MIG in violation of VM API sanitization enforcement.\n"
    "\t- If you created a new MIG file that is not related to Virtual Memory, "
    "you will want to add it to the list of exceptions in this Python script.\n"
    "\t- If your work is related to Virtual Memory, you need to use unsafe types "
    "for userspace inputs. Make sure you define VM_KERNEL_SERVER in the MIG file, "
    "and that the safe type is associated to an unsafe type via "
    "VM_UNSAFE_TYPE/VM_TYPE_SAFE_UNSAFE. Learn more by reading doc/vm/sanitize.md."
)
# List of safe types that should not be used to represent userspace inputs.
# Note that it may be counter-intuitive that we want to prevent the use of
# safe types: they are safe after all, so wouldn't it be better to use them?
# The source of the confusion is that the safety of the type really describes
# the safety of the data. Since the data will be coming from userspace, it is
# considered unsafe/untrusted, and we want to make sure we represent it with
# unsafe types to force the programmer to sanitize it before it can be
# represented with the (directly usable) safe type.
# This is a list of all the safe types which have corresponding unsafe types
# in vm_types_unsafe.h.
safe_type_list = [
    # mach_vm_* address/offset/size
    "mach_vm_address_t",
    "mach_vm_offset_t",
    "mach_vm_size_t",
    # vm_* address/offset/size
    "vm_address_t",
    "vm_offset_t",
    "vm_size_t",
    # vm_map_* address/offset/size
    "vm_map_address_t",
    "vm_map_offset_t",
    "vm_map_size_t",
    # memory_object_* and vm_object_* offset/size
    "memory_object_offset_t",
    "memory_object_size_t",
    "vm_object_offset_t",
    "vm_object_size_t",
    "pointer_t",
    # vm32_* address/offset/size
    "vm32_address_t",
    "vm32_offset_t",
    "vm32_size_t",
    # protection / inheritance / behavior
    "vm_prot_t",
    "vm_inherit_t",
    "vm_behavior_t",
    # generic address and size types
    "caddr_t",
    "user_addr_t",
    "size_t",
    "user_size_t",
    # ranges
    "struct mach_vm_range",
    "mach_vm_range_recipe_v1_t",
]
# File names considered outside the VM boundary and therefore exempt from
# enforcement: main() skips any command-line argument that exactly matches
# one of these entries.
file_ignorelist = [
    "arcade_register_server.c",
    "clock_server.c",
    "exc_server.c",
    "mach_eventlink_server.c",
    "mach_exc_server.c",
    "mach_notify_server.c",
    "mach_port_server.c",
    "mach_voucher_server.c",
    "memory_entry_server.c",
    "processor_server.c",
    "processor_set_server.c",
    "restartable_server.c",
    "task_server.c",
    "thread_act_server.c",
    "upl_server.c",
]
def print_error(*args, **kwargs):
    """Print a message prefixed with "error:".

    All positional and keyword arguments are forwarded to print(). The
    message goes to sys.stderr unless the caller supplies its own `file`
    keyword; hard-coding `file=sys.stderr` next to `**kwargs` would instead
    raise TypeError (duplicate keyword) for such a caller.
    """
    # Only a default: an explicit file= from the caller wins.
    kwargs.setdefault("file", sys.stderr)
    print("error:", *args, **kwargs)
def is_type_used_in_line(safe_type, line):
    """Return True if `safe_type` appears in `line` as a standalone type name.

    The line is split on whitespace after treating ':', ';' and ',' as
    separators, and the type has to match whole tokens. A naive substring
    search would trigger false positives, e.g. "size_t" is contained in
    "__Request__host_page_size_t __attribute__((unused));".

    Types made of several words (e.g. "struct mach_vm_range") are matched as
    a run of consecutive tokens. A plain "token in tokens" test can never
    match them, because no whitespace-split token contains a space.

    Args:
        safe_type: the type name to look for.
        line: one line of text from the file being checked.

    Returns:
        True if the type is used on the line, False otherwise.
    """
    # This is used by an autogenerated struct in MIG that isn't an argument to a MIG call
    if "vm_address_t reserved; /* Reserved */" in line:
        return False

    # Arguments to MIG functions are typically the first thing on the line in
    # the generated header, but we search the whole line to be on the safe side.
    tokens = line.replace(':', ' ').replace(';', ' ').replace(',', ' ').split()
    type_words = safe_type.split()
    if not type_words:
        # An empty type name never matches anything.
        return False

    width = len(type_words)
    # Slide a window of `width` tokens over the line; for a one-word type this
    # is equivalent to a simple membership test.
    return any(
        tokens[start:start + width] == type_words
        for start in range(len(tokens) - width + 1)
    )
def are_safe_types_used_in_file(filepath):
    """Scan one file line by line and report every safe type it uses.

    Every line is checked against each entry of safe_type_list. Each hit is
    reported through print_error with the file path, the 1-based line number
    and the stripped line text. The scan does not stop at the first hit, so
    all offending lines are reported.

    Args:
        filepath: path of the text file to open and scan.

    Returns:
        True if at least one safe type was found, False otherwise.
    """
    found_any = False
    with open(filepath, "r") as header_file:
        for lineno, text in enumerate(header_file, start=1):
            for safe_type in safe_type_list:
                if not is_type_used_in_line(safe_type, text):
                    continue
                print_error('Found safe type "{}" in {}:{}. Line is "{}"'.format(
                    safe_type, filepath, lineno, text.strip()))
                found_any = True
    return found_any
def main():
    """Check every file named on the command line for safe-type usage.

    Each argument that is not listed in file_ignorelist must end in ".c" or
    ".h". The header to scan is derived by replacing the last character of
    the argument with "h", and is checked with are_safe_types_used_in_file.
    All files are scanned before the verdict is given, so every violation is
    reported and the result does not depend on argument order.

    Returns:
        ERROR if no file argument was given, if an argument is neither a .c
        nor a .h file, or if any scanned header uses a safe type; SUCCESS
        otherwise.
    """
    if len(sys.argv) < 2:
        print_error("usage: python vm_api_enforcement.py filename [extra_filename...]")
        return ERROR

    are_safe_types_used = False
    for filename in sys.argv[1:]:
        # Exact string comparison: an argument carrying a directory prefix
        # does not match the bare names in file_ignorelist.
        if filename in file_ignorelist:
            continue
        if not filename.endswith((".c", ".h")):
            print_error("File should be a .c or .h file:", filename)
            return ERROR

        # foo.c -> foo.h (and foo.h stays foo.h): only the header is scanned.
        header = filename[:-1] + "h"
        # Accumulate instead of assigning, so a clean file processed later
        # cannot erase a violation found in an earlier one.
        if are_safe_types_used_in_file(header):
            are_safe_types_used = True

    if are_safe_types_used:
        print_error("{}: {}".format(sys.argv[0], error_help_message))
        return ERROR
    return SUCCESS
# Run the check only when executed as a script, so that importing this module
# does not terminate the importing interpreter. The value returned by main()
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())