import sys

# Exit statuses of this script: main() returns one of these and the value is
# handed to sys.exit().
SUCCESS = 0
ERROR = 1

# Explanation printed after the individual findings when at least one safe type
# was detected. Built from adjacent string literals; the text contains exactly
# two newlines, each followed by a tab-indented bullet.
error_help_message = (
	"Safe types were used in MIG in violation of VM API sanitization enforcement.\n"
	"\t- If you created a new MIG file that is not related to Virtual Memory, "
	"you will want to add it to the list of "
	"exceptions in this Python script.\n"
	"\t- If your work is related to Virtual Memory, you need to use unsafe types "
	"for userspace inputs. Make sure you "
	"define VM_KERNEL_SERVER in the MIG file, and that the safe type is associated "
	"to an unsafe type via VM_UNSAFE_TYPE/VM_TYPE_SAFE_UNSAFE. "
	"Learn more by reading doc/vm/sanitize.md."
)

# List of safe types that should not be used to represent userspace inputs.
# Note that it may be counter-intuitive that we want to prevent the use of
# safe types: they are safe after all, so wouldn't it be better to use them?
# The source of the confusion is that the safety of the type really describes
# the safety of the data. Since the data will be coming from userspace, it is
# considered unsafe/untrusted, and we want to make sure we represent it with
# unsafe types to force the programmer to sanitize it before it can be
# represented with the (directly usable) safe type.
# This is a list of all the safe types which have corresponding unsafe types
# in vm_types_unsafe.h.
safe_type_list = [
	"mach_vm_address_t",
	"mach_vm_offset_t",
	"mach_vm_size_t",
	"vm_address_t",
	"vm_offset_t",
	"vm_size_t",
	"vm_map_address_t",
	"vm_map_offset_t",
	"vm_map_size_t",
	"memory_object_offset_t",
	"memory_object_size_t",
	"vm_object_offset_t",
	"vm_object_size_t",
	"pointer_t",
	"vm32_address_t",
	"vm32_offset_t",
	"vm32_size_t",
	"vm_prot_t",
	"vm_inherit_t",
	"vm_behavior_t",
	"caddr_t",
	"user_addr_t",
	"size_t",
	"user_size_t",
	"struct mach_vm_range",
	"mach_vm_range_recipe_v1_t",
]

# Files that are considered outside the VM boundary and are thus not subject to enforcement.
# Entries are compared verbatim against each command-line argument (no path
# normalization is applied), so a file is only skipped when it is passed to the
# script spelled exactly as it appears here.
file_ignorelist = [
	"arcade_register_server.c",
	"clock_server.c",
	"exc_server.c",
	"mach_eventlink_server.c",
	"mach_exc_server.c",
	"mach_notify_server.c",
	"mach_port_server.c",
	"mach_voucher_server.c",
	"memory_entry_server.c",
	"processor_server.c",
	"processor_set_server.c",
	"restartable_server.c",
	"task_server.c",
	"thread_act_server.c",
	"upl_server.c",
]

def print_error(*args, **kwargs):
	"""Print `args` to standard error, prefixed with "error:".

	Extra keyword arguments (e.g. `sep`, `end`) are forwarded to print().
	"""
	prefixed = ("error:",) + args
	print(*prefixed, file=sys.stderr, **kwargs)

def is_type_used_in_line(safe_type, line):
	"""Tell whether `safe_type` appears in `line` as a complete type name.

	The line is cut into tokens on whitespace, ':', ';' and ','. A type matches
	only when its own whitespace-separated words equal a contiguous run of those
	tokens. This lets multi-word types such as "struct mach_vm_range" be
	detected, while a type that is merely a substring of a longer identifier is
	not reported.

	Args:
		safe_type: type name to look for; may consist of several words.
		line: one line of the file being checked.

	Returns:
		True if the type is used in the line, False otherwise (including for
		the autogenerated "reserved" member that is deliberately ignored).
	"""
	# This is used by an autogenerated struct in MIG that isn't an argument to a MIG call
	if "vm_address_t\treserved;\t/* Reserved */" in line:
		return False

	type_words = safe_type.split()
	if not type_words:
		return False

	# arguments to MIG functions are typically the first thing on the line in the generated header,
	# but we search for the type elsewhere to be on the safe side. We still need to be careful not
	# to trigger false positives by doing a naive search
	# e.g. size_t is in "__Request__host_page_size_t __attribute__((unused));"
	# Only ':', ';' and ',' are treated as separators besides whitespace, so a
	# type glued to other punctuation (e.g. '(' or '*') is not recognized.
	tokens = line.replace(':', ' ').replace(';', ' ').replace(',', ' ').split()

	# Slide a window of len(type_words) over the tokens; for a single-word type
	# this is the same as a plain membership test.
	width = len(type_words)
	return any(
		tokens[start:start + width] == type_words
		for start in range(len(tokens) - width + 1)
	)

def are_safe_types_used_in_file(filepath):
	"""Scan the file at `filepath` and report every safe type it uses.

	Each line is tested against every entry of safe_type_list; each hit is
	reported through print_error together with the 1-based line number and the
	stripped line. Scanning always continues to the end of the file so that all
	hits are listed.

	Returns:
		True if at least one safe type was found, False otherwise.
	"""
	found_any = False
	with open(filepath, "r") as source:
		for line_number, text in enumerate(source, start=1):
			for type_name in safe_type_list:
				if not is_type_used_in_line(type_name, text):
					continue
				message = "".join((
					"Found safe type \"", type_name, "\" in ", filepath,
					":", str(line_number), ". Line is \"", text.strip(), "\"",
				))
				print_error(message)
				found_any = True
	return found_any

def main():
	"""Check every file named on the command line for uses of safe types.

	Each argument must end in ".c" or ".h". The file actually scanned is the
	corresponding header: the last character of the argument is replaced by
	"h". Arguments that appear in file_ignorelist are skipped.

	Returns:
		SUCCESS when no scanned header uses a safe type. ERROR when at least one
		of them does, when no filename was given, or when an argument is
		neither a ".c" nor a ".h" file (checking stops at that argument).
	"""
	if len(sys.argv) < 2:
		print_error("usage: python vm_api_enforcement.py filename [extra_filename...]")
		return ERROR

	are_safe_types_used = False
	for filename in sys.argv[1:]:
		if filename in file_ignorelist:
			continue

		if not filename.endswith((".c", ".h")):
			print_error("File should be a .c or .h file:", filename)
			return ERROR
		header = filename[:-1] + "h"

		# Accumulate instead of overwriting: a violation found in an earlier
		# file must not be forgotten because a later file is clean. The scan is
		# still run for every file so that all violations get reported.
		if are_safe_types_used_in_file(header):
			are_safe_types_used = True

	if are_safe_types_used:
		print_error("{}: {}".format(sys.argv[0], error_help_message))
		return ERROR
	return SUCCESS

# Run the check only when executed as a script, so that importing this module
# (e.g. to reuse its helpers) does not terminate the importing interpreter.
if __name__ == "__main__":
	sys.exit(main())
