This is xnu-11215.1.10. See this file in:
/*
 * Copyright (c) 2020 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <darwintest.h>
#include <machine/cpu_capabilities.h>
#include <sys/sysctl.h>

#include "exc_helpers.h"

/*
 * File-wide darwintest metadata: test namespace, Radar component routing,
 * owner, permission to run concurrently, and the "SoCSpecific" tag.
 */
T_GLOBAL_META(
	T_META_NAMESPACE("xnu.arm"),
	T_META_RADAR_COMPONENT_NAME("xnu"),
	T_META_RADAR_COMPONENT_VERSION("arm"),
	T_META_OWNER("sdooher"),
	T_META_RUN_CONCURRENTLY(true),
	T_META_TAG("SoCSpecific")
	);

/*
 * Result of the capability probe currently being run.  Set to true before a
 * probe is called; cleared by the bad-instruction handler, or by the probe
 * itself, when the capability turns out not to be usable.
 */
static volatile bool cap_usable;

/*
 * Exception handler that the cpu_capabilities test installs for
 * EXC_MASK_BAD_INSTRUCTION.  Records that the probe which was running hit an
 * unusable instruction by clearing cap_usable.
 *
 * All arguments are ignored.  The returned 4 is presumably the number of
 * bytes by which the exception helper advances the PC (one A64 instruction),
 * so that execution resumes after the faulting instruction -- TODO confirm
 * against exc_helpers.h.
 */
static size_t
bad_instruction_handler(mach_port_t task __unused, mach_port_t thread __unused,
    exception_type_t type __unused, mach_exception_data_t codes __unused)
{
	cap_usable = false;
	return 4;
}

/*
 * Probe used for the FP16 capability check: executes an `fmov` into the
 * h0 register.  If the instruction raises a bad-instruction exception, the
 * test's handler clears cap_usable.
 */
static void
try_fp16(void)
{
	asm volatile (
                "fmov	h0, #0" "\n"
                :
                :
                : "v0"
        );
}

/*
 * Probe used for the LSE capability check: executes a `swp` against a
 * stack scratch variable.  If the instruction raises a bad-instruction
 * exception, the test's handler clears cap_usable.
 */
static void
try_atomics(void)
{
	uint64_t dword;
	asm volatile (
                "swp	xzr, xzr, [%[dword]]"
                :
                : [dword]"r"(&dword)
                /* swp stores through the pointer, which no operand describes */
                : "memory"
        );
}

/*
 * Probe used for the CRC32 capability check: executes `crc32b` on the zero
 * register only, so no general-purpose register is modified.  If the
 * instruction raises a bad-instruction exception, the test's handler clears
 * cap_usable.
 */
static void
try_crc32(void)
{
	asm volatile ( "crc32b	wzr, wzr, wzr");
}

/*
 * Probe used for the FHM capability check: zeroes d0, then executes `fmlal`
 * on v0.  If the instruction raises a bad-instruction exception, the test's
 * handler clears cap_usable.
 */
static void
try_fhm(void)
{
	asm volatile (
                "fmov	d0, #0"                 "\n"
                "fmlal	v0.2s, v0.2h, v0.2h"    "\n"
                :
                :
                : "v0"
        );
}

/*
 * Probe used for the SHA512 capability check: zeroes d0/d1, then executes
 * `sha512h`.  If the instruction raises a bad-instruction exception, the
 * test's handler clears cap_usable.
 */
static void
try_sha512(void)
{
	asm volatile (
                "fmov		d0, #0"                 "\n"
                "fmov		d1, #0"                 "\n"
                "sha512h	q0, q0, v0.2d"          "\n"
                :
                :
                /* both v0 and v1 are written by the fmov instructions above */
                : "v0", "v1"
        );
}

/*
 * Probe used for the SHA3 capability check: zeroes d0/d1, then executes
 * `eor3`.  If the instruction raises a bad-instruction exception, the
 * test's handler clears cap_usable.
 */
static void
try_sha3(void)
{
	asm volatile (
                "fmov	d0, #0"                         "\n"
                "fmov	d1, #0"                         "\n"
                "eor3	v0.16b, v0.16b, v0.16b, v0.16b" "\n"
                :
                :
                /* both v0 and v1 are written by the fmov instructions above */
                : "v0", "v1"
        );
}

/*
 * Probe used for the SHA1 capability check: zeroes s0, then executes
 * `sha1h`.  If the instruction raises a bad-instruction exception, the
 * test's handler clears cap_usable.
 */
static void
try_sha1(void)
{
	asm volatile (
                "fmov		s0, #0"         "\n"
                "sha1h		s0, s0"         "\n"
                :
                :
                : "v0"
        );
}

/*
 * Probe used for the PMULL capability check: zeroes d0, then executes the
 * 64-bit-source `pmull` form (.1q result).  If the instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 */
static void
try_pmull(void)
{
	asm volatile (
                "fmov	d0, #0"                 "\n"
                "pmull	v0.1q, v0.1d, v0.1d"    "\n"
                :
                :
                : "v0"
        );
}

/*
 * Probe used for the AES capability check: zeroes d0/d1, then executes
 * `aesd`.  If the instruction raises a bad-instruction exception, the
 * test's handler clears cap_usable.
 */
static void
try_aes(void)
{
	asm volatile (
                "fmov		d0, #0"                 "\n"
                "fmov		d1, #0"                 "\n"
                "aesd		v0.16B, v0.16B"         "\n"
                :
                :
                /* both v0 and v1 are written by the fmov instructions above */
                : "v0", "v1"
        );
}


/*
 * Probe used for the SHA256 capability check: zeroes d0/d1, then executes
 * `sha256h`.  If the instruction raises a bad-instruction exception, the
 * test's handler clears cap_usable.
 */
static void
try_sha256(void)
{
	asm volatile (
                "fmov           d0, #0"                 "\n"
                "fmov           d1, #0"                 "\n"
                "sha256h        q0, q0, v0.4s"          "\n"
                :
                :
                /* both v0 and v1 are written by the fmov instructions above */
                : "v0", "v1"
        );
}


/*
 * Probe used for the FCMA capability check: zeroes d0, then executes
 * `fcadd` with a 90-degree rotation.  If the instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 */
static void
try_compnum(void)
{
	asm volatile (
                "fmov	d0, #0"                         "\n"
                "fcadd	v0.2s, v0.2s, v0.2s, #90"       "\n"
                :
                :
                : "v0"
        );
}


/*
 * Probe used for the FlagM capability check: executes `cfinv` twice.
 * NOTE(review): the pair presumably exists so the carry flag ends up back at
 * its original value -- confirm.  If the instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 */
static void
try_flagm(void)
{
	asm volatile (
                "cfinv"        "\n"
                "cfinv"        "\n"
        );
}

/*
 * Probe used for the FlagM2 capability check: executes `axflag` followed by
 * `xaflag`.  If either instruction raises a bad-instruction exception, the
 * test's handler clears cap_usable.
 */
static void
try_flagm2(void)
{
	asm volatile (
                "axflag"        "\n"
                "xaflag"        "\n"
                :
                :
                /* both instructions rewrite the NZCV condition flags */
                : "cc"
        );
}

/*
 * Probe used for the DotProd capability check: executes `udot` into v0.
 * The source registers v1/v2 are not initialised here; only whether the
 * instruction executes matters.  If it raises a bad-instruction exception,
 * the test's handler clears cap_usable.
 */
static void
try_dotprod(void)
{
	asm volatile (
                "udot v0.4S,v1.16B,v2.16B"
                :
                :
                : "v0"
        );
}

/*
 * Probe used for the RDM capability check: executes `sqrdmlah` into s0.
 * The source registers s1/s2 are not initialised here; only whether the
 * instruction executes matters.  If it raises a bad-instruction exception,
 * the test's handler clears cap_usable.
 */
static void
try_rdm(void)
{
	asm volatile (
                "sqrdmlah s0, s1, s2"
                :
                :
                : "s0"
        );
}

/*
 * Probe used for the SB capability check: executes a single `sb`
 * instruction.  If it raises a bad-instruction exception, the test's
 * handler clears cap_usable.
 */
static void
try_sb(void)
{
	asm volatile (
                "sb"
        );
}

/*
 * Probe used for the FRINTTS capability check: executes `frint32x` on s0.
 * s0 is not initialised here; only whether the instruction executes
 * matters.  If it raises a bad-instruction exception, the test's handler
 * clears cap_usable.
 */
static void
try_frintts(void)
{
	asm volatile (
                "frint32x s0, s0"
                :
                :
                : "s0"
        );
}

/*
 * Probe used for the JSCVT capability check: zeroes d0, then executes
 * `fjcvtzs` into w1.  If the instruction raises a bad-instruction
 * exception, the test's handler clears cap_usable.
 */
static void
try_jscvt(void)
{
	asm volatile (
                "fmov	d0, #0"      "\n"
                "fjcvtzs w1, d0"     "\n"
                :
                :
                /* fjcvtzs also writes the NZCV condition flags */
                : "w1", "d0", "cc"
        );
}

/*
 * Probe used for the PAuth capability check: executes `pacga` with x0 as
 * destination and both sources.  x0 is not initialised here; only whether
 * the instruction executes matters.  If it raises a bad-instruction
 * exception, the test's handler clears cap_usable.
 */
static void
try_pauth(void)
{
	asm volatile (
                "pacga x0, x0, x0"
                :
                :
                : "x0"
        );
}

/*
 * Probe used for the DPB capability check: issues `dc cvap` on the address
 * of a stack scratch variable.  If the instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 */
static void
try_dpb(void)
{
	int scratch;
	int *const target = &scratch;

	asm volatile ("dc cvap, %0" : : "r" (target));
}

/*
 * Probe used for the DPB2 capability check: issues `dc cvadp` on the
 * address of a stack scratch variable.  If the instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 */
static void
try_dpb2(void)
{
	int scratch;
	int *const target = &scratch;

	asm volatile ("dc cvadp, %0" : : "r" (target));
}

/*
 * Probe used for the LRCPC capability check: executes `ldaprb` from the
 * address of a stack scratch variable into w0.  The loaded value is
 * discarded.  If the instruction raises a bad-instruction exception, the
 * test's handler clears cap_usable.
 */
static void
try_lrcpc(void)
{
	int scratch;
	int *const target = &scratch;

	asm volatile ("ldaprb w0, [%0]" : : "r" (target) : "w0");
}

/*
 * Probe used for the LRCPC2 capability check: executes `ldapurb` from the
 * address of a stack scratch variable into w0.  The loaded value is
 * discarded.  If the instruction raises a bad-instruction exception, the
 * test's handler clears cap_usable.
 */
static void
try_lrcpc2(void)
{
	int scratch;
	int *const target = &scratch;

	asm volatile ("ldapurb w0, [%0]" : : "r" (target) : "w0");
}


/*
 * Probe used for the SPECRES capability check: executes `cfp rctx` with the
 * address of a stack scratch variable as its register operand.  If the
 * instruction raises a bad-instruction exception, the test's handler clears
 * cap_usable.
 */
static void
try_specres(void)
{
	int scratch;
	int *const operand = &scratch;

	asm volatile ("cfp rctx, %0" : : "r" (operand));
}

/*
 * Probe used for the BF16 capability check: executes `bfdot` into v0.
 * The source registers v1/v2 are not initialised here; only whether the
 * instruction executes matters.  If it raises a bad-instruction exception,
 * the test's handler clears cap_usable.
 */
static void
try_bf16(void)
{
	asm volatile (
                "bfdot v0.4S,v1.8H,v2.8H"
                :
                :
                : "v0"
        );
}

/*
 * Probe used for the I8MM capability check: executes the indexed `sudot`
 * form into v0.  The source registers v1/v2 are not initialised here; only
 * whether the instruction executes matters.  If it raises a bad-instruction
 * exception, the test's handler clears cap_usable.
 */
static void
try_i8mm(void)
{
	asm volatile (
                "sudot v0.4S,v1.16B,v2.4B[0]"
                :
                :
                : "v0"
        );
}

/*
 * Probe used for the ECV capability check.
 */
static void
try_ecv(void)
{
	/*
	 * CNTPCTSS_EL0 and CNTVCTSS_EL0 exist only when FEAT_ECV is
	 * implemented; without it a direct access to either is UNDEFINED.
	 * Read both and throw the values away.
	 */
	uint64_t physical_count = __builtin_arm_rsr64("CNTPCTSS_EL0");
	uint64_t virtual_count = __builtin_arm_rsr64("CNTVCTSS_EL0");

	(void)physical_count;
	(void)virtual_count;
}

/*
 * Probe used for the AFP capability check.  Clears cap_usable when the
 * three low FPCR bits cannot all be set.
 */
static void
try_afp(void)
{
	/*
	 * FEAT_AFP adds three FPCR control bits (bits 0-2) that were
	 * previously read-as-zero, so the feature is present exactly when
	 * all three can be written and read back as set.
	 */
	const uint64_t afp_mask = (1 << 0) | (1 << 1) | (1 << 2);
	const uint64_t saved_fpcr = __builtin_arm_rsr64("FPCR");

	/* Try to set the bits, sample the result, then restore the original. */
	__builtin_arm_wsr64("FPCR", saved_fpcr | afp_mask);
	const uint64_t readback = __builtin_arm_rsr64("FPCR");
	__builtin_arm_wsr64("FPCR", saved_fpcr);

	bool all_bits_stuck = ((readback & afp_mask) == afp_mask);
	if (!all_bits_stuck) {
		cap_usable = false;
	}
}

/*
 * Probe used for the RPRES capability check.  Clears cap_usable when the
 * reciprocal estimate of 10.0 shows no extra mantissa precision.
 */
static void
try_rpres(void)
{
	/*
	 * With FEAT_RPRES enabled through FPCR.AH, the floating-point
	 * reciprocal estimate instructions produce 12 mantissa bits instead
	 * of 8.  1/10.0 has no exact floating-point representation, so the
	 * additional precision shows up as non-zero bits 11-14 of the result.
	 */
	const uint64_t ah_bit = (1 << 1);
	const uint32_t extra_precision_mask = (0xf << 11);

	const uint64_t saved_fpcr = __builtin_arm_rsr64("FPCR");
	uint32_t estimate;

	/* Turn on FPCR.AH only for the duration of the estimate. */
	__builtin_arm_wsr64("FPCR", saved_fpcr | ah_bit);
	asm volatile (
                "fmov	s0, #10.0"      "\n"
                "frecpe s0, s0"         "\n"
                "fmov   %w0, s0"        "\n"
                : "=r"(estimate)
                :
                : "s0"
        );
	__builtin_arm_wsr64("FPCR", saved_fpcr);

	if (!(estimate & extra_precision_mask)) {
		cap_usable = false;
	}
}

/*
 * Probe used for the WFxT capability check: executes `wfet xzr`.  The
 * function is compiled with the "wfxt" target feature enabled.  If the
 * instruction raises a bad-instruction exception, the test's handler clears
 * cap_usable.
 */
__attribute__((target("wfxt")))
static void
try_wfxt(void)
{
	asm volatile ("wfet xzr");
}

/*
 * Probe used for the SME capability check: executes `rdsvl` into x0 and
 * discards the result.  If the instruction raises a bad-instruction
 * exception, the test's handler clears cap_usable.
 */
static void
try_sme(void)
{
	asm volatile (
               "rdsvl	x0, #1"
               :
               :
               : "x0"
        );
}

/*
 * Probe used for the SME2 capability check: enables ZA, zeroes zt0, then
 * disables ZA again.  If an instruction raises a bad-instruction exception,
 * the test's handler clears cap_usable.
 * NOTE(review): no clobbers are declared for the state touched by
 * smstart/smstop -- confirm that is acceptable here.
 */
static void
try_sme2(void)
{
	asm volatile (
               "smstart za"             "\n"
               "zero    { zt0 }"        "\n"
               "smstop  za"             "\n"
        );
}

/*
 * Probe used for the SME_F32F32 check: between smstart/smstop, executes
 * `fmopa` accumulating .s sources into za0.s.  If an instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 * NOTE(review): no clobbers are declared for the state touched by
 * smstart/smstop -- confirm that is acceptable here.
 */
static void
try_sme_f32f32(void)
{
	asm volatile (
               "smstart"                                "\n"
               "fmopa   za0.s, p0/m, p0/m, z0.s, z0.s"  "\n"
               "smstop"                                 "\n"
        );
}

/*
 * Probe used for the SME_BI32I32 check: between smstart/smstop, executes
 * `bmopa` on .s sources into za0.s.  If an instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 * NOTE(review): no clobbers are declared for the state touched by
 * smstart/smstop -- confirm that is acceptable here.
 */
static void
try_sme_bi32i32(void)
{
	asm volatile (
               "smstart"                                "\n"
               "bmopa   za0.s, p0/m, p0/m, z0.s, z0.s"  "\n"
               "smstop"                                 "\n"
        );
}

/*
 * Probe used for the SME_B16F32 check: between smstart/smstop, executes
 * `bfmopa` accumulating .h sources into za0.s.  If an instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 * NOTE(review): no clobbers are declared for the state touched by
 * smstart/smstop -- confirm that is acceptable here.
 */
static void
try_sme_b16f32(void)
{
	asm volatile (
               "smstart"                                "\n"
               "bfmopa  za0.s, p0/m, p0/m, z0.h, z0.h"  "\n"
               "smstop"                                 "\n"
        );
}

/*
 * Probe used for the SME_F16F32 check: between smstart/smstop, executes
 * `fmopa` accumulating .h sources into za0.s.  If an instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 * NOTE(review): no clobbers are declared for the state touched by
 * smstart/smstop -- confirm that is acceptable here.
 */
static void
try_sme_f16f32(void)
{
	asm volatile (
               "smstart"                                "\n"
               "fmopa   za0.s, p0/m, p0/m, z0.h, z0.h"  "\n"
               "smstop"                                 "\n"
        );
}

/*
 * Probe used for the SME_I8I32 check: between smstart/smstop, executes
 * `smopa` accumulating .b sources into za0.s.  If an instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 * NOTE(review): no clobbers are declared for the state touched by
 * smstart/smstop -- confirm that is acceptable here.
 */
static void
try_sme_i8i32(void)
{
	asm volatile (
               "smstart"                                "\n"
               "smopa   za0.s, p0/m, p0/m, z0.b, z0.b"  "\n"
               "smstop"                                 "\n"
        );
}

/*
 * Probe used for the SME_I16I32 check: between smstart/smstop, executes
 * `smopa` accumulating .h sources into za0.s.  If an instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 * NOTE(review): no clobbers are declared for the state touched by
 * smstart/smstop -- confirm that is acceptable here.
 */
static void
try_sme_i16i32(void)
{
	asm volatile (
               "smstart"                                "\n"
               "smopa   za0.s, p0/m, p0/m, z0.h, z0.h"  "\n"
               "smstop"                                 "\n"
        );
}

/*
 * Probe used for the SME_F64F64 check: between smstart/smstop, executes
 * `fmopa` accumulating .d sources into za0.d.  The function is compiled with
 * the "sme-f64f64" target feature enabled.  If an instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 * NOTE(review): no clobbers are declared for the state touched by
 * smstart/smstop -- confirm that is acceptable here.
 */
__attribute__((target("sme-f64f64")))
static void
try_sme_f64f64(void)
{
	asm volatile (
               "smstart"                                "\n"
               "fmopa   za0.d, p0/m, p0/m, z0.d, z0.d"  "\n"
               "smstop"                                 "\n"
        );
}

/*
 * Probe used for the SME_I16I64 check: between smstart/smstop, executes
 * `smopa` accumulating .h sources into za0.d.  The function is compiled with
 * the "sme-i16i64" target feature enabled.  If an instruction raises a
 * bad-instruction exception, the test's handler clears cap_usable.
 * NOTE(review): no clobbers are declared for the state touched by
 * smstart/smstop -- confirm that is acceptable here.
 */
__attribute__((target("sme-i16i64")))
static void
try_sme_i16i64(void)
{
	asm volatile (
               "smstart"                                "\n"
               "smopa   za0.d, p0/m, p0/m, z0.h, z0.h"  "\n"
               "smstop"                                 "\n"
        );
}

/*
 * Probe used for the FP_SyncExceptions capability check.  Clears cap_usable
 * when not every FPCR exception bit can be set.
 */
static void
try_fpexcp(void)
{
	/*
	 * FP exceptions count as supported only if all of the FPCR exception
	 * bits (bits 8-12 and bit 15) hold a written 1.
	 */
	const uint64_t exception_bits =
	    (1 << 8) | (1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 15);
	const uint64_t saved_fpcr = __builtin_arm_rsr64("FPCR");

	/* Try to set the bits, sample the result, then restore the original. */
	__builtin_arm_wsr64("FPCR", saved_fpcr | exception_bits);
	const uint64_t readback = __builtin_arm_rsr64("FPCR");
	__builtin_arm_wsr64("FPCR", saved_fpcr);

	bool all_bits_stuck = ((readback & exception_bits) == exception_bits);
	if (!all_bits_stuck) {
		cap_usable = false;
	}
}

/*
 * Probe used for the DIT capability check: reads the DIT register and
 * writes the same value straight back, so the thread's DIT state is left
 * unchanged.  If either access raises a bad-instruction exception, the
 * test's handler clears cap_usable.
 */
static void
try_dit(void)
{
	asm volatile (
                /* Read first so the write-back does not install junk from x0. */
                "mrs x0, DIT"   "\n"
                "msr DIT, x0"   "\n"
                :
                :
                : "x0"
        );
}

/* Exception port created by the cpu_capabilities test for EXC_MASK_BAD_INSTRUCTION. */
static mach_port_t exc_port;

/*
 * Cross-checks how one CPU capability is advertised against whether it can
 * actually be used.
 *
 * cap_name            label used in the test messages
 * cap_flag            bit(s) to test in _get_cpu_capabilities()
 * has_commpage_entry  true if cap_flag is meaningful; when false the sysctl
 *                     is the source of truth instead
 * cap_sysctl          sysctl name reporting the capability, or NULL
 * try_cpu_capability  probe that exercises the capability, or NULL to skip
 *                     the usability check
 *
 * Fails the test if neither a commpage entry nor a sysctl is supplied.
 */
static void
test_cpu_capability(const char *cap_name, uint64_t cap_flag, bool has_commpage_entry, const char *cap_sysctl, void (*try_cpu_capability)(void))
{
	bool commpage_reports = (_get_cpu_capabilities() & cap_flag) != 0;

	/* A failed lookup or a non-positive value both count as "absent". */
	bool sysctl_reports = false;
	if (cap_sysctl != NULL) {
		int value;
		size_t value_len = sizeof(value);
		if (sysctlbyname(cap_sysctl, &value, &value_len, NULL, 0) == 0) {
			sysctl_reports = (value > 0);
		}
	}

	if (!has_commpage_entry && cap_sysctl == NULL) {
		T_FAIL("Tested capability must have either sysctl or commpage flag");
		return;
	}

	/* When both sources exist they have to agree with each other. */
	if (has_commpage_entry && cap_sysctl != NULL) {
		T_EXPECT_EQ(commpage_reports, sysctl_reports, "%s commpage flag matches sysctl flag", cap_name);
	}

	if (try_cpu_capability == NULL) {
		return;
	}

	/* The commpage flag wins when present; otherwise trust the sysctl. */
	bool advertised;
	if (has_commpage_entry) {
		advertised = commpage_reports;
	} else {
		advertised = sysctl_reports;
	}

	/* The probe (or the exception handler) clears cap_usable on failure. */
	cap_usable = true;
	try_cpu_capability();
	T_EXPECT_EQ(advertised, cap_usable, "%s capability matches actual usability", cap_name);
}

/*
 * Verifies that every ARM CPU capability is reported consistently by the
 * commpage flags and the hw.optional sysctls, and, where a probe exists,
 * that the reported state matches whether the capability can be exercised.
 */
T_DECL(cpu_capabilities, "Verify ARM CPU capabilities", T_META_TAG_VM_NOT_ELIGIBLE) {
	/*
	 * Route bad-instruction exceptions to bad_instruction_handler so that a
	 * probe executing an unsupported instruction clears cap_usable instead
	 * of killing the test.
	 */
	exc_port = create_exception_port(EXC_MASK_BAD_INSTRUCTION);
	repeat_exception_handler(exc_port, bad_instruction_handler);

	test_cpu_capability("FP16 (deprecated sysctl)", kHasFeatFP16, true, "hw.optional.neon_fp16", NULL);
	test_cpu_capability("FP16", kHasFeatFP16, true, "hw.optional.arm.FEAT_FP16", try_fp16);
	test_cpu_capability("LSE (deprecated sysctl)", kHasFeatLSE, true, "hw.optional.armv8_1_atomics", NULL);
	test_cpu_capability("LSE", kHasFeatLSE, true, "hw.optional.arm.FEAT_LSE", try_atomics);
	test_cpu_capability("CRC32", kHasARMv8Crc32, true, "hw.optional.armv8_crc32", try_crc32);
	test_cpu_capability("FHM (deprecated sysctl)", kHasFeatFHM, true, "hw.optional.armv8_2_fhm", NULL);
	test_cpu_capability("FHM", kHasFeatFHM, true, "hw.optional.arm.FEAT_FHM", try_fhm);
	test_cpu_capability("SHA512", kHasFeatSHA512, true, "hw.optional.armv8_2_sha512", try_sha512);
	test_cpu_capability("SHA3", kHasFeatSHA3, true, "hw.optional.armv8_2_sha3", try_sha3);
	test_cpu_capability("AES", kHasFeatAES, true, "hw.optional.arm.FEAT_AES", try_aes);
	test_cpu_capability("SHA1", kHasFeatSHA1, true, "hw.optional.arm.FEAT_SHA1", try_sha1);
	test_cpu_capability("SHA256", kHasFeatSHA256, true, "hw.optional.arm.FEAT_SHA256", try_sha256);
	test_cpu_capability("PMULL", kHasFeatPMULL, true, "hw.optional.arm.FEAT_PMULL", try_pmull);
	test_cpu_capability("FCMA (deprecated sysctl)", kHasFeatFCMA, true, "hw.optional.armv8_3_compnum", NULL);
	test_cpu_capability("FCMA", kHasFeatFCMA, true, "hw.optional.arm.FEAT_FCMA", try_compnum);
	test_cpu_capability("FlagM", kHasFEATFlagM, true, "hw.optional.arm.FEAT_FlagM", try_flagm);
	test_cpu_capability("FlagM2", kHasFEATFlagM2, true, "hw.optional.arm.FEAT_FlagM2", try_flagm2);
	test_cpu_capability("DotProd", kHasFeatDotProd, true, "hw.optional.arm.FEAT_DotProd", try_dotprod);
	test_cpu_capability("RDM", kHasFeatRDM, true, "hw.optional.arm.FEAT_RDM", try_rdm);
	test_cpu_capability("SB", kHasFeatSB, true, "hw.optional.arm.FEAT_SB", try_sb);
	test_cpu_capability("FRINTTS", kHasFeatFRINTTS, true, "hw.optional.arm.FEAT_FRINTTS", try_frintts);
	test_cpu_capability("JSCVT", kHasFeatJSCVT, true, "hw.optional.arm.FEAT_JSCVT", try_jscvt);
	test_cpu_capability("PAuth", kHasFeatPAuth, true, "hw.optional.arm.FEAT_PAuth", try_pauth);
	test_cpu_capability("DPB", kHasFeatDPB, true, "hw.optional.arm.FEAT_DPB", try_dpb);
	test_cpu_capability("DPB2", kHasFeatDPB2, true, "hw.optional.arm.FEAT_DPB2", try_dpb2);
	test_cpu_capability("SPECRES", kHasFeatSPECRES, true, "hw.optional.arm.FEAT_SPECRES", try_specres);
	test_cpu_capability("LRCPC", kHasFeatLRCPC, true, "hw.optional.arm.FEAT_LRCPC", try_lrcpc);
	test_cpu_capability("LRCPC2", kHasFeatLRCPC2, true, "hw.optional.arm.FEAT_LRCPC2", try_lrcpc2);
	test_cpu_capability("AFP", kHasFeatAFP, true, "hw.optional.arm.FEAT_AFP", try_afp);
	test_cpu_capability("DIT", kHasFeatDIT, true, "hw.optional.arm.FEAT_DIT", try_dit);
	test_cpu_capability("FP_SyncExceptions", kHasFP_SyncExceptions, true, "hw.optional.arm.FP_SyncExceptions", try_fpexcp);
	test_cpu_capability("SME", kHasFeatSME, true, "hw.optional.arm.FEAT_SME", try_sme);
	test_cpu_capability("SME2", kHasFeatSME2, true, "hw.optional.arm.FEAT_SME2", try_sme2);

	// The following features do not have a commpage entry
	test_cpu_capability("BF16", 0, false, "hw.optional.arm.FEAT_BF16", try_bf16);
	test_cpu_capability("I8MM", 0, false, "hw.optional.arm.FEAT_I8MM", try_i8mm);
	test_cpu_capability("ECV", 0, false, "hw.optional.arm.FEAT_ECV", try_ecv);
	test_cpu_capability("RPRES", 0, false, "hw.optional.arm.FEAT_RPRES", try_rpres);
	test_cpu_capability("WFxT", 0, false, "hw.optional.arm.FEAT_WFxT", try_wfxt);
	test_cpu_capability("SME_F32F32", 0, false, "hw.optional.arm.SME_F32F32", try_sme_f32f32);
	test_cpu_capability("SME_BI32I32", 0, false, "hw.optional.arm.SME_BI32I32", try_sme_bi32i32);
	test_cpu_capability("SME_B16F32", 0, false, "hw.optional.arm.SME_B16F32", try_sme_b16f32);
	test_cpu_capability("SME_F16F32", 0, false, "hw.optional.arm.SME_F16F32", try_sme_f16f32);
	test_cpu_capability("SME_I8I32", 0, false, "hw.optional.arm.SME_I8I32", try_sme_i8i32);
	test_cpu_capability("SME_I16I32", 0, false, "hw.optional.arm.SME_I16I32", try_sme_i16i32);
	test_cpu_capability("SME_F64F64", 0, false, "hw.optional.arm.FEAT_SME_F64F64", try_sme_f64f64);
	test_cpu_capability("SME_I16I64", 0, false, "hw.optional.arm.FEAT_SME_I16I64", try_sme_i16i64);

	// The following features do not add instructions or registers to test for the presence of
	test_cpu_capability("LSE2", kHasFeatLSE2, true, "hw.optional.arm.FEAT_LSE2", NULL);
	test_cpu_capability("CSV2", kHasFeatCSV2, true, "hw.optional.arm.FEAT_CSV2", NULL);
	test_cpu_capability("CSV3", kHasFeatCSV3, true, "hw.optional.arm.FEAT_CSV3", NULL);
}