/*
* Copyright (c) 2022 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* if_redirect.c
* Virtual network interface that redirects traffic to a delegate interface.
*/
#include <sys/sysctl.h>
#include <net/dlil.h>
#include <net/ethernet.h>
#include <net/kpi_interface.h>
#include <net/bpf.h>
#include <net/if_media.h>
#include <net/if_ether.h>
#include <net/if_redirect.h>
#include <netinet/icmp6.h>
#include <os/log.h>
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#define RD_NAME "rd"
#define RD_MAXUNIT IF_MAXUNIT
#define RD_ZONE_MAX_ELEM MIN(IFNETS_MAX, RD_MAXUNIT)
#define RD_MAX_MTU 2048
#define RD_MAX_TX_RINGS 1
#define RD_MAX_RX_RINGS 1
#define RD_POOL_SIZE 1024
static uint8_t default_mac[ETHER_ADDR_LEN] = {0x0, 0x1, 0x2, 0x3, 0x4, 0x5};
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, redirect, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
"Redirect interface");
static int if_redirect_debug = 0;
SYSCTL_INT(_net_link_redirect, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_LOCKED,
&if_redirect_debug, 0, "Redirect interface debug logs");
os_log_t redirect_log_handle = NULL;
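/*
 * Logging: errors are always emitted; debug and info messages are
 * emitted only when the net.link.redirect.debug sysctl is non-zero.
 */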
#define RDLOG(level, format, ...) do { \
if (level == LOG_ERR) { \
os_log_error(redirect_log_handle, "%s: " format "\n", \
__FUNCTION__, ##__VA_ARGS__); \
} else { \
if (__probable(if_redirect_debug == 0)) { \
break; \
} \
if (level == LOG_DEBUG) { \
os_log_debug(redirect_log_handle, "%s: " format "\n", \
__FUNCTION__, ##__VA_ARGS__); \
} else if (level == LOG_INFO) { \
os_log_info(redirect_log_handle, "%s: " format "\n", \
__FUNCTION__, ##__VA_ARGS__); \
} \
} \
} while (0)
#define RDLOG_ERR(format, ...) RDLOG(LOG_ERR, format, ##__VA_ARGS__)
#define RDLOG_DBG(format, ...) RDLOG(LOG_DEBUG, format, ##__VA_ARGS__)
#define RDLOG_INFO(format, ...) RDLOG(LOG_INFO, format, ##__VA_ARGS__)
#define RD_MEDIA_LIST_MAX 27
typedef struct {
uuid_t rnx_provider;
uuid_t rnx_instance;
} redirect_nx, *redirect_nx_t;
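/*
 * Per-interface soft state. The rd_* booleans record which stages of
 * delegate setup completed, so redirect_clear_delegate_locked() can
 * unwind exactly what was done. Most fields are protected by rd_lock.
 */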
typedef struct {
char rd_name[IFNAMSIZ]; /* our unique id */
lck_mtx_t rd_lock;
uint32_t rd_ftype;
ifnet_t rd_ifp;
ifnet_t rd_delegate_ifp;
/* General state of the interface */
boolean_t rd_detaching;
boolean_t rd_connected;
/* Used for tracking delegate related state info */
boolean_t rd_self_ref;
boolean_t rd_delegate_parent_set;
boolean_t rd_delegate_ref;
boolean_t rd_fsw_rx_cb_set;
boolean_t rd_delegate_set;
boolean_t rd_mac_addr_set;
boolean_t rd_detach_notify_set;
unsigned int rd_max_mtu;
uint32_t rd_retain_count;
kern_pbufpool_t rd_pp;
kern_channel_ring_t rd_rx_ring[RD_MAX_RX_RINGS];
kern_channel_ring_t rd_tx_ring[RD_MAX_TX_RINGS];
redirect_nx rd_nx;
struct netif_stats *rd_nifs;
void *rd_intf_adv_kern_ctx;
thread_call_t rd_doorbell_tcall;
boolean_t rd_doorbell_tcall_active;
boolean_t rd_waiting_for_tcall;
bool rd_intf_adv_enabled;
kern_nexus_capab_interface_advisory_notify_fn_t rd_intf_adv_notify;
} if_redirect, *if_redirect_t;
static if_redirect_t ifnet_get_if_redirect(ifnet_t);
static int redirect_clone_create(struct if_clone *, uint32_t, void *);
static int redirect_clone_destroy(ifnet_t);
static int redirect_ioctl(ifnet_t, u_long, void *);
static void redirect_if_free(ifnet_t);
static void redirect_free(if_redirect_t);
static errno_t redirect_demux(ifnet_t, mbuf_t, char *, protocol_family_t *);
static errno_t redirect_add_proto(ifnet_t, protocol_family_t,
const struct ifnet_demux_desc *, uint32_t);
static errno_t redirect_del_proto(ifnet_t, protocol_family_t);
static void redirect_clear_delegate_locked(if_redirect_t);
static void redirect_clear_delegate(if_redirect_t);
static struct if_clone
redirect_cloner = IF_CLONE_INITIALIZER(RD_NAME,
redirect_clone_create,
redirect_clone_destroy,
0,
RD_MAXUNIT);
static void interface_link_event(ifnet_t ifp, uint32_t event_code);
static LCK_GRP_DECLARE(redirect_lock_group, "redirect");
static LCK_ATTR_DECLARE(redirect_lock_attr, 0, 0);
#define RD_LOCK_INIT(rd) \
lck_mtx_init(&(rd)->rd_lock, &redirect_lock_group, &redirect_lock_attr)
#define RD_LOCK(rd) \
lck_mtx_lock(&(rd)->rd_lock)
#define RD_UNLOCK(rd) \
lck_mtx_unlock(&(rd)->rd_lock)
#define RD_LOCK_DESTROY(rd) \
lck_mtx_destroy(&(rd)->rd_lock, &redirect_lock_group)
static inline boolean_t
redirect_is_usable(if_redirect_t rd)
{
return !rd->rd_detaching && rd->rd_connected;
}
static inline unsigned int
redirect_max_mtu(ifnet_t ifp)
{
if_redirect_t rd;
unsigned int max_mtu = ETHERMTU;
rd = ifnet_get_if_redirect(ifp);
if (rd == NULL) {
RDLOG_ERR("rd is NULL");
goto done;
}
max_mtu = rd->rd_max_mtu;
done:
return max_mtu;
}
static void
redirect_free(if_redirect_t rd)
{
VERIFY(rd->rd_retain_count == 0);
if (rd->rd_pp != NULL) {
pp_release(rd->rd_pp);
rd->rd_pp = NULL;
}
RD_LOCK_DESTROY(rd);
RDLOG_DBG("%s", rd->rd_name);
kfree_type(if_redirect, rd);
}
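/*
 * The soft state is created with a retain count of one and freed when
 * the final reference is released; releasing with a count of zero trips
 * the VERIFY below.
 */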
static void
redirect_release(if_redirect_t rd)
{
uint32_t old_retain_count;
old_retain_count = OSDecrementAtomic(&rd->rd_retain_count);
switch (old_retain_count) {
case 0:
VERIFY(old_retain_count != 0);
break;
case 1:
redirect_free(rd);
break;
default:
break;
}
return;
}
static void
redirect_retain(if_redirect_t rd)
{
OSIncrementAtomic(&rd->rd_retain_count);
}
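/*
 * Tap a packet in or out to BPF with a DLT that matches the interface
 * family: DLT_EN10MB for Ethernet, DLT_RAW for the point-to-point
 * families (cellular, utun, ipsec).
 */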
static void
redirect_bpf_tap(ifnet_t ifp, kern_packet_t pkt, bool input)
{
uint32_t dlt;
switch (ifp->if_family) {
case IFNET_FAMILY_ETHERNET:
dlt = DLT_EN10MB;
break;
case IFNET_FAMILY_CELLULAR:
case IFNET_FAMILY_UTUN:
case IFNET_FAMILY_IPSEC:
dlt = DLT_RAW;
break;
default:
DTRACE_SKYWALK1(invalid__family, ifnet_t, ifp);
return;
}
if (input) {
bpf_tap_packet_in(ifp, dlt, pkt, NULL, 0);
} else {
bpf_tap_packet_out(ifp, dlt, pkt, NULL, 0);
}
}
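/*
 * Fill in the packet buffer pool parameters: RD_POOL_SIZE single-buflet
 * packets with buffers sized to the maximum MTU, and twice as many
 * buflets as packets because one pool backs both the TX and RX rings.
 */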
static void
redirect_packet_pool_init_prepare(if_redirect_t rd,
struct kern_pbufpool_init *pp_init)
{
uint32_t max_mtu = rd->rd_max_mtu;
bzero(pp_init, sizeof(*pp_init));
pp_init->kbi_version = KERN_PBUFPOOL_CURRENT_VERSION;
pp_init->kbi_flags |= KBIF_VIRTUAL_DEVICE;
pp_init->kbi_packets = RD_POOL_SIZE;
pp_init->kbi_bufsize = max_mtu;
pp_init->kbi_max_frags = 1;
pp_init->kbi_buflets = (2 * pp_init->kbi_packets); /* Tx/Rx pool */
pp_init->kbi_buf_seg_size = skmem_usr_buf_seg_size;
pp_init->kbi_ctx = NULL;
pp_init->kbi_ctx_retain = NULL;
pp_init->kbi_ctx_release = NULL;
}
static errno_t
redirect_packet_pool_make(if_redirect_t rd)
{
struct kern_pbufpool_init pp_init;
errno_t err;
redirect_packet_pool_init_prepare(rd, &pp_init);
(void)snprintf((char *)pp_init.kbi_name, sizeof(pp_init.kbi_name),
"%s pp", rd->rd_name);
err = kern_pbufpool_create(&pp_init, &rd->rd_pp, NULL);
return err;
}
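/*
 * Enqueue a native packet on the delegate. If the delegate's nexus uses
 * logical links and a traffic rule maps the packet to a queue set, the
 * packet is enqueued on that queue set's ifclassq; otherwise it takes
 * the default ifnet_enqueue_pkt() path. Both paths consume the packet.
 */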
static int
redirect_enqueue_pkt(struct nx_netif *nif, struct __kern_packet *pkt,
boolean_t flush, boolean_t *drop)
{
ifnet_t ifp = nif->nif_ifp;
uint64_t qset_id;
int err;
if (NX_LLINK_PROV(nif->nif_nx) &&
ifp->if_traffic_rule_count > 0 &&
nxctl_inet_traffic_rule_find_qset_id_with_pkt(ifp->if_xname,
pkt, &qset_id) == 0) {
struct netif_qset * __single qset;
/*
* This always returns a qset because if the qset id is invalid the
* default qset is returned.
*/
qset = nx_netif_find_qset(nif, qset_id);
ASSERT(qset != NULL);
pkt->pkt_qset_idx = qset->nqs_idx;
err = ifnet_enqueue_ifcq_pkt(ifp, qset->nqs_ifcq, pkt, flush, drop);
nx_netif_qset_release(&qset);
} else {
/* callee consumes packet */
err = ifnet_enqueue_pkt(ifp, pkt, flush, drop);
}
return err;
}
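/*
 * Hand a queue of packets to the delegate interface's AQM, converting
 * each to a native packet or an mbuf depending on whether the delegate
 * is a native or a compat netif device. ifnet_datamov_begin() keeps the
 * delegate from detaching during the transfer; packets left on spktq on
 * return are dropped by the caller.
 */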
static int
redirect_enqueue_mbuf(struct nx_netif *nif, struct mbuf *m,
boolean_t flush, boolean_t *drop)
{
return ifnet_enqueue_mbuf(nif->nif_ifp, m, flush, drop);
}
static int
redirect_tx_submit(ifnet_t delegate_ifp, struct pktq *spktq)
{
struct __kern_packet *spkt, *pkt;
struct nx_netif *nif;
struct netif_stats *nifs;
struct nexus_netif_adapter *dev_nifna;
struct mbuf *m;
boolean_t drop, native, compat;
errno_t err = 0;
int cnt = 0;
if (!ifnet_datamov_begin(delegate_ifp)) {
RDLOG_ERR("delegate interface is being detached");
DTRACE_SKYWALK1(delegate__detached, ifnet_t, delegate_ifp);
return ENXIO;
}
if (NA(delegate_ifp) == NULL) {
RDLOG_ERR("nexus adapter is not present");
DTRACE_SKYWALK1(no__nexus, ifnet_t, delegate_ifp);
err = ENXIO;
goto done;
}
dev_nifna = NA(delegate_ifp);
nif = dev_nifna->nifna_netif;
nifs = &nif->nif_stats;
native = (dev_nifna->nifna_up.na_type == NA_NETIF_DEV);
compat = (dev_nifna->nifna_up.na_type == NA_NETIF_COMPAT_DEV);
while (KPKTQ_LEN(spktq) > 0) {
KPKTQ_DEQUEUE(spktq, spkt);
ASSERT(spkt != NULL);
drop = FALSE;
if (__probable(native)) {
pkt = nx_netif_pkt_to_pkt(dev_nifna, spkt, NETIF_CONVERT_TX);
if (pkt == NULL) {
continue;
}
pkt->pkt_pflags |= PKT_F_FLOW_ID;
pkt->pkt_pflags &= ~PKT_F_FLOW_ADV;
netif_ifp_inc_traffic_class_out_pkt(delegate_ifp,
pkt->pkt_svc_class, 1, pkt->pkt_length);
err = redirect_enqueue_pkt(nif, pkt, FALSE, &drop);
} else {
ASSERT(compat);
m = nx_netif_pkt_to_mbuf(dev_nifna, spkt, NETIF_CONVERT_TX);
if (m == NULL) {
continue;
}
m->m_pkthdr.pkt_flags = PKTF_FLOW_ID;
m->m_pkthdr.pkt_flags &= ~PKTF_FLOW_ADV;
ifp_inc_traffic_class_out(delegate_ifp, m);
err = redirect_enqueue_mbuf(nif, m, FALSE, &drop);
}
if (__probable(err == 0)) {
cnt++;
} else {
RDLOG_ERR("enqueue failed: %d", err);
if (drop) {
STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
STATS_INC(nifs, NETIF_STATS_DROP);
}
DTRACE_SKYWALK3(enqueue__failed,
ifnet_t, delegate_ifp, boolean_t, drop, int, err);
break;
}
}
done:
if (cnt > 0) {
netif_transmit(delegate_ifp, NETIF_XMIT_FLAG_REDIRECT);
}
ifnet_datamov_end(delegate_ifp);
return err;
}
/*
* nexus netif domain provider
*/
static errno_t
redirect_nxdp_init(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
return 0;
}
static void
redirect_nxdp_fini(kern_nexus_domain_provider_t domprov)
{
#pragma unused(domprov)
}
static uuid_t redirect_nx_dom_prov;
static errno_t
redirect_register_nexus_domain_provider(void)
{
const struct kern_nexus_domain_provider_init dp_init = {
.nxdpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
.nxdpi_flags = 0,
.nxdpi_init = redirect_nxdp_init,
.nxdpi_fini = redirect_nxdp_fini
};
nexus_domain_provider_name_t domain_provider_name = "com.apple.redirect";
errno_t err = 0;
/* redirect_nxdp_init() is called before this function returns */
err = kern_nexus_register_domain_provider(NEXUS_TYPE_NET_IF,
domain_provider_name,
&dp_init, sizeof(dp_init),
&redirect_nx_dom_prov);
if (err != 0) {
RDLOG_ERR("failed to register domain provider");
return err;
}
return 0;
}
/*
* netif nexus routines
*/
static if_redirect_t
redirect_nexus_context(kern_nexus_t nexus)
{
if_redirect_t rd;
rd = (if_redirect_t)kern_nexus_get_context(nexus);
assert(rd != NULL);
return rd;
}
static errno_t
redirect_nx_ring_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
kern_channel_t channel, kern_channel_ring_t ring, boolean_t is_tx_ring,
void **ring_ctx)
{
#pragma unused(nxprov, channel, ring_ctx)
if_redirect_t rd;
rd = redirect_nexus_context(nexus);
RD_LOCK(rd);
if (rd->rd_detaching) {
DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
RD_UNLOCK(rd);
return ENXIO;
}
if (is_tx_ring) {
_CASSERT(RD_MAX_TX_RINGS == 1);
VERIFY(rd->rd_tx_ring[0] == NULL);
rd->rd_tx_ring[0] = ring;
} else {
_CASSERT(RD_MAX_RX_RINGS == 1);
VERIFY(rd->rd_rx_ring[0] == NULL);
rd->rd_rx_ring[0] = ring;
}
rd->rd_nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
RD_UNLOCK(rd);
RDLOG_INFO("%s: %s ring init", rd->rd_name,
is_tx_ring ? "TX" : "RX");
return 0;
}
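/*
 * Ring teardown. For the TX ring this also retires the doorbell thread
 * call: if the call is currently executing and cannot be cancelled,
 * sleep until the handler signals completion before freeing it.
 */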
static void
redirect_nx_ring_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
kern_channel_ring_t ring)
{
#pragma unused(nxprov, ring)
if_redirect_t rd;
thread_call_t __single tcall = NULL;
rd = redirect_nexus_context(nexus);
RD_LOCK(rd);
if (rd->rd_rx_ring[0] == ring) {
RDLOG_INFO("%s: RX ring fini", rd->rd_name);
rd->rd_rx_ring[0] = NULL;
} else if (rd->rd_tx_ring[0] == ring) {
RDLOG_INFO("%s: TX ring fini", rd->rd_name);
tcall = rd->rd_doorbell_tcall;
rd->rd_doorbell_tcall = NULL;
rd->rd_tx_ring[0] = NULL;
}
rd->rd_nifs = NULL;
RD_UNLOCK(rd);
if (tcall != NULL) {
boolean_t success;
success = thread_call_cancel_wait(tcall);
RDLOG_INFO("%s: thread_call_cancel %s",
rd->rd_name, success ? "SUCCESS" : "FAILURE");
if (!success) {
RD_LOCK(rd);
if (rd->rd_doorbell_tcall_active) {
rd->rd_waiting_for_tcall = TRUE;
RDLOG_INFO("%s: *waiting for threadcall",
rd->rd_name);
do {
msleep(rd, &rd->rd_lock,
PZERO, "redirect threadcall", 0);
} while (rd->rd_doorbell_tcall_active);
RDLOG_INFO("%s: threadcall done",
rd->rd_name);
rd->rd_waiting_for_tcall = FALSE;
}
RD_UNLOCK(rd);
}
success = thread_call_free(tcall);
RDLOG_INFO("%s: thread_call_free %s",
rd->rd_name, success ? "SUCCESS" : "FAILURE");
redirect_release(rd);
VERIFY(success == TRUE);
}
}
static errno_t
redirect_nx_pre_connect(kern_nexus_provider_t nxprov,
proc_t proc, kern_nexus_t nexus, nexus_port_t port,
kern_channel_t channel, void **channel_context)
{
#pragma unused(nxprov, proc, nexus, port, channel, channel_context)
return 0;
}
static errno_t
redirect_nx_connected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
kern_channel_t channel)
{
#pragma unused(nxprov, channel)
if_redirect_t rd = NULL;
rd = redirect_nexus_context(nexus);
RD_LOCK(rd);
if (rd->rd_detaching) {
DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
RD_UNLOCK(rd);
return EBUSY;
}
redirect_retain(rd);
rd->rd_connected = TRUE;
RD_UNLOCK(rd);
RDLOG_DBG("%s: connected channel %p", rd->rd_name, channel);
return 0;
}
static void
redirect_nx_pre_disconnect(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
kern_channel_t channel)
{
#pragma unused(nxprov, channel)
if_redirect_t rd;
rd = redirect_nexus_context(nexus);
RDLOG_INFO("%s: pre-disconnect channel %p", rd->rd_name, channel);
/* Quiesce the interface and flush any pending outbound packets */
if_down(rd->rd_ifp);
RD_LOCK(rd);
rd->rd_connected = FALSE;
RD_UNLOCK(rd);
}
static void
redirect_nx_disconnected(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
kern_channel_t channel)
{
#pragma unused(nxprov, channel)
if_redirect_t rd;
rd = redirect_nexus_context(nexus);
RDLOG_INFO("%s: disconnected channel %p", rd->rd_name, channel);
redirect_release(rd);
}
static errno_t
redirect_nx_slot_init(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
kern_channel_ring_t ring, kern_channel_slot_t slot, uint32_t slot_index,
struct kern_slot_prop **slot_prop_addr, void **slot_context)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index, slot_prop_addr, slot_context)
return 0;
}
static void
redirect_nx_slot_fini(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
kern_channel_ring_t ring, kern_channel_slot_t slot, uint32_t slot_index)
{
#pragma unused(nxprov, nexus, ring, slot, slot_index)
}
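/*
 * TX sync: detach each packet from the TX ring, tap it to BPF, update
 * ring statistics, and collect everything on a local packet queue that
 * is then handed to the delegate. Whatever could not be enqueued is
 * freed and counted as dropped.
 */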
static errno_t
redirect_nx_sync_tx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
kern_channel_ring_t tx_ring, uint32_t flags)
{
#pragma unused(nxprov)
if_redirect_t rd;
ifnet_t ifp;
kern_channel_slot_t last_tx_slot = NULL;
ifnet_t delegate_ifp;
struct kern_channel_ring_stat_increment stats;
kern_channel_slot_t tx_slot = NULL;
struct netif_stats *nifs = &NX_NETIF_PRIVATE(nexus)->nif_stats;
struct pktq tx_pktq;
uint32_t n_pkts = 0;
int error = 0;
bzero(&stats, sizeof(stats));
STATS_INC(nifs, NETIF_STATS_TX_SYNC);
rd = redirect_nexus_context(nexus);
RDLOG_INFO("%s ring %d flags 0x%x", rd->rd_name, tx_ring->ckr_ring_id, flags);
if (__improbable(!redirect_is_usable(rd))) {
RDLOG_INFO("%s is not usable", rd->rd_name);
DTRACE_SKYWALK1(unusable, if_redirect_t, rd);
return ENOENT;
}
ifp = rd->rd_ifp;
delegate_ifp = rd->rd_delegate_ifp;
KPKTQ_INIT(&tx_pktq);
while ((tx_slot = kern_channel_get_next_slot(tx_ring, tx_slot, NULL)) != NULL) {
kern_packet_t sph;
/* detach the packet from the TX ring */
sph = kern_channel_slot_get_packet(tx_ring, tx_slot);
VERIFY(sph != 0);
kern_channel_slot_detach_packet(tx_ring, tx_slot, sph);
/* bpf tap output */
redirect_bpf_tap(ifp, sph, false);
ASSERT(sph != 0);
STATS_INC(nifs, NETIF_STATS_TX_COPY_DIRECT);
STATS_INC(nifs, NETIF_STATS_TX_PACKETS);
stats.kcrsi_slots_transferred++;
stats.kcrsi_bytes_transferred += kern_packet_get_data_length(sph);
KPKTQ_ENQUEUE(&tx_pktq, SK_PTR_ADDR_KPKT(sph));
n_pkts++;
last_tx_slot = tx_slot;
}
if (last_tx_slot != NULL) {
kern_channel_advance_slot(tx_ring, last_tx_slot);
kern_channel_increment_ring_net_stats(tx_ring, ifp, &stats);
}
if (__improbable(delegate_ifp == NULL)) {
RDLOG_INFO("%s has no delegate", rd->rd_name);
DTRACE_SKYWALK1(no__delegate, if_redirect_t, rd);
error = ENXIO;
goto done;
}
if (n_pkts > 0) {
redirect_tx_submit(delegate_ifp, &tx_pktq);
}
done:
/*
* Packets not enqueued into delegate interface AQM
*/
if (KPKTQ_LEN(&tx_pktq) > 0) {
DTRACE_SKYWALK2(unsent, if_redirect_t, rd, struct pktq *, &tx_pktq);
STATS_ADD(nifs, NETIF_STATS_DROP_NO_DELEGATE, KPKTQ_LEN(&tx_pktq));
pp_free_pktq(&tx_pktq);
}
return error;
}
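/*
 * Decide whether an inbound packet should be kept for the delegate
 * interface instead of being redirected. The packet must already carry
 * flowswitch classifier metadata (QUM_F_FLOW_CLASSIFIED). Currently only
 * ICMPv6 Router Advertisements qualify, presumably so the delegate's
 * own IPv6 stack still sees them.
 */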
static boolean_t
pkt_is_for_delegate(if_redirect_t rd, struct __kern_packet *pkt)
{
#if !(DEVELOPMENT || DEBUG)
#pragma unused(rd)
#endif
uint8_t proto;
uint8_t *hdr;
uint32_t l4len;
if ((pkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) == 0) {
DTRACE_SKYWALK2(not__classified, if_redirect_t, rd,
struct __kern_packet *, pkt);
return FALSE;
}
if (pkt->pkt_flow_ip_hdr == 0 || pkt->pkt_flow_ip_hlen == 0) {
RDLOG_ERR("%s: classifier info missing", rd->rd_name);
DTRACE_SKYWALK2(classifier__info__missing, if_redirect_t, rd,
struct __kern_packet *, pkt);
return FALSE;
}
proto = pkt->pkt_flow_ip_proto;
l4len = pkt->pkt_length - pkt->pkt_l2_len - pkt->pkt_flow_ip_hlen;
hdr = __unsafe_forge_bidi_indexable(uint8_t *, pkt->pkt_flow_ip_hdr + pkt->pkt_flow_ip_hlen,
l4len);
if (proto == IPPROTO_ICMPV6) {
struct icmp6_hdr *icmp6;
if (l4len < sizeof(*icmp6)) {
RDLOG_ERR("%s: l4len(%u) < icmp6len(%lu)", rd->rd_name,
l4len, sizeof(*icmp6));
DTRACE_SKYWALK3(too__small__v6, if_redirect_t, rd,
struct __kern_packet *, pkt, uint32_t, l4len);
return FALSE;
}
icmp6 = (struct icmp6_hdr *)(void *)hdr;
if (icmp6->icmp6_type == ND_ROUTER_ADVERT) {
DTRACE_SKYWALK3(icmp6__ra, if_redirect_t, rd,
struct __kern_packet *, pkt, struct icmp6_hdr *, icmp6);
return TRUE;
}
}
return FALSE;
}
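/*
 * Flowswitch RX callback installed on the delegate interface. Packets
 * destined for the delegate itself (per pkt_is_for_delegate()) are
 * returned to the caller via spktq; everything else is copied onto the
 * redirect interface's RX ring and tapped to BPF. Packets are dropped
 * if the RX ring is gone or full, or if the copy fails.
 */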
static void
redirect_rx_cb(void *arg, struct pktq *spktq)
{
if_redirect_t __single rd = arg;
struct __kern_packet *spkt, *pkt;
struct pktq rpktq;
kern_packet_t ph;
kern_channel_ring_t rx_ring = NULL;
kern_channel_slot_t rx_slot = NULL, last_rx_slot = NULL;
struct kern_channel_ring_stat_increment stats;
int err;
/*
* The ring cannot disappear before the callback is finished and removed.
*/
rx_ring = rd->rd_rx_ring[0];
if (rx_ring == NULL) {
DTRACE_SKYWALK2(no__ring__drop, if_redirect_t, rd, struct pktq *, spktq);
pp_free_pktq(spktq);
return;
}
KPKTQ_INIT(&rpktq);
bzero(&stats, sizeof(stats));
kr_enter(rx_ring, TRUE);
kern_channel_reclaim(rx_ring);
while (KPKTQ_LEN(spktq) > 0) {
KPKTQ_DEQUEUE(spktq, spkt);
if (pkt_is_for_delegate(rd, spkt)) {
KPKTQ_ENQUEUE(&rpktq, spkt);
continue;
}
rx_slot = kern_channel_get_next_slot(rx_ring, last_rx_slot, NULL);
if (rx_slot == NULL) {
DTRACE_SKYWALK2(no__slot__drop, if_redirect_t, rd,
struct __kern_packet *, spkt);
pp_free_packet_single(spkt);
continue;
}
pkt = nx_netif_pkt_to_pkt(rd->rd_ifp->if_na, spkt, NETIF_CONVERT_RX);
if (pkt == NULL) {
DTRACE_SKYWALK1(copy__drop, if_redirect_t, rd);
continue;
}
ph = SK_PKT2PH(pkt);
stats.kcrsi_slots_transferred++;
stats.kcrsi_bytes_transferred += kern_packet_get_data_length(ph);
redirect_bpf_tap(rd->rd_ifp, ph, true);
err = kern_channel_slot_attach_packet(rx_ring, rx_slot, ph);
VERIFY(err == 0);
last_rx_slot = rx_slot;
}
ASSERT(KPKTQ_EMPTY(spktq));
KPKTQ_CONCAT(spktq, &rpktq);
if (last_rx_slot != NULL) {
kern_channel_advance_slot(rx_ring, last_rx_slot);
kern_channel_increment_ring_net_stats(rx_ring, rd->rd_ifp, &stats);
}
kr_exit(rx_ring);
if (last_rx_slot != NULL) {
kern_channel_notify(rx_ring, 0);
}
}
static errno_t
redirect_nx_sync_rx(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
kern_channel_ring_t ring, uint32_t flags)
{
#pragma unused(nxprov, nexus, ring, flags)
return 0;
}
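/*
 * Thread-call handler for the asynchronous TX doorbell: refill the TX
 * ring outside of the doorbell calling context. redirect_nx_ring_fini()
 * may be sleeping until rd_doorbell_tcall_active clears, so wake any
 * waiter on the way out.
 */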
static void
redirect_async_doorbell(thread_call_param_t arg0, thread_call_param_t arg1)
{
#pragma unused(arg1)
errno_t error;
if_redirect_t rd = (if_redirect_t)arg0;
kern_channel_ring_t ring;
boolean_t more;
RD_LOCK(rd);
ring = rd->rd_tx_ring[0];
if (__improbable(!redirect_is_usable(rd) || ring == NULL)) {
DTRACE_SKYWALK2(unusable, if_redirect_t, rd, kern_channel_ring_t, ring);
goto done;
}
rd->rd_doorbell_tcall_active = TRUE;
RD_UNLOCK(rd);
error = kern_channel_tx_refill(ring, UINT32_MAX, UINT32_MAX, FALSE,
&more);
if (error != 0 && error != EAGAIN) {
RDLOG_ERR("%s: Tx refill failed %d", rd->rd_name, error);
} else {
RDLOG_DBG("%s: Tx refilled", rd->rd_name);
}
RD_LOCK(rd);
done:
rd->rd_doorbell_tcall_active = FALSE;
if (rd->rd_waiting_for_tcall) {
RDLOG_INFO("%s: threadcall waking up waiter", rd->rd_name);
wakeup((caddr_t)rd);
}
RD_UNLOCK(rd);
}
static void
redirect_schedule_async_doorbell(if_redirect_t rd)
{
thread_call_t __single tcall;
RD_LOCK(rd);
if (__improbable(!redirect_is_usable(rd))) {
DTRACE_SKYWALK1(unusable, if_redirect_t, rd);
RD_UNLOCK(rd);
return;
}
tcall = rd->rd_doorbell_tcall;
if (tcall != NULL) {
thread_call_enter(tcall);
} else {
tcall = thread_call_allocate_with_options(redirect_async_doorbell,
(thread_call_param_t)rd,
THREAD_CALL_PRIORITY_KERNEL,
THREAD_CALL_OPTIONS_ONCE);
if (tcall == NULL) {
RDLOG_ERR("%s: tcall alloc failed", rd->rd_name);
} else {
rd->rd_doorbell_tcall = tcall;
redirect_retain(rd);
thread_call_enter(tcall);
}
}
RD_UNLOCK(rd);
}
static errno_t
redirect_nx_tx_doorbell(kern_nexus_provider_t nxprov, kern_nexus_t nexus,
kern_channel_ring_t ring, uint32_t flags)
{
#pragma unused(nxprov, ring, flags)
errno_t error;
if_redirect_t rd;
rd = redirect_nexus_context(nexus);
RDLOG_DBG("%s", rd->rd_name);
if ((flags & KERN_NEXUS_TXDOORBELLF_ASYNC_REFILL) == 0) {
boolean_t more;
/* synchronous tx refill */
error = kern_channel_tx_refill(ring, UINT32_MAX, UINT32_MAX,
TRUE, &more);
if (error != 0 && error != EAGAIN) {
RDLOG_ERR("%s: Tx refill (sync) %d", rd->rd_name, error);
} else {
RDLOG_DBG("%s: Tx refilled (sync)", rd->rd_name);
}
} else {
RDLOG_DBG("%s: schedule async refill", rd->rd_name);
redirect_schedule_async_doorbell(rd);
}
return 0;
}
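/*
 * Nexus prepare callback invoked while the interface is being created:
 * establish baseline ifnet properties. Ethernet-style interfaces get
 * broadcast/multicast/simplex flags and Ethernet framing parameters;
 * other families are treated as multicast point-to-point links.
 */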
static errno_t
redirect_netif_prepare(kern_nexus_t nexus, ifnet_t ifp)
{
if_redirect_t rd;
rd = (if_redirect_t)kern_nexus_get_context(nexus);
(void)ifnet_set_capabilities_enabled(ifp, 0, -1);
ifnet_set_baudrate(ifp, 0);
ifnet_set_mtu(ifp, ETHERMTU);
ifnet_set_offload(ifp, 0);
if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
ifnet_set_flags(ifp,
IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX, 0xffff);
ifnet_set_addrlen(ifp, ETHER_ADDR_LEN);
ifnet_set_hdrlen(ifp, sizeof(struct ether_header));
} else {
ifnet_set_flags(ifp, IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
}
return 0;
}
static void
redirect_delegate_adv_config(ifnet_t delegate_ifp, bool enable)
{
struct nx_netif *delegate_nif;
ASSERT(delegate_ifp != NULL);
if (!SKYWALK_NATIVE(delegate_ifp)) {
RDLOG_ERR("%s is not skywalk native", if_name(delegate_ifp));
DTRACE_SKYWALK1(not__native, ifnet_t, delegate_ifp);
return;
}
delegate_nif = NA(delegate_ifp)->nifna_netif;
nx_netif_config_interface_advisory(delegate_nif->nif_nx, enable);
}
static errno_t
redirect_nx_intf_adv_config(void *prov_ctx, bool enable)
{
if_redirect_t rd = (if_redirect_t)prov_ctx;
RD_LOCK(rd);
if (!redirect_is_usable(rd)) {
RDLOG_ERR("cannot %s advisory on %s because it is not usable",
enable ? "enable" : "disable", if_name(rd->rd_ifp));
DTRACE_SKYWALK1(unusable, if_redirect_t, rd);
RD_UNLOCK(rd);
return ENXIO;
}
if (rd->rd_intf_adv_enabled == enable) {
RDLOG_ERR("advisory is already %s on %s",
enable ? "enable" : "disable", if_name(rd->rd_ifp));
DTRACE_SKYWALK1(advisory__already__set, if_redirect_t, rd);
RD_UNLOCK(rd);
return ENXIO;
}
if (!rd->rd_delegate_set) {
RDLOG_ERR("delegate is not set on %s", if_name(rd->rd_ifp));
DTRACE_SKYWALK1(no__delegate, if_redirect_t, rd);
RD_UNLOCK(rd);
return ENXIO;
}
redirect_delegate_adv_config(rd->rd_delegate_ifp, enable);
rd->rd_intf_adv_enabled = enable;
RD_UNLOCK(rd);
return 0;
}
static errno_t
fill_capab_interface_advisory(if_redirect_t rd, void *contents,
uint32_t *len)
{
struct kern_nexus_capab_interface_advisory * __single capab = contents;
if (*len != sizeof(*capab)) {
DTRACE_SKYWALK2(invalid__len, uint32_t, *len, size_t, sizeof(*capab));
return EINVAL;
}
if (capab->kncia_version !=
KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1) {
DTRACE_SKYWALK2(invalid__ver, uint32_t, capab->kncia_version,
uint32_t, KERN_NEXUS_CAPAB_INTERFACE_ADVISORY_VERSION_1);
return EINVAL;
}
VERIFY(capab->kncia_notify != NULL);
rd->rd_intf_adv_kern_ctx = capab->kncia_kern_context;
rd->rd_intf_adv_notify = capab->kncia_notify;
capab->kncia_provider_context = rd;
capab->kncia_config = redirect_nx_intf_adv_config;
return 0;
}
static errno_t
redirect_nx_capab_config(kern_nexus_provider_t nxprov, kern_nexus_t nx,
kern_nexus_capab_t capab, void *contents, uint32_t *len)
{
#pragma unused(nxprov)
errno_t error;
if_redirect_t rd;
rd = redirect_nexus_context(nx);
switch (capab) {
case KERN_NEXUS_CAPAB_INTERFACE_ADVISORY:
error = fill_capab_interface_advisory(rd, contents, len);
break;
default:
error = ENOTSUP;
break;
}
return error;
}
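/*
 * Register a netif nexus provider for this interface and allocate a
 * provider instance, which also creates the ifnet. On failure the
 * provider registration is rolled back; the nexus attribute object is
 * released on all paths.
 */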
static errno_t
create_netif_provider_and_instance(if_redirect_t rd,
struct ifnet_init_eparams *init_params, ifnet_t *ifp,
uuid_t *provider, uuid_t *instance)
{
errno_t err = 0;
nexus_controller_t controller = kern_nexus_shared_controller();
struct kern_nexus_net_init net_init = {};
nexus_name_t provider_name = {};
nexus_attr_t __single nexus_attr = NULL;
struct kern_nexus_provider_init prov_init = {
.nxpi_version = KERN_NEXUS_DOMAIN_PROVIDER_CURRENT_VERSION,
.nxpi_flags = NXPIF_VIRTUAL_DEVICE,
.nxpi_pre_connect = redirect_nx_pre_connect,
.nxpi_connected = redirect_nx_connected,
.nxpi_pre_disconnect = redirect_nx_pre_disconnect,
.nxpi_disconnected = redirect_nx_disconnected,
.nxpi_ring_init = redirect_nx_ring_init,
.nxpi_ring_fini = redirect_nx_ring_fini,
.nxpi_slot_init = redirect_nx_slot_init,
.nxpi_slot_fini = redirect_nx_slot_fini,
.nxpi_sync_tx = redirect_nx_sync_tx,
.nxpi_sync_rx = redirect_nx_sync_rx,
.nxpi_tx_doorbell = redirect_nx_tx_doorbell,
.nxpi_config_capab = redirect_nx_capab_config,
};
err = kern_nexus_attr_create(&nexus_attr);
if (err != 0) {
RDLOG_ERR("%s nexus attribution creation failed, error: %d",
rd->rd_name, err);
DTRACE_SKYWALK2(attr__create__failed, if_redirect_t, rd, int, err);
goto failed;
}
snprintf((char *)provider_name, sizeof(provider_name),
"com.apple.netif.%s", rd->rd_name);
err = kern_nexus_controller_register_provider(controller,
redirect_nx_dom_prov,
provider_name,
&prov_init,
sizeof(prov_init),
nexus_attr,
provider);
if (err != 0) {
RDLOG_ERR("%s register provider failed, error %d", rd->rd_name, err);
DTRACE_SKYWALK2(register__failed, if_redirect_t, rd, int, err);
goto failed;
}
net_init.nxneti_version = KERN_NEXUS_NET_CURRENT_VERSION;
net_init.nxneti_flags = 0;
net_init.nxneti_eparams = init_params;
net_init.nxneti_lladdr = NULL;
net_init.nxneti_prepare = redirect_netif_prepare;
net_init.nxneti_rx_pbufpool = rd->rd_pp;
net_init.nxneti_tx_pbufpool = rd->rd_pp;
err = kern_nexus_controller_alloc_net_provider_instance(controller,
*provider, rd, NULL, instance, &net_init, ifp);
if (err != 0) {
RDLOG_ERR("%s alloc net provider instance failed %d", rd->rd_name, err);
DTRACE_SKYWALK2(alloc__provider__instance__failed, if_redirect_t, rd, int, err);
kern_nexus_controller_deregister_provider(controller, *provider);
uuid_clear(*provider);
goto failed;
}
failed:
if (nexus_attr != NULL) {
kern_nexus_attr_destroy(nexus_attr);
}
return err;
}
static errno_t
redirect_attach_netif_nexus(if_redirect_t rd,
struct ifnet_init_eparams *init_params, ifnet_t *ifp)
{
errno_t error = 0;
redirect_nx_t nx = &rd->rd_nx;
error = redirect_packet_pool_make(rd);
if (error != 0) {
RDLOG_ERR("%s packet pool make failed: %d", rd->rd_name, error);
DTRACE_SKYWALK2(pool__make__failed, if_redirect_t, rd, int, error);
return error;
}
return create_netif_provider_and_instance(rd, init_params, ifp,
&nx->rnx_provider, &nx->rnx_instance);
}
static void
detach_provider_and_instance(uuid_t provider, uuid_t instance)
{
nexus_controller_t controller = kern_nexus_shared_controller();
errno_t err;
if (!uuid_is_null(instance)) {
err = kern_nexus_controller_free_provider_instance(controller,
instance);
if (err != 0) {
RDLOG_ERR("free_provider_instance failed %d", err);
}
uuid_clear(instance);
}
if (!uuid_is_null(provider)) {
err = kern_nexus_controller_deregister_provider(controller,
provider);
if (err != 0) {
RDLOG_ERR("deregister_provider failed %d", err);
}
uuid_clear(provider);
}
return;
}
static void
redirect_detach_netif_nexus(if_redirect_t rd)
{
redirect_nx_t rnx = &rd->rd_nx;
detach_provider_and_instance(rnx->rnx_provider, rnx->rnx_instance);
}
static void
interface_link_event(ifnet_t ifp, uint32_t event_code)
{
struct event {
uint32_t ifnet_family;
uint32_t unit;
char if_name[IFNAMSIZ];
};
_Alignas(struct kern_event_msg) char message[sizeof(struct kern_event_msg) + sizeof(struct event)] = { 0 };
struct kern_event_msg *header = (struct kern_event_msg *)message;
struct event *data = (struct event *)(message + KEV_MSG_HEADER_SIZE);
header->total_size = sizeof(message);
header->vendor_code = KEV_VENDOR_APPLE;
header->kev_class = KEV_NETWORK_CLASS;
header->kev_subclass = KEV_DL_SUBCLASS;
header->event_code = event_code;
data->ifnet_family = ifnet_family(ifp);
data->unit = (uint32_t)ifnet_unit(ifp);
strlcpy(data->if_name, ifnet_name(ifp), IFNAMSIZ);
ifnet_event(ifp, header);
}
static if_redirect_t
ifnet_get_if_redirect(ifnet_t ifp)
{
return (if_redirect_t)ifnet_softc(ifp);
}
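/*
 * Clone-create handler: copy in and validate the creation parameters,
 * allocate the soft state, and attach the netif nexus, which creates
 * and attaches the ifnet. The interface name serves as the unique id so
 * that a recycled ifp keeps the same identity.
 */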
static int
redirect_clone_create(struct if_clone *ifc, uint32_t unit, void *param)
{
int error;
if_redirect_t rd;
struct ifnet_init_eparams rd_init;
struct if_redirect_create_params params;
user_addr_t param_addr = (user_addr_t)param;
ifnet_t __single ifp;
if (param_addr == USER_ADDR_NULL) {
RDLOG_ERR("create params not specified");
DTRACE_SKYWALK2(no__param, struct if_clone *, ifc, uint32_t, unit);
return EINVAL;
}
error = copyin(param_addr, &params, sizeof(params));
if (error != 0) {
RDLOG_ERR("copyin failed: error %d", error);
DTRACE_SKYWALK1(copyin__failed, int, error);
return error;
}
if ((params.ircp_type != RD_CREATE_PARAMS_TYPE &&
params.ircp_type != RD_CREATE_PARAMS_TYPE_NOATTACH) ||
params.ircp_len != sizeof(params)) {
RDLOG_ERR("invalid type(0x%x) or len(0x%d)", params.ircp_type,
params.ircp_len);
DTRACE_SKYWALK2(invalid__params, uint16_t, params.ircp_type,
uint16_t, params.ircp_len);
return EINVAL;
}
if (params.ircp_ftype != IFRTYPE_FAMILY_ETHERNET &&
params.ircp_ftype != IFRTYPE_FAMILY_CELLULAR) {
RDLOG_ERR("functional type(0x%x) not supported", params.ircp_ftype);
DTRACE_SKYWALK1(invalid__ftype, uint32_t, params.ircp_ftype);
return ENOTSUP;
}
rd = kalloc_type(if_redirect, Z_WAITOK | Z_ZERO | Z_NOFAIL);
RD_LOCK_INIT(rd);
rd->rd_ftype = params.ircp_ftype;
rd->rd_retain_count = 1;
rd->rd_max_mtu = RD_MAX_MTU;
/* use the interface name as the unique id for ifp recycle */
if ((unsigned int)
snprintf(rd->rd_name, sizeof(rd->rd_name), "%s%d",
ifc->ifc_name, unit) >= sizeof(rd->rd_name)) {
redirect_release(rd);
RDLOG_ERR("invalid ifc_name(%s) or unit(%d)", ifc->ifc_name, unit);
DTRACE_SKYWALK2(invalid__name__or__unit, char *, ifc->ifc_name,
uint32_t, unit);
return EINVAL;
}
bzero(&rd_init, sizeof(rd_init));
rd_init.ver = IFNET_INIT_CURRENT_VERSION;
rd_init.len = sizeof(rd_init);
rd_init.flags |= (IFNET_INIT_SKYWALK_NATIVE | IFNET_INIT_IF_ADV);
if (params.ircp_type == RD_CREATE_PARAMS_TYPE_NOATTACH) {
rd_init.flags |= IFNET_INIT_NX_NOAUTO;
}
rd_init.uniqueid_len = (uint32_t)strbuflen(rd->rd_name);
rd_init.uniqueid = rd->rd_name;
rd_init.name = __unsafe_null_terminated_from_indexable(ifc->ifc_name);
rd_init.unit = unit;
rd_init.softc = rd;
rd_init.ioctl = redirect_ioctl;
rd_init.detach = redirect_if_free;
rd_init.subfamily = IFNET_SUBFAMILY_REDIRECT;
if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
rd_init.family = IFNET_FAMILY_ETHERNET;
rd_init.type = IFT_ETHER;
rd_init.demux = ether_demux;
rd_init.add_proto = ether_add_proto;
rd_init.del_proto = ether_del_proto;
rd_init.check_multi = ether_check_multi;
rd_init.framer_extended = ether_frameout_extended;
rd_init.broadcast_addr = etherbroadcastaddr;
rd_init.broadcast_len = ETHER_ADDR_LEN;
} else {
rd_init.family = IFNET_FAMILY_CELLULAR;
rd_init.type = IFT_CELLULAR;
rd_init.demux = redirect_demux;
rd_init.add_proto = redirect_add_proto;
rd_init.del_proto = redirect_del_proto;
}
error = redirect_attach_netif_nexus(rd, &rd_init, &ifp);
if (error != 0) {
redirect_release(rd);
RDLOG_ERR("attach netif nexus failed: error %d", error);
DTRACE_SKYWALK1(attach__nexus__failed, int, error);
return error;
}
/* take an additional reference for nexus controller */
redirect_retain(rd);
rd->rd_ifp = ifp;
if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
/* mac address will be set after delegate is configured */
(void) ifnet_set_lladdr(ifp, default_mac, ETHER_ADDR_LEN);
bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header));
} else {
bpfattach(ifp, DLT_RAW, 0);
}
return 0;
}
/*
* This function is meant for cleaning up everything, not just delegate
* related info.
*/
static void
redirect_cleanup(if_redirect_t rd)
{
redirect_clear_delegate(rd);
rd->rd_intf_adv_enabled = false;
}
static int
redirect_clone_destroy(ifnet_t ifp)
{
if_redirect_t rd;
rd = ifnet_get_if_redirect(ifp);
if (rd == NULL) {
RDLOG_ERR("rd is NULL");
DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
return ENXIO;
}
RD_LOCK(rd);
if (rd->rd_detaching) {
RDLOG_ERR("%s is detaching", rd->rd_name);
DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
RD_UNLOCK(rd);
return 0;
}
rd->rd_detaching = TRUE;
RD_UNLOCK(rd);
redirect_cleanup(rd);
redirect_detach_netif_nexus(rd);
/*
* Releasing reference held for nexus controller
*/
redirect_release(rd);
interface_link_event(ifp, KEV_DL_LINK_OFF);
ifnet_detach(ifp);
return 0;
}
static int
if_redirect_request_copyin(user_addr_t user_addr,
struct if_redirect_request *ifrr, uint64_t len)
{
int error;
if (user_addr == USER_ADDR_NULL || len < sizeof(*ifrr)) {
RDLOG_ERR("user_addr(0x%llx) or len(%llu) < %lu",
user_addr, len, sizeof(*ifrr));
error = EINVAL;
goto done;
}
error = copyin(user_addr, ifrr, sizeof(*ifrr));
if (error != 0) {
RDLOG_ERR("copyin failed: %d", error);
goto done;
}
if (ifrr->ifrr_reserved[0] != 0 || ifrr->ifrr_reserved[1] != 0 ||
ifrr->ifrr_reserved[2] != 0 || ifrr->ifrr_reserved[3] != 0) {
RDLOG_ERR("reserved[0]=0x%llu, reserved[1]=0x%llu"
"reserved[2]=0x%llu, reserved[3]=0x%llu", ifrr->ifrr_reserved[0],
ifrr->ifrr_reserved[1], ifrr->ifrr_reserved[2],
ifrr->ifrr_reserved[3]);
error = EINVAL;
goto done;
}
done:
return error;
}
static void
redirect_detach_notify(void *arg)
{
if_redirect_t __single rd = arg;
redirect_clear_delegate(rd);
}
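/*
 * Install delegate_ifp as this interface's delegate. Setup proceeds in
 * ordered steps (self I/O reference, delegate parent, delegate I/O
 * reference, flowswitch RX callback, delegate assignment, MAC address
 * adoption, detach notification), each recorded in an rd_* flag so that
 * a failure at any step unwinds cleanly via
 * redirect_clear_delegate_locked().
 */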
static int
redirect_set_delegate(if_redirect_t rd, ifnet_t delegate_ifp)
{
ifnet_t ifp = rd->rd_ifp;
int error;
RD_LOCK(rd);
if (rd->rd_detaching) {
RDLOG_ERR("%s is detaching", rd->rd_name);
DTRACE_SKYWALK2(detaching, if_redirect_t, rd, ifnet_t, delegate_ifp);
RD_UNLOCK(rd);
return ENXIO;
}
if (rd->rd_delegate_ifp != NULL) {
if (rd->rd_delegate_ifp == delegate_ifp) {
RDLOG_ERR("cannot configure the same delegate");
DTRACE_SKYWALK2(same__ifp, if_redirect_t, rd,
ifnet_t, delegate_ifp);
RD_UNLOCK(rd);
return EALREADY;
} else {
redirect_clear_delegate_locked(rd);
}
}
ASSERT(rd->rd_delegate_ifp == NULL);
if (!ifnet_is_attached(ifp, 1)) {
RDLOG_ERR("failed to get self reference");
DTRACE_SKYWALK2(ifp__detaching, if_redirect_t, rd, ifnet_t, ifp);
error = ENXIO;
goto fail;
}
ASSERT(!rd->rd_self_ref);
rd->rd_self_ref = TRUE;
/* This saves the reference taken above */
error = ifnet_set_delegate_parent(delegate_ifp, ifp);
if (error != 0) {
RDLOG_ERR("failed to set delegate parent");
DTRACE_SKYWALK4(set__delegate__parent__failed, if_redirect_t, rd,
ifnet_t, delegate_ifp, ifnet_t, ifp, int, error);
goto fail;
}
ASSERT(!rd->rd_delegate_parent_set);
rd->rd_delegate_parent_set = TRUE;
if (!ifnet_is_attached(delegate_ifp, 1)) {
RDLOG_ERR("failed to get delegate reference");
DTRACE_SKYWALK2(delegate__detaching, if_redirect_t, rd,
ifnet_t, delegate_ifp);
error = ENXIO;
goto fail;
}
ASSERT(rd->rd_delegate_ifp == NULL);
rd->rd_delegate_ifp = delegate_ifp;
ASSERT(!rd->rd_delegate_ref);
rd->rd_delegate_ref = TRUE;
error = ifnet_set_flowswitch_rx_callback(delegate_ifp, redirect_rx_cb, rd);
if (error != 0) {
RDLOG_ERR("failed to set fsw rx callback: %d", error);
DTRACE_SKYWALK3(set__fsw__rx__cb__fail, if_redirect_t, rd, ifnet_t,
delegate_ifp, int, error);
goto fail;
}
ASSERT(!rd->rd_fsw_rx_cb_set);
rd->rd_fsw_rx_cb_set = TRUE;
error = ifnet_set_delegate(ifp, delegate_ifp);
if (error != 0) {
RDLOG_ERR("failed to set delegate ifp: %d", error);
DTRACE_SKYWALK4(set__delegate__fail, if_redirect_t, rd, ifnet_t, ifp,
ifnet_t, delegate_ifp, int, error);
goto fail;
}
ASSERT(!rd->rd_delegate_set);
rd->rd_delegate_set = TRUE;
if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
uint8_t mac_addr[ETHER_ADDR_LEN];
error = ifnet_lladdr_copy_bytes(delegate_ifp, mac_addr,
ETHER_ADDR_LEN);
if (error != 0) {
RDLOG_ERR("failed to get mac addr from %s, error %d",
if_name(delegate_ifp), error);
DTRACE_SKYWALK3(lladdr__copy__fail, if_redirect_t, rd,
ifnet_t, delegate_ifp, int, error);
goto fail;
}
error = ifnet_set_lladdr(ifp, mac_addr, ETHER_ADDR_LEN);
if (error != 0) {
RDLOG_ERR("failed to set mac addr for %s, error %d",
if_name(ifp), error);
DTRACE_SKYWALK3(set__lladdr__fail, if_redirect_t, rd,
ifnet_t, ifp, int, error);
goto fail;
}
ASSERT(!rd->rd_mac_addr_set);
rd->rd_mac_addr_set = TRUE;
}
/*
* This is enabled out-of-band from redirect_set_delegate() but we should do
* this here in case we move to a different delegate.
*/
if (rd->rd_intf_adv_enabled) {
redirect_delegate_adv_config(delegate_ifp, true);
}
ifnet_set_detach_notify(delegate_ifp, redirect_detach_notify, rd);
rd->rd_detach_notify_set = TRUE;
/*
* Check that the delegate is still attached. If not, the detach notify above
* could've been missed and we would have to cleanup everything here.
*/
if (!ifnet_is_attached(delegate_ifp, 0)) {
RDLOG_ERR("delegate %s detached during setup", if_name(delegate_ifp));
DTRACE_SKYWALK2(delegate__detached, if_redirect_t, rd,
ifnet_t, delegate_ifp);
error = ENXIO;
goto fail;
}
RD_UNLOCK(rd);
return 0;
fail:
redirect_clear_delegate_locked(rd);
RD_UNLOCK(rd);
return error;
}
static void
redirect_clear_delegate_locked(if_redirect_t rd)
{
ifnet_t ifp = rd->rd_ifp;
ifnet_t delegate_ifp = rd->rd_delegate_ifp;
int error;
if (rd->rd_detach_notify_set) {
ASSERT(delegate_ifp != NULL);
ifnet_set_detach_notify(delegate_ifp, NULL, NULL);
rd->rd_detach_notify_set = FALSE;
}
if (rd->rd_intf_adv_enabled && delegate_ifp != NULL) {
redirect_delegate_adv_config(delegate_ifp, false);
/*
* We don't clear rd_intf_adv_enabled because we want to reenable
* advisory after moving to a different delegate.
*/
}
if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET && rd->rd_mac_addr_set) {
ASSERT(delegate_ifp != NULL);
error = ifnet_set_lladdr(ifp, default_mac, ETHER_ADDR_LEN);
if (error != 0) {
RDLOG_ERR("failed to set mac addr for %s, error %d",
if_name(ifp), error);
DTRACE_SKYWALK3(set__lladdr__fail, if_redirect_t, rd,
ifnet_t, ifp, int, error);
}
rd->rd_mac_addr_set = FALSE;
}
if (rd->rd_delegate_set) {
ASSERT(delegate_ifp != NULL);
(void) ifnet_set_delegate(ifp, NULL);
rd->rd_delegate_set = FALSE;
}
if (rd->rd_fsw_rx_cb_set) {
ASSERT(delegate_ifp != NULL);
(void) ifnet_set_flowswitch_rx_callback(delegate_ifp, NULL, NULL);
rd->rd_fsw_rx_cb_set = FALSE;
}
if (rd->rd_delegate_ref) {
ASSERT(delegate_ifp != NULL);
rd->rd_delegate_ifp = NULL;
ifnet_decr_iorefcnt(delegate_ifp);
rd->rd_delegate_ref = FALSE;
}
if (rd->rd_delegate_parent_set) {
ASSERT(delegate_ifp != NULL);
ifnet_set_delegate_parent(delegate_ifp, NULL);
rd->rd_delegate_parent_set = FALSE;
}
if (rd->rd_self_ref) {
ifnet_decr_iorefcnt(ifp);
rd->rd_self_ref = FALSE;
}
}
static void
redirect_clear_delegate(if_redirect_t rd)
{
RD_LOCK(rd);
redirect_clear_delegate_locked(rd);
RD_UNLOCK(rd);
}
static int
redirect_ioctl_set_delegate(ifnet_t ifp, user_addr_t user_addr, uint64_t len)
{
if_redirect_t rd = NULL;
struct if_redirect_request ifrr;
ifnet_t delegate_ifp = NULL;
int error;
error = if_redirect_request_copyin(user_addr, &ifrr, len);
if (error != 0) {
RDLOG_ERR("if_redirect_request_copyin failed: error %d", error);
DTRACE_SKYWALK4(copyin__failed, ifnet_t, ifp, user_addr_t, user_addr,
uint64_t, len, int, error);
goto done;
}
if (ifrr.ifrr_delegate_name[0] == '\0') {
RDLOG_ERR("NULL delegate name");
DTRACE_SKYWALK1(null__delegate, ifnet_t, ifp);
error = EINVAL;
goto done;
}
/* ensure null termination */
ifrr.ifrr_delegate_name[IFNAMSIZ - 1] = '\0';
delegate_ifp = ifunit_ref(__unsafe_null_terminated_from_indexable(ifrr.ifrr_delegate_name));
if (delegate_ifp == NULL) {
RDLOG_ERR("delegate %s not found", ifrr.ifrr_delegate_name);
DTRACE_SKYWALK2(invalid__name, ifnet_t, ifp, char *,
ifrr.ifrr_delegate_name);
error = ENOENT;
goto done;
}
rd = ifnet_get_if_redirect(ifp);
if (rd == NULL) {
RDLOG_ERR("rd is NULL");
DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
error = ENOENT;
goto done;
}
/* Verify that the delegate type is supported */
if (rd->rd_ftype == IFRTYPE_FAMILY_ETHERNET) {
if (delegate_ifp->if_family != IFNET_FAMILY_ETHERNET) {
RDLOG_ERR("%s's family %d not compatible "
"with ethernet functional type", if_name(delegate_ifp),
delegate_ifp->if_family);
DTRACE_SKYWALK2(delegate__incompatible__ether, if_redirect_t, rd,
ifnet_t, delegate_ifp);
error = EINVAL;
goto done;
}
if (ifnet_is_low_latency(delegate_ifp)) {
RDLOG_ERR("low latency %s cannot be a delegate",
if_name(delegate_ifp));
DTRACE_SKYWALK2(delegate__is__ll, if_redirect_t, rd,
ifnet_t, delegate_ifp);
error = EINVAL;
goto done;
}
} else {
ASSERT(rd->rd_ftype == IFRTYPE_FAMILY_CELLULAR);
if (delegate_ifp->if_family != IFNET_FAMILY_CELLULAR &&
delegate_ifp->if_family != IFNET_FAMILY_UTUN &&
delegate_ifp->if_family != IFNET_FAMILY_IPSEC) {
RDLOG_ERR("%s's family %d not compatible "
"with cellular functional type", if_name(delegate_ifp),
delegate_ifp->if_family);
DTRACE_SKYWALK2(delegate__incompatible__cell, if_redirect_t, rd,
ifnet_t, delegate_ifp);
error = EINVAL;
goto done;
}
}
if (delegate_ifp->if_subfamily == IFNET_SUBFAMILY_REDIRECT) {
RDLOG_ERR("delegate %s cannot be redirect", if_name(delegate_ifp));
DTRACE_SKYWALK2(delegate__is__redirect, if_redirect_t, rd,
ifnet_t, delegate_ifp);
error = EINVAL;
goto done;
}
error = redirect_set_delegate(rd, delegate_ifp);
done:
if (delegate_ifp != NULL) {
ifnet_decr_iorefcnt(delegate_ifp);
}
return error;
}
static int
redirect_set_drvspec(ifnet_t ifp, uint64_t cmd, uint64_t len,
user_addr_t user_addr)
{
int error;
switch (cmd) {
case RD_S_CMD_SET_DELEGATE:
error = redirect_ioctl_set_delegate(ifp, user_addr, len);
break;
default:
error = EOPNOTSUPP;
break;
}
return error;
}
static int
redirect_get_drvspec(ifnet_t ifp, uint64_t cmd, uint64_t len,
user_addr_t user_addr)
{
#pragma unused(ifp, cmd, len, user_addr)
return 0;
}
union ifdrvu {
struct ifdrv32 *ifdrvu_32;
struct ifdrv64 *ifdrvu_64;
void *ifdrvu_p;
};
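/*
 * Interface ioctl handler. Media requests report a single active
 * Ethernet medium; SIOC[GS]DRVSPEC carries the driver-private commands
 * (currently only RD_S_CMD_SET_DELEGATE), with set operations
 * restricted to the superuser.
 */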
static errno_t
redirect_ioctl(ifnet_t ifp, u_long cmd, void *data)
{
if_redirect_t rd = NULL;
struct ifreq *ifr = NULL;
union ifdrvu drv;
uint64_t drv_cmd;
uint64_t drv_len;
boolean_t drv_set_command = FALSE;
user_addr_t user_addr;
int error = 0;
rd = ifnet_get_if_redirect(ifp);
if (rd == NULL) {
RDLOG_ERR("rd is NULL");
DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
return ENXIO;
}
RD_LOCK(rd);
if (rd->rd_detaching) {
RDLOG_ERR("%s is detaching", rd->rd_name);
DTRACE_SKYWALK1(detaching, if_redirect_t, rd);
RD_UNLOCK(rd);
return ENXIO;
}
RD_UNLOCK(rd);
ifr = (struct ifreq *)data;
switch (cmd) {
case SIOCSIFADDR:
ifnet_set_flags(ifp, IFF_UP, IFF_UP);
break;
case SIOCGIFMEDIA32:
case SIOCGIFMEDIA64: {
struct ifmediareq32 *ifmr;
RD_LOCK(rd);
if (rd->rd_ftype != IFRTYPE_FAMILY_ETHERNET) {
DTRACE_SKYWALK1(not__ether, if_redirect_t, rd);
RD_UNLOCK(rd);
return EOPNOTSUPP;
}
ifmr = (struct ifmediareq32 *)data;
ifmr->ifm_current = IFM_ETHER;
ifmr->ifm_mask = 0;
ifmr->ifm_status = (IFM_AVALID | IFM_ACTIVE);
ifmr->ifm_active = IFM_ETHER;
ifmr->ifm_count = 1;
user_addr = (cmd == SIOCGIFMEDIA64) ?
((struct ifmediareq64 *)data)->ifmu_ulist :
CAST_USER_ADDR_T(((struct ifmediareq32 *)data)->ifmu_ulist);
if (user_addr != USER_ADDR_NULL) {
error = copyout(&ifmr->ifm_current, user_addr, sizeof(int));
}
RD_UNLOCK(rd);
break;
}
case SIOCGIFDEVMTU: {
struct ifdevmtu *devmtu_p;
devmtu_p = &ifr->ifr_devmtu;
devmtu_p->ifdm_current = ifnet_mtu(ifp);
devmtu_p->ifdm_max = redirect_max_mtu(ifp);
devmtu_p->ifdm_min = IF_MINMTU;
break;
}
case SIOCSIFMTU:
if ((unsigned int)ifr->ifr_mtu > redirect_max_mtu(ifp) ||
ifr->ifr_mtu < IF_MINMTU) {
error = EINVAL;
} else {
error = ifnet_set_mtu(ifp, ifr->ifr_mtu);
}
break;
case SIOCSIFFLAGS:
if ((ifp->if_flags & IFF_UP) != 0) {
/* marked up, set running if not already set */
if ((ifp->if_flags & IFF_RUNNING) == 0) {
/* set running */
error = ifnet_set_flags(ifp, IFF_RUNNING,
IFF_RUNNING);
}
} else if ((ifp->if_flags & IFF_RUNNING) != 0) {
/* marked down, clear running */
error = ifnet_set_flags(ifp, 0, IFF_RUNNING);
}
break;
case SIOCSDRVSPEC32:
case SIOCSDRVSPEC64:
error = proc_suser(current_proc());
if (error != 0) {
break;
}
drv_set_command = TRUE;
OS_FALLTHROUGH;
case SIOCGDRVSPEC32:
case SIOCGDRVSPEC64:
drv.ifdrvu_p = data;
if (cmd == SIOCGDRVSPEC32 || cmd == SIOCSDRVSPEC32) {
drv_cmd = drv.ifdrvu_32->ifd_cmd;
drv_len = drv.ifdrvu_32->ifd_len;
user_addr = CAST_USER_ADDR_T(drv.ifdrvu_32->ifd_data);
} else {
drv_cmd = drv.ifdrvu_64->ifd_cmd;
drv_len = drv.ifdrvu_64->ifd_len;
user_addr = drv.ifdrvu_64->ifd_data;
}
if (drv_set_command) {
error = redirect_set_drvspec(ifp, drv_cmd, drv_len,
user_addr);
} else {
error = redirect_get_drvspec(ifp, drv_cmd, drv_len,
user_addr);
}
break;
case SIOCADDMULTI:
case SIOCDELMULTI:
error = 0;
break;
default:
error = EOPNOTSUPP;
break;
}
return error;
}
static void
redirect_if_free(ifnet_t ifp)
{
if_redirect_t rd = NULL;
if (ifp == NULL) {
RDLOG_ERR("ifp is NULL");
DTRACE_SKYWALK(null__ifp);
return;
}
rd = ifnet_get_if_redirect(ifp);
if (rd == NULL) {
RDLOG_ERR("rd is NULL");
DTRACE_SKYWALK1(null__rd, ifnet_t, ifp);
return;
}
RD_LOCK(rd);
ifp->if_softc = NULL;
VERIFY(rd->rd_doorbell_tcall == NULL);
RD_UNLOCK(rd);
redirect_release(rd);
ifnet_release(ifp);
return;
}
/*
* Network interface functions
*/
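/*
 * Demux for non-Ethernet redirect interfaces: classify the packet as
 * IPv4 or IPv6 by looking at the IP version nibble of the first byte.
 */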
static errno_t
redirect_demux(__unused ifnet_t ifp, mbuf_t data, __unused char *frame_header,
protocol_family_t *protocol)
{
struct ip *ip;
u_int ip_version;
while (data != NULL && mbuf_len(data) < 1) {
data = mbuf_next(data);
}
if (data == NULL) {
RDLOG_DBG("data is NULL");
DTRACE_SKYWALK(null__data);
return ENOENT;
}
ip = mtod(data, struct ip *);
ip_version = ip->ip_v;
switch (ip_version) {
case 4:
*protocol = PF_INET;
return 0;
case 6:
*protocol = PF_INET6;
return 0;
default:
*protocol = PF_UNSPEC;
break;
}
return 0;
}
static errno_t
redirect_add_proto(__unused ifnet_t interface, protocol_family_t protocol,
__unused const struct ifnet_demux_desc *demux_array,
__unused uint32_t demux_count)
{
switch (protocol) {
case PF_INET:
return 0;
case PF_INET6:
return 0;
default:
break;
}
return ENOPROTOOPT;
}
static errno_t
redirect_del_proto(__unused ifnet_t interface,
__unused protocol_family_t protocol)
{
return 0;
}
__private_extern__ void
if_redirect_init(void)
{
int error;
redirect_log_handle = os_log_create("com.apple.xnu.net.redirect", "redirect");
(void)redirect_register_nexus_domain_provider();
error = if_clone_attach(&redirect_cloner);
if (error != 0) {
return;
}
return;
}