This is xnu-12377.1.9. See this file in:
/*
 * Copyright (c) 1999-2025 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include "net/if_var.h"
#include <net/dlil_var_private.h>


/* Lock attributes used by the RW lock and mutexes declared below. */
LCK_ATTR_DECLARE(dlil_lck_attributes, 0, 0);

/* Lock groups for the DLIL and ifnet locks. */
LCK_GRP_DECLARE(dlil_lock_group, "DLIL internal locks");
LCK_GRP_DECLARE(ifnet_lock_group, "ifnet locks");
LCK_GRP_DECLARE(ifnet_head_lock_group, "ifnet head lock");
LCK_GRP_DECLARE(ifnet_snd_lock_group, "ifnet snd locks");
LCK_GRP_DECLARE(ifnet_rcv_lock_group, "ifnet rcv locks");

LCK_ATTR_DECLARE(ifnet_lock_attr, 0, 0);
/* RW lock taken by ifnet_head_lock_shared/exclusive() and ifnet_head_done(). */
LCK_RW_DECLARE_ATTR(ifnet_head_lock, &ifnet_head_lock_group,
    &dlil_lck_attributes);
/* Mutex taken by dlil_if_lock() and released by dlil_if_unlock(). */
LCK_MTX_DECLARE_ATTR(dlil_ifnet_lock, &dlil_lock_group,
    &dlil_lck_attributes);


/* Mutex held while dlil_pending_thread_cnt is modified. */
LCK_MTX_DECLARE_ATTR(dlil_thread_sync_lock, &dlil_lock_group,
    &dlil_lck_attributes);

/*
 * Pending-thread counter, adjusted by dlil_incr_pending_thread_count()
 * and dlil_decr_pending_thread_count(); sleepers on its address are
 * woken when it drops to zero.
 */
uint32_t dlil_pending_thread_cnt = 0;


/*
 * Forward declarations.
 *
 * link_rtrequest is installed as the ifa_rtrequest handler of the
 * link-layer address in dlil_alloc_lladdr(); if_rtproto_del is called
 * from if_proto_free() to remove routes for an interface/protocol pair.
 */
__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
__private_extern__ void if_rtproto_del(struct ifnet *ifp, int protocol);

/*
 * Utility routines
 */
/*
 * Apply the affinity tag `tag' to thread `tp' using the
 * THREAD_AFFINITY_POLICY flavor.  The result of thread_policy_set()
 * is handed back to the caller unchanged.
 */
kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
	thread_affinity_policy_data_t affinity;
	kern_return_t kr;

	/* Start from an all-zero policy and fill in only the tag. */
	bzero(&affinity, sizeof(affinity));
	affinity.affinity_tag = tag;

	kr = thread_policy_set(tp, THREAD_AFFINITY_POLICY,
	    (thread_policy_t)&affinity, THREAD_AFFINITY_POLICY_COUNT);

	return kr;
}

/*
 * Account for one more pending thread.  The caller must not already
 * hold dlil_thread_sync_lock; it is taken here around the update.
 */
void
dlil_incr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);

	lck_mtx_lock(&dlil_thread_sync_lock);
	dlil_pending_thread_cnt += 1;
	lck_mtx_unlock(&dlil_thread_sync_lock);
}

/*
 * Account for one pending thread less.  The counter must be non-zero
 * on entry.  When it reaches zero, anyone sleeping on the address of
 * dlil_pending_thread_cnt is woken up while the lock is still held.
 */
void
dlil_decr_pending_thread_count(void)
{
	LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);

	lck_mtx_lock(&dlil_thread_sync_lock);
	VERIFY(dlil_pending_thread_cnt > 0);
	if (--dlil_pending_thread_cnt == 0) {
		wakeup(&dlil_pending_thread_cnt);
	}
	lck_mtx_unlock(&dlil_thread_sync_lock);
}

boolean_t
packet_has_vlan_tag(struct mbuf * m)
{
	u_int   tag = 0;

	if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
		tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
		if (tag == 0) {
			/* the packet is just priority-tagged, clear the bit */
			m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
		}
	}
	return tag != 0;
}

/*
 * Monitor functions.
 */
/*
 * Mark the interface filter list busy once more.  The caller must
 * hold if_flt_lock.  The VERIFY trips if the busy counter wraps
 * around to zero.
 */
void
if_flt_monitor_busy(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	ifp->if_flt_busy++;
	VERIFY(ifp->if_flt_busy != 0);
}

/*
 * Counterpart of if_flt_monitor_busy(); simply forwards to
 * if_flt_monitor_leave(), which asserts that if_flt_lock is held.
 */
void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
	if_flt_monitor_leave(ifp);
}

/*
 * Enter the filter monitor: sleep until the filter list is no longer
 * busy, then mark it busy on behalf of the caller.  The caller must
 * hold if_flt_lock, which is handed to msleep() for the duration of
 * each sleep.
 */
void
if_flt_monitor_enter(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	for (;;) {
		if (ifp->if_flt_busy == 0) {
			break;
		}
		/* register as a waiter before going to sleep */
		ifp->if_flt_waiters++;
		(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
		    (PZERO - 1), "if_flt_monitor", NULL);
	}

	if_flt_monitor_busy(ifp);
}

/*
 * Leave the filter monitor: drop one busy count and, once the list is
 * idle and waiters have been recorded, reset the waiter count and wake
 * up everyone sleeping on if_flt_head.  The caller must hold
 * if_flt_lock.
 */
void
if_flt_monitor_leave(struct ifnet *ifp)
{
	LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);

	VERIFY(ifp->if_flt_busy != 0);
	ifp->if_flt_busy--;

	if (ifp->if_flt_busy != 0) {
		/* still busy on behalf of someone else */
		return;
	}

	if (ifp->if_flt_waiters > 0) {
		ifp->if_flt_waiters = 0;
		wakeup(&ifp->if_flt_head);
	}
}


/*
 * Allocate a dlil_ifnet with kalloc_type, requesting
 * Z_WAITOK | Z_ZERO | Z_NOFAIL.  Release with dlif_ifnet_free().
 */
struct dlil_ifnet *
dlif_ifnet_alloc(void)
{
	struct dlil_ifnet *dl_if;

	dl_if = kalloc_type(struct dlil_ifnet, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	return dl_if;
}

/*
 * Free a dlil_ifnet obtained from dlif_ifnet_alloc().
 * A NULL pointer is accepted and ignored.
 */
void
dlif_ifnet_free(struct dlil_ifnet *ifnet)
{
	if (ifnet == NULL) {
		return;
	}
	kfree_type(struct dlil_ifnet, ifnet);
}

/*
 * Allocate an ifnet_filter with kalloc_type, requesting
 * Z_WAITOK | Z_ZERO | Z_NOFAIL.  Release with dlif_filt_free().
 */
struct ifnet_filter *
dlif_filt_alloc(void)
{
	struct ifnet_filter *filt;

	filt = kalloc_type(struct ifnet_filter, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	return filt;
}

/*
 * Free an ifnet_filter obtained from dlif_filt_alloc().
 * A NULL pointer is accepted and ignored.
 */
void
dlif_filt_free(struct ifnet_filter *filt)
{
	if (filt == NULL) {
		return;
	}
	kfree_type(struct ifnet_filter, filt);
}

/*
 * Allocate an if_proto with kalloc_type, requesting
 * Z_WAITOK | Z_ZERO | Z_NOFAIL.  Release with dlif_proto_free().
 */
struct if_proto *
dlif_proto_alloc(void)
{
	struct if_proto *ifproto;

	ifproto = kalloc_type(struct if_proto, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	return ifproto;
}

/*
 * Free an if_proto obtained from dlif_proto_alloc().
 * A NULL pointer is accepted and ignored.
 */
void
dlif_proto_free(struct if_proto *ifproto)
{
	if (ifproto == NULL) {
		return;
	}
	kfree_type(struct if_proto, ifproto);
}

/*
 * Allocate a tcpstat_local with kalloc_type, requesting
 * Z_WAITOK | Z_ZERO | Z_NOFAIL.  Release with dlif_tcpstat_free().
 */
struct tcpstat_local *
dlif_tcpstat_alloc(void)
{
	struct tcpstat_local *tcp_stat;

	tcp_stat = kalloc_type(struct tcpstat_local, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	return tcp_stat;
}

/*
 * Free a tcpstat_local obtained from dlif_tcpstat_alloc().
 * A NULL pointer is accepted and ignored.
 */
void
dlif_tcpstat_free(struct tcpstat_local *if_tcp_stat)
{
	if (if_tcp_stat == NULL) {
		return;
	}
	kfree_type(struct tcpstat_local, if_tcp_stat);
}

/*
 * Allocate a udpstat_local with kalloc_type, requesting
 * Z_WAITOK | Z_ZERO | Z_NOFAIL.  Release with dlif_udpstat_free().
 */
struct udpstat_local *
dlif_udpstat_alloc(void)
{
	struct udpstat_local *udp_stat;

	udp_stat = kalloc_type(struct udpstat_local, Z_WAITOK | Z_ZERO | Z_NOFAIL);
	return udp_stat;
}

/*
 * Free a udpstat_local obtained from dlif_udpstat_alloc().
 * A NULL pointer is accepted and ignored.
 */
void
dlif_udpstat_free(struct udpstat_local *if_udp_stat)
{
	if (if_udp_stat == NULL) {
		return;
	}
	kfree_type(struct udpstat_local, if_udp_stat);
}

/*
 * Set up (or re-initialize) the link-layer address ifaddr of `ifp'.
 *
 * An AF_LINK sockaddr_dl is built that holds the interface name and,
 * when `ll_addr' is non-NULL, the link-layer address copied from it;
 * a matching netmask is built whose name bytes are all 0xff.  The
 * storage embedded in the dlil_ifnet is used unless the rounded-up
 * sockaddr size exceeds DLIL_SDLMAXLEN or the interface already uses
 * the extended storage, in which case a permanently allocated
 * dl_if_lladdr_xtra_space is used.
 *
 * The caller must hold the ifnet lock exclusively, and a non-NULL
 * `ll_addr' must have sdl_alen equal to ifp->if_addrlen.  If
 * ifp->if_lladdr is switched to a different ifaddr, one reference is
 * dropped on the previous one.
 *
 * Returns the ifaddr installed as ifp->if_lladdr.
 */
struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
	struct ifaddr *ifa, *oifa = NULL;
	struct sockaddr_dl *addr_sdl, *mask_sdl;
	char workbuf[IFNAMSIZ * 2];
	int namelen, masklen, socksize;
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
	VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);

	/*
	 * The mask covers the sockaddr_dl header plus the name; the address
	 * additionally needs room for if_addrlen bytes of link address.
	 */
	namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
	    if_name(ifp));
	masklen = offsetof(struct sockaddr_dl, sdl_data[0])
	    + ((namelen > 0) ? namelen : 0);
	socksize = masklen + ifp->if_addrlen;
	/* Round up to a multiple of sizeof(u_int32_t). */
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
	if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
		socksize = sizeof(struct sockaddr_dl);
	}
	socksize = ROUNDUP(socksize);
#undef ROUNDUP

	ifa = ifp->if_lladdr;
	if (socksize > DLIL_SDLMAXLEN ||
	    (ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
		/*
		 * Rare, but in the event that the link address requires
		 * more storage space than DLIL_SDLMAXLEN, allocate the
		 * largest possible storages for address and mask, such
		 * that we can reuse the same space when if_addrlen grows.
		 * This same space will be used when if_addrlen shrinks.
		 */
		struct dl_if_lladdr_xtra_space *__single dl_if_lladdr_ext;

		if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
			/* First time the extended storage is needed. */
			dl_if_lladdr_ext = zalloc_permanent(
				sizeof(*dl_if_lladdr_ext), ZALIGN(struct ifaddr));

			ifa = &dl_if_lladdr_ext->ifa;
			ifa_lock_init(ifa);
			ifa_initref(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		} else {
			/* Already on extended storage; recover it from the ifaddr. */
			dl_if_lladdr_ext = __unsafe_forge_single(
				struct dl_if_lladdr_xtra_space*, ifa);
			ifa = &dl_if_lladdr_ext->ifa;
		}

		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		bzero(dl_if_lladdr_ext->addr_sdl_bytes,
		    sizeof(dl_if_lladdr_ext->addr_sdl_bytes));
		bzero(dl_if_lladdr_ext->mask_sdl_bytes,
		    sizeof(dl_if_lladdr_ext->mask_sdl_bytes));
		addr_sdl = SDL(dl_if_lladdr_ext->addr_sdl_bytes);
		mask_sdl = SDL(dl_if_lladdr_ext->mask_sdl_bytes);
	} else {
		VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
		/*
		 * Use the storage areas for address and mask within the
		 * dlil_ifnet structure.  This is the most common case.
		 */
		if (ifa == NULL) {
			ifa = &dl_if->dl_if_lladdr.ifa;
			ifa_lock_init(ifa);
			ifa_initref(ifa);
			/* Don't set IFD_ALLOC, as this is permanent */
			ifa->ifa_debug = IFD_LINK;
		}
		IFA_LOCK(ifa);
		/* address and mask sockaddr_dl locations */
		bzero(dl_if->dl_if_lladdr.addr_sdl_bytes,
		    sizeof(dl_if->dl_if_lladdr.addr_sdl_bytes));
		bzero(dl_if->dl_if_lladdr.mask_sdl_bytes,
		    sizeof(dl_if->dl_if_lladdr.mask_sdl_bytes));
		addr_sdl = SDL(dl_if->dl_if_lladdr.addr_sdl_bytes);
		mask_sdl = SDL(dl_if->dl_if_lladdr.mask_sdl_bytes);
	}

	/* Remember the previous ifaddr so its reference can be dropped below. */
	if (ifp->if_lladdr != ifa) {
		oifa = ifp->if_lladdr;
		ifp->if_lladdr = ifa;
	}

	/* Fill in the address sockaddr_dl (ifa is still locked here). */
	VERIFY(ifa->ifa_debug == IFD_LINK);
	ifa->ifa_ifp = ifp;
	ifa->ifa_rtrequest = link_rtrequest;
	ifa->ifa_addr = SA(addr_sdl);
	addr_sdl->sdl_len = (u_char)socksize;
	addr_sdl->sdl_family = AF_LINK;
	if (namelen > 0) {
		bcopy(workbuf, addr_sdl->sdl_data, min(namelen,
		    sizeof(addr_sdl->sdl_data)));
		addr_sdl->sdl_nlen = (u_char)namelen;
	} else {
		addr_sdl->sdl_nlen = 0;
	}
	addr_sdl->sdl_index = ifp->if_index;
	addr_sdl->sdl_type = ifp->if_type;
	if (ll_addr != NULL) {
		addr_sdl->sdl_alen = ll_addr->sdl_alen;
		bcopy(CONST_LLADDR(ll_addr), LLADDR(addr_sdl), addr_sdl->sdl_alen);
	} else {
		addr_sdl->sdl_alen = 0;
	}
	/* The netmask marks every byte of the interface name with 0xff. */
	ifa->ifa_netmask = SA(mask_sdl);
	mask_sdl->sdl_len = (u_char)masklen;
	while (namelen > 0) {
		mask_sdl->sdl_data[--namelen] = 0xff;
	}
	IFA_UNLOCK(ifa);

	if (oifa != NULL) {
		ifa_remref(oifa);
	}

	return ifa;
}


/*
 * Allocate the per-interface statistics blocks hanging off `ifp'.
 *
 * The TCP and UDP blocks are allocated only when both pointers are
 * NULL on entry; that is the only path that sets the return value
 * to 0.  The IPv4 and IPv6 ECN blocks are allocated when missing.
 *
 * Returns 0 on success.  Returns EINVAL when `ifp' is NULL, or when
 * the TCP/UDP blocks were not both NULL on entry; in the latter case
 * every statistics block attached to `ifp' is freed and its pointer
 * cleared before returning.
 *
 * NOTE(review): a second call on an interface that already has its
 * TCP/UDP blocks therefore tears all blocks down -- confirm callers
 * never rely on this being idempotent.
 */
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
	int ret = EINVAL;

	if (ifp == NULL) {
		goto end;
	}

	if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
		ifp->if_tcp_stat = dlif_tcpstat_alloc();
		ifp->if_udp_stat = dlif_udpstat_alloc();

		/* Both blocks must be aligned to sizeof(u_int64_t). */
		VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
		    IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));

		ret = 0;
	}

	if (ifp->if_ipv4_stat == NULL) {
		ifp->if_ipv4_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO);
	}

	if (ifp->if_ipv6_stat == NULL) {
		ifp->if_ipv6_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO);
	}
end:
	/* Error path: release whatever is attached to the interface. */
	if (ifp != NULL && ret != 0) {
		if (ifp->if_tcp_stat != NULL) {
			dlif_tcpstat_free(ifp->if_tcp_stat);
			ifp->if_tcp_stat = NULL;
		}
		if (ifp->if_udp_stat != NULL) {
			dlif_udpstat_free(ifp->if_udp_stat);
			ifp->if_udp_stat = NULL;
		}
		/* The macro kfree_type sets the passed pointer to NULL */
		if (ifp->if_ipv4_stat != NULL) {
			kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv4_stat);
		}
		if (ifp->if_ipv6_stat != NULL) {
			kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv6_stat);
		}
	}

	return ret;
}

/*
 * Take one reference on the dlil_ifnet backing `ifp'.
 *
 * Returns EINVAL for a NULL `ifp', 0 otherwise.  Panics if the
 * reference count wraps around to zero.
 */
errno_t
dlil_if_ref(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if;

	if (ifp == NULL) {
		return EINVAL;
	}
	dl_if = (struct dlil_ifnet *)ifp;

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	if (++dl_if->dl_if_refcnt == 0) {
		panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	}
	lck_mtx_unlock(&dl_if->dl_if_lock);

	return 0;
}

/*
 * Drop one reference on the dlil_ifnet backing `ifp'.
 *
 * When the last reference goes away while the interface is still
 * flagged IFRF_EMBRYONIC, the interface is released through
 * _dlil_if_release() after the lock has been dropped.
 *
 * Returns EINVAL for a NULL `ifp', 0 otherwise.  Panics if the
 * reference count is already zero.
 */
errno_t
dlil_if_free(struct ifnet *ifp)
{
	struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
	bool release_embryonic = FALSE;

	if (dl_if == NULL) {
		return EINVAL;
	}

	lck_mtx_lock_spin(&dl_if->dl_if_lock);
	if (dl_if->dl_if_refcnt == 0) {
		panic("%s: negative refcnt for ifp=%p", __func__, ifp);
		/* NOTREACHED */
	} else if (dl_if->dl_if_refcnt == 1) {
		/* about to drop the last reference */
		if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
			release_embryonic = TRUE;
		}
	}
	--dl_if->dl_if_refcnt;
	lck_mtx_unlock(&dl_if->dl_if_lock);

	if (release_embryonic) {
		_dlil_if_release(ifp, true);
	}
	return 0;
}

/*
 * Release the per-allocation state of `ifp'.
 *
 * Decrements the ifnet allocation counters in net_api_stats (the "os"
 * counter only when IFXF_ALLOC_KPI is not set), frees the broadcast
 * address storage, and re-points if_name / if_xname at the storage
 * embedded in the dlil_ifnet; the external name is reset to the
 * interface name followed by '?'.
 *
 * When `clear_in_use' is true, DLIF_INUSE is cleared from
 * dl_if_flags (and asserted to have been set).
 *
 * The ifnet lock is taken exclusively here, with dl_if_lock nested
 * inside it.
 */
void
_dlil_if_release(ifnet_t ifp, bool clear_in_use)
{
	struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;

	/* Each counter must have been positive before the decrement. */
	VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
	if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
		VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
	}

	ifnet_lock_exclusive(ifp);
	kfree_data_counted_by(ifp->if_broadcast.ptr, ifp->if_broadcast.length);
	lck_mtx_lock(&dlifp->dl_if_lock);
	/* Copy the if name to the dedicated storage */
	ifp->if_name = tsnprintf(dlifp->dl_if_namestorage, sizeof(dlifp->dl_if_namestorage),
	    "%s", ifp->if_name);
	/* Reset external name (name + unit) */
	ifp->if_xname = tsnprintf(dlifp->dl_if_xnamestorage, sizeof(dlifp->dl_if_xnamestorage),
	    "%s?", ifp->if_name);
	if (clear_in_use) {
		ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
		dlifp->dl_if_flags &= ~DLIF_INUSE;
	}
	lck_mtx_unlock(&dlifp->dl_if_lock);
	ifnet_lock_done(ifp);
}

/*
 * Release `ifp' via _dlil_if_release() without clearing DLIF_INUSE.
 */
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
	_dlil_if_release(ifp, false);
}

/*
 * Take one reference on `proto' by atomically incrementing its
 * refcount (relaxed ordering).  Paired with if_proto_free().
 */
void
if_proto_ref(struct if_proto *proto)
{
	os_atomic_inc(&proto->refcount, relaxed);
}

/*
 * Drop one reference on `proto'; when the last reference goes away,
 * tear the protocol attachment down.
 *
 * The teardown invokes the protocol's `detached' callback (v1 or v2
 * KPI) if one is set, removes routes for the interface/protocol pair,
 * posts KEV_DL_PROTO_DETACHED carrying the number of protocols still
 * attached, marks the interface down when that number is zero, and
 * finally frees `proto'.  The protocol must already be marked
 * detached by the time the last reference is dropped.
 */
void
if_proto_free(struct if_proto *proto)
{
	u_int32_t oldval;
	struct ifnet *ifp = proto->ifp;
	u_int32_t proto_family = proto->protocol_family;
	struct kev_dl_proto_data ev_pr_data;

	/* oldval is the count before the decrement; > 1 means others remain. */
	oldval = os_atomic_dec_orig(&proto->refcount, relaxed);
	if (oldval > 1) {
		return;
	}

	if (proto->proto_kpi == kProtoKPI_v1) {
		if (proto->kpi.v1.detached) {
			proto->kpi.v1.detached(ifp, proto->protocol_family);
		}
	}
	if (proto->proto_kpi == kProtoKPI_v2) {
		if (proto->kpi.v2.detached) {
			proto->kpi.v2.detached(ifp, proto->protocol_family);
		}
	}

	/*
	 * Cleanup routes that may still be in the routing table for that
	 * interface/protocol pair.
	 */
	if_rtproto_del(ifp, proto_family);

	ifnet_lock_shared(ifp);

	/* No more reference on this, protocol must have been detached */
	VERIFY(proto->detached);

	/*
	 * The reserved field carries the number of protocol still attached
	 * (subject to change)
	 */
	ev_pr_data.proto_family = proto_family;
	/* A NULL list makes dlil_ifp_protolist() only count the protocols. */
	ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);

	ifnet_lock_done(ifp);

	dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
	    (struct net_event_data *)&ev_pr_data,
	    sizeof(struct kev_dl_proto_data), FALSE);

	if (ev_pr_data.proto_remaining_count == 0) {
		/*
		 * The protocol count has gone to zero, mark the interface down.
		 * This used to be done by configd.KernelEventMonitor, but that
		 * is inherently prone to races (rdar://problem/30810208).
		 */
		(void) ifnet_set_flags(ifp, 0, IFF_UP);
		(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
		dlil_post_sifflags_msg(ifp);
	}

	dlif_proto_free(proto);
}

/*
 * Count the protocols attached to `ifp' and optionally report them.
 *
 * When `list' is non-NULL, the protocol families of the first
 * `list_count' attached protocols are stored into it.  The return
 * value is always the total number of attached protocols, which may
 * exceed `list_count'.  The caller must hold the ifnet lock.
 */
__private_extern__ u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list __counted_by(list_count),
    u_int32_t list_count)
{
	u_int32_t       nprotos = 0;
	int             slot;

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash != NULL) {
		for (slot = 0; slot < PROTO_HASH_SLOTS; slot++) {
			if_proto_ref_t proto;
			SLIST_FOREACH(proto, &ifp->if_proto_hash[slot], next_hash) {
				/* keep counting even once the list is full */
				if (list != NULL && nprotos < list_count) {
					list[nprotos] = proto->protocol_family;
				}
				nprotos++;
			}
		}
	}

	return nprotos;
}

/*
 * Locked wrapper around dlil_ifp_protolist(): takes the ifnet lock
 * shared, fills `protolist' with up to `count' protocol families and
 * returns the total number of protocols attached to `ifp'.
 */
__private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp, u_int32_t *__counted_by(count) protolist, u_int32_t count)
{
	u_int32_t nprotos;

	ifnet_lock_shared(ifp);
	nprotos = dlil_ifp_protolist(ifp, protolist, count);
	ifnet_lock_done(ifp);

	return nprotos;
}

/*
 * Free a protocol list buffer by passing it to kfree_data_addr().
 */
__private_extern__ void
if_free_protolist(u_int32_t *list)
{
	kfree_data_addr(list);
}

/*
 * Return non-zero when `ifp' has IFEF_SKYWALK_NATIVE set in if_eflags
 * and also has a non-NULL if_na.
 */
boolean_t
dlil_is_native_netif_nexus(ifnet_t ifp)
{
	if ((ifp->if_eflags & IFEF_SKYWALK_NATIVE) == 0) {
		return FALSE;
	}
	return ifp->if_na != NULL;
}


/*
 * Look up the protocol attachment for `protocol_family' on `ifp'.
 *
 * Returns the matching if_proto with an extra reference taken on it
 * (to be dropped by the caller), or NULL when the interface has no
 * protocol hash or no matching entry.
 *
 * Caller must already be holding ifnet lock.
 */
struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
	struct if_proto *match;
	u_int32_t slot = proto_hash_value(protocol_family);

	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);

	if (ifp->if_proto_hash == NULL) {
		return NULL;
	}

	for (match = SLIST_FIRST(&ifp->if_proto_hash[slot]); match != NULL;
	    match = SLIST_NEXT(match, next_hash)) {
		if (match->protocol_family == protocol_family) {
			if_proto_ref(match);
			return match;
		}
	}

	return NULL;
}

/*
 * Clat routines.
 */

/*
 * Decide whether a packet is a candidate for CLAT translation, i.e.
 * its destination address is not a loopback, link-local, multicast
 * or broadcast address.
 *
 * For PF_INET the IPv4 destination is tested with CLAT46_NEEDED; for
 * PF_INET6 the packet must be at least as long as an IPv6 header and
 * its destination must satisfy CLAT64_NEEDED.  Any other protocol
 * family yields 0.
 *
 * NOTE(review): the PF_INET path reads the IPv4 header without a
 * length check of its own -- presumably callers guarantee one.
 *
 * Returns 1 when translation is needed, 0 otherwise.
 */
int
dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
{
	if (proto_family == PF_INET) {
		struct ip *iph = mtod(m, struct ip *);

		if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
			return 1;
		}
		return 0;
	}

	if (proto_family == PF_INET6) {
		struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);

		if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
		    CLAT64_NEEDED(&ip6h->ip6_dst)) {
			return 1;
		}
		return 0;
	}

	return 0;
}

/*
 * @brief This routine translates IPv4 packet to IPv6 packet,
 *     updates protocol checksum and also translates ICMP for code
 *     along with inner header translation.
 *
 * @param ifp Pointer to the interface; must satisfy IS_INTF_CLAT46.
 * @param proto_family pointer to protocol family. Must be PF_INET on
 *     entry. It is updated to PF_INET6 if function performs the
 *     translation successfully.
 * @param m Pointer to the pointer pointing to the packet. Needed because this
 *     routine can end up changing the mbuf to a different one. It is
 *     set to NULL when m_pullup() fails or when the pbuf is found to
 *     be invalid at cleanup time.
 *
 * @return 0 on success or else a negative value.
 */
errno_t
dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
	VERIFY(*proto_family == PF_INET);
	VERIFY(IS_INTF_CLAT46(ifp));

	pbuf_t pbuf_store, *pbuf = NULL;
	struct ip *iph = NULL;
	struct in_addr osrc, odst;
	uint8_t proto = 0;
	struct in6_addr src_storage = {};
	struct in6_addr *src = NULL;
	struct sockaddr_in6 dstsock = {};
	int error = 0;
	uint16_t off = 0;
	uint16_t tot_len = 0;
	uint16_t ip_id_val = 0;
	uint16_t ip_frag_off = 0;

	boolean_t is_frag = FALSE;
	boolean_t is_first_frag = TRUE;
	boolean_t is_last_frag = TRUE;

	/*
	 * Ensure that the incoming mbuf chain contains a valid
	 * IPv4 header in contiguous memory, or exit early.
	 */
	if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip) ||
	    ((size_t)(*m)->m_len < sizeof(struct ip) &&
	    (*m = m_pullup(*m, sizeof(struct ip))) == NULL)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	/* Snapshot the header fields needed after the header is rewritten. */
	iph = mtod(*m, struct ip *);
	osrc = iph->ip_src;
	odst = iph->ip_dst;
	proto = iph->ip_p;
	off = (uint16_t)(iph->ip_hl << 2);
	ip_id_val = iph->ip_id;
	ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;

	tot_len = ntohs(iph->ip_len);

	/* Validate that mbuf contains IP payload equal to `iph->ip_len' */
	if ((size_t)(*m)->m_pkthdr.len < tot_len) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	pbuf_init_mbuf(&pbuf_store, *m, ifp);
	pbuf = &pbuf_store;

	/*
	 * For packets that are not first frags
	 * we only need to adjust CSUM.
	 * For 4 to 6, Fragmentation header gets appended
	 * after proto translation.
	 */
	if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
		is_frag = TRUE;

		/* If the offset is not zero, it is not first frag */
		if (ip_frag_off != 0) {
			is_first_frag = FALSE;
		}

		/* If IP_MF is set, then it is not last frag */
		if (ntohs(iph->ip_off) & IP_MF) {
			is_last_frag = FALSE;
		}
	}

	/*
	 * Translate IPv4 destination to IPv6 destination by using the
	 * prefixes learned through prior PLAT discovery.
	 */
	if ((error = nat464_synthesize_ipv6(ifp, &odst, &dstsock.sin6_addr)) != 0) {
		ip6stat.ip6s_clat464_out_v6synthfail_drop++;
		goto cleanup;
	}

	dstsock.sin6_len = sizeof(struct sockaddr_in6);
	dstsock.sin6_family = AF_INET6;

	/*
	 * Retrieve the local IPv6 CLAT46 address reserved for stateless
	 * translation.
	 */
	src = in6_selectsrc_core(&dstsock, 0, ifp, 0, &src_storage, NULL, &error,
	    NULL, NULL, TRUE);

	if (src == NULL) {
		ip6stat.ip6s_clat464_out_nov6addr_drop++;
		error = -1;
		goto cleanup;
	}

	/*
	 * Translate the IP header part first.
	 * NOTE: `nat464_translate_46' handles the situation where the value
	 * `off' is past the end of the mbuf chain that is associated with
	 * the pbuf, in a graceful manner.
	 */
	error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
	    iph->ip_ttl, src_storage, dstsock.sin6_addr, tot_len) == NT_NAT64) ? 0 : -1;

	iph = NULL;     /* Invalidate iph as pbuf has been modified */

	if (error != 0) {
		ip6stat.ip6s_clat464_out_46transfail_drop++;
		goto cleanup;
	}

	/*
	 * Translate protocol header, update checksum, checksum flags
	 * and related fields.
	 */
	error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
	    proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;

	if (error != 0) {
		ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
		goto cleanup;
	}

	/* Now insert the IPv6 fragment header */
	if (is_frag) {
		error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);

		if (error != 0) {
			ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
			goto cleanup;
		}
	}

cleanup:
	/*
	 * Hand the (possibly replaced) mbuf back to the caller, detaching
	 * it from the pbuf before the pbuf is destroyed.
	 */
	if (pbuf_is_valid(pbuf)) {
		*m = pbuf->pb_mbuf;
		pbuf->pb_mbuf = NULL;
		pbuf_destroy(pbuf);
	} else {
		error = -1;
		*m = NULL;
		ip6stat.ip6s_clat464_out_invalpbuf_drop++;
	}

	if (error == 0) {
		*proto_family = PF_INET6;
		ip6stat.ip6s_clat464_out_success++;
	}

	return error;
}

/*
 * @brief This routine translates incoming IPv6 to IPv4 packet,
 *     updates protocol checksum and also translates ICMPv6 outer
 *     and inner headers
 *
 * @param ifp Pointer to the interface; must satisfy IS_INTF_CLAT46.
 * @param proto_family pointer to protocol family. Must be PF_INET6 on
 *     entry. It is updated to PF_INET only if the packet was actually
 *     translated.
 * @param m Pointer to the pointer pointing to the packet. Needed because this
 *     routine can end up changing the mbuf to a different one.
 *
 * The packet is left untouched (and 0 is returned) when the interface
 * has no CLAT46 IPv6 address yet, or when the packet is not destined
 * to that address.
 *
 * @return 0 on success or else a negative value.
 */
errno_t
dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
	VERIFY(*proto_family == PF_INET6);
	VERIFY(IS_INTF_CLAT46(ifp));

	struct ip6_hdr *ip6h = NULL;
	struct in6_addr osrc, odst;
	uint8_t proto = 0;
	struct in6_ifaddr *ia6_clat_dst = NULL;
	struct in_ifaddr *ia4_clat_dst = NULL;
	struct in_addr *dst = NULL;
	struct in_addr src;
	int error = 0;
	uint32_t off = 0;
	u_int64_t tot_len = 0;
	uint8_t tos = 0;
	boolean_t is_first_frag = TRUE;

	/*
	 * Ensure that the incoming mbuf chain contains a valid
	 * IPv6 header in contiguous memory, or exit early.
	 */
	if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
	    ((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
	    (*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	ip6h = mtod(*m, struct ip6_hdr *);
	/* Validate that mbuf contains IP payload equal to ip6_plen */
	if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
		ip6stat.ip6s_clat464_in_tooshort_drop++;
		return -1;
	}

	osrc = ip6h->ip6_src;
	odst = ip6h->ip6_dst;

	/*
	 * Retrieve the local CLAT46 reserved IPv6 address.
	 * Let the packet pass if we don't find one, as the flag
	 * may get set before IPv6 configuration has taken place.
	 */
	ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
	if (ia6_clat_dst == NULL) {
		goto done;
	}

	/*
	 * Check if the original dest in the packet is same as the reserved
	 * CLAT46 IPv6 address
	 */
	if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
		bool translate = false;
		pbuf_t pbuf_store, *pbuf = NULL;
		pbuf_init_mbuf(&pbuf_store, *m, ifp);
		pbuf = &pbuf_store;

		/*
		 * Retrieve the local CLAT46 IPv4 address reserved for stateless
		 * translation.
		 */
		ia4_clat_dst = inifa_ifpclatv4(ifp);
		if (ia4_clat_dst == NULL) {
			ifa_remref(&ia6_clat_dst->ia_ifa);
			ip6stat.ip6s_clat464_in_nov4addr_drop++;
			error = -1;
			goto cleanup;
		}
		ifa_remref(&ia6_clat_dst->ia_ifa);

		/* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
		dst = &ia4_clat_dst->ia_addr.sin_addr;
		error = nat464_synthesize_ipv4(ifp, &osrc, &src, &translate);
		if (error != 0) {
			ip6stat.ip6s_clat464_in_v4synthfail_drop++;
			error = -1;
			goto cleanup;
		}
		if (!translate) {
			/* no translation required */
			if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
				/* only allow icmpv6 */
				ip6stat.ip6s_clat464_in_v4synthfail_drop++;
				error = -1;
			}
			goto cleanup;
		}

		/* Snapshot the header fields needed by the translators. */
		ip6h = pbuf->pb_data;
		off = sizeof(struct ip6_hdr);
		proto = ip6h->ip6_nxt;
		tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
		tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);

		/*
		 * Translate the IP header and update the fragmentation
		 * header if needed
		 */
		error = (nat464_translate_64(pbuf, off, tos, &proto,
		    ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
		    0 : -1;

		ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */

		if (error != 0) {
			ip6stat.ip6s_clat464_in_64transfail_drop++;
			goto cleanup;
		}

		/*
		 * Translate protocol header, update checksum, checksum flags
		 * and related fields.
		 */
		error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
		    (struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
		    NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;

		if (error != 0) {
			ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
			goto cleanup;
		}

cleanup:
		if (ia4_clat_dst != NULL) {
			ifa_remref(&ia4_clat_dst->ia_ifa);
		}

		/*
		 * Hand the (possibly replaced) mbuf back to the caller,
		 * detaching it from the pbuf before the pbuf is destroyed.
		 *
		 * NOTE(review): unlike dlil_clat46(), `*m' is left as-is
		 * when the pbuf is invalid -- confirm against the caller's
		 * error handling.
		 */
		if (pbuf_is_valid(pbuf)) {
			*m = pbuf->pb_mbuf;
			pbuf->pb_mbuf = NULL;
			pbuf_destroy(pbuf);
		} else {
			error = -1;
			ip6stat.ip6s_clat464_in_invalpbuf_drop++;
		}

		if (error == 0 && translate) {
			*proto_family = PF_INET;
			ip6stat.ip6s_clat464_in_success++;
		}
	} /* CLAT traffic */
	else {
		/*
		 * Not destined to the CLAT46 address: the packet passes
		 * through untranslated, but the reference obtained from
		 * in6ifa_ifpwithflag() must still be dropped, otherwise it
		 * leaks once per such packet.
		 */
		ifa_remref(&ia6_clat_dst->ia_ifa);
	}

done:
	return error;
}

/*
 * Thread management
 */
/*
 * Reset a dlil_threading_info to a pristine state.
 *
 * Destroys the per-thread mutex and frees its lock group, then clears
 * the flags, name, interface pointer, packet queue limit, statistics
 * and thread/strategy pointers.  The VERIFYs require that the packet
 * queue is already empty and that the affinity state and the driver
 * and poller thread pointers have been cleared beforehand.
 */
void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
	/* The mutex must be destroyed before its lock group is freed. */
	lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
	lck_grp_free(inp->dlth_lock_grp);
	inp->dlth_lock_grp = NULL;

	inp->dlth_flags = 0;
	inp->dlth_wtot = 0;
	bzero(inp->dlth_name_storage, sizeof(inp->dlth_name_storage));
	inp->dlth_name = NULL;
	inp->dlth_ifp = NULL;
	VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts));
	qlimit(&inp->dlth_pkts) = 0;
	bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));

	VERIFY(!inp->dlth_affinity);
	inp->dlth_thread = THREAD_NULL;
	inp->dlth_strategy = NULL;
	VERIFY(inp->dlth_driver_thread == THREAD_NULL);
	VERIFY(inp->dlth_poller_thread == THREAD_NULL);
	VERIFY(inp->dlth_affinity_tag == 0);
#if IFNET_INPUT_SANITY_CHK
	inp->dlth_pkts_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}

/*
 * Lock management
 */
/*
 * Map an ifnet lock assertion kind onto the matching LCK_RW_ASSERT_*
 * type.
 *
 * Returns 0 with `*type' filled in when an assertion should be made,
 * or ENOENT for IFNET_LCK_ASSERT_NOTOWNED, which is bypassed for RW
 * locks (`*type' is left untouched).  Any other value panics.
 */
static errno_t
_dlil_get_lock_assertion_type(ifnet_lock_assert_t what, unsigned int *type)
{
	errno_t err = 0;

	switch (what) {
	case IFNET_LCK_ASSERT_EXCLUSIVE:
		*type = LCK_RW_ASSERT_EXCLUSIVE;
		break;

	case IFNET_LCK_ASSERT_SHARED:
		*type = LCK_RW_ASSERT_SHARED;
		break;

	case IFNET_LCK_ASSERT_OWNED:
		*type = LCK_RW_ASSERT_HELD;
		break;

	case IFNET_LCK_ASSERT_NOTOWNED:
		/* nothing to do here for RW lock; bypass assert */
		err = ENOENT;
		break;

	default:
		panic("bad ifnet assert type: %d", what);
		/* NOTREACHED */
	}

	return err;
}

/* Acquire the dlil_ifnet_lock mutex. */
__private_extern__ void
dlil_if_lock(void)
{
	lck_mtx_lock(&dlil_ifnet_lock);
}

/* Release the dlil_ifnet_lock mutex. */
__private_extern__ void
dlil_if_unlock(void)
{
	lck_mtx_unlock(&dlil_ifnet_lock);
}

/* Assert that the calling thread owns the dlil_ifnet_lock mutex. */
__private_extern__ void
dlil_if_lock_assert(void)
{
	LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}

/*
 * Assert the state of ifnet_head_lock described by `what'.  Nothing
 * is asserted for kinds that have no RW-lock equivalent
 * (IFNET_LCK_ASSERT_NOTOWNED).
 */
__private_extern__ void
ifnet_head_lock_assert(ifnet_lock_assert_t what)
{
	unsigned int rw_type = 0;

	if (_dlil_get_lock_assertion_type(what, &rw_type) != 0) {
		return;
	}
	LCK_RW_ASSERT(&ifnet_head_lock, rw_type);
}

/*
 * Assert the state of the per-interface RW lock (if_lock) described
 * by `what'.  Nothing is asserted for kinds that have no RW-lock
 * equivalent (IFNET_LCK_ASSERT_NOTOWNED).
 */
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
#if !MACH_ASSERT
#pragma unused(ifp)
#endif
	unsigned int rw_type = 0;

	if (_dlil_get_lock_assertion_type(what, &rw_type) != 0) {
		return;
	}
	LCK_RW_ASSERT(&ifp->if_lock, rw_type);
}

/* Take the per-interface RW lock (if_lock) in shared mode. */
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_lock);
}

/* Take the per-interface RW lock (if_lock) in exclusive mode. */
__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_lock);
}

/* Release the per-interface RW lock (if_lock), however it was taken. */
__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_lock);
}

#if INET
/* Take the interface's if_inetdata_lock in shared mode. */
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inetdata_lock);
}

/* Take the interface's if_inetdata_lock in exclusive mode. */
__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}

/* Release the interface's if_inetdata_lock, however it was taken. */
__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inetdata_lock);
}
#endif /* INET */

/* Take the interface's if_inet6data_lock in shared mode. */
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
	lck_rw_lock_shared(&ifp->if_inet6data_lock);
}

/* Take the interface's if_inet6data_lock in exclusive mode. */
__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
	lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}

/* Release the interface's if_inet6data_lock, however it was taken. */
__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
	lck_rw_done(&ifp->if_inet6data_lock);
}

/* Take the global ifnet_head_lock in shared mode. */
__private_extern__ void
ifnet_head_lock_shared(void)
{
	lck_rw_lock_shared(&ifnet_head_lock);
}

/* Take the global ifnet_head_lock in exclusive mode. */
__private_extern__ void
ifnet_head_lock_exclusive(void)
{
	lck_rw_lock_exclusive(&ifnet_head_lock);
}

/* Release the global ifnet_head_lock, however it was taken. */
__private_extern__ void
ifnet_head_done(void)
{
	lck_rw_done(&ifnet_head_lock);
}

/* Assert that the global ifnet_head_lock is held in exclusive mode. */
__private_extern__ void
ifnet_head_assert_exclusive(void)
{
	LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}

/*
 * Ask the driver of `ifp' to restore its suspended link-layer
 * multicast memberships by issuing SIOCADDMULTI.
 *
 * With `sync' set the ioctl is issued synchronously and EAFNOSUPPORT
 * is treated as success; otherwise the ioctl is queued through
 * ifnet_ioctl_async() and assumed to succeed.  The outcome is only
 * logged: this routine always returns 0.
 */
static errno_t
if_mcasts_update_common(struct ifnet * ifp, bool sync)
{
	errno_t err = 0;

	if (!sync) {
		ifnet_ioctl_async(ifp, SIOCADDMULTI);
	} else {
		err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
		if (err == EAFNOSUPPORT) {
			err = 0;
		}
	}

	DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
	    "(err=%d)\n", if_name(ifp),
	    (err == 0 ? "successfully restored" : "failed to restore"),
	    ifp->if_updatemcasts, err);

	/* just return success */
	return 0;
}

/*
 * Restore suspended link-layer multicast memberships using the
 * asynchronous ioctl path of if_mcasts_update_common().
 */
errno_t
if_mcasts_update_async(struct ifnet *ifp)
{
	return if_mcasts_update_common(ifp, false);
}

/*
 * Restore suspended link-layer multicast memberships using the
 * synchronous ioctl path of if_mcasts_update_common().
 */
errno_t
if_mcasts_update(struct ifnet *ifp)
{
	return if_mcasts_update_common(ifp, true);
}