/*
* Copyright (c) 1999-2025 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
#include "net/if_var.h"
#include <net/dlil_var_private.h>
LCK_ATTR_DECLARE(dlil_lck_attributes, 0, 0);
LCK_GRP_DECLARE(dlil_lock_group, "DLIL internal locks");
LCK_GRP_DECLARE(ifnet_lock_group, "ifnet locks");
LCK_GRP_DECLARE(ifnet_head_lock_group, "ifnet head lock");
LCK_GRP_DECLARE(ifnet_snd_lock_group, "ifnet snd locks");
LCK_GRP_DECLARE(ifnet_rcv_lock_group, "ifnet rcv locks");
LCK_ATTR_DECLARE(ifnet_lock_attr, 0, 0);
LCK_RW_DECLARE_ATTR(ifnet_head_lock, &ifnet_head_lock_group,
&dlil_lck_attributes);
LCK_MTX_DECLARE_ATTR(dlil_ifnet_lock, &dlil_lock_group,
&dlil_lck_attributes);
LCK_MTX_DECLARE_ATTR(dlil_thread_sync_lock, &dlil_lock_group,
&dlil_lck_attributes);
uint32_t dlil_pending_thread_cnt = 0;
/*
* Forward declarations.
*/
__private_extern__ void link_rtrequest(int, struct rtentry *, struct sockaddr *);
__private_extern__ void if_rtproto_del(struct ifnet *ifp, int protocol);
/*
* Utility routines
*/
kern_return_t
dlil_affinity_set(struct thread *tp, u_int32_t tag)
{
thread_affinity_policy_data_t policy;
bzero(&policy, sizeof(policy));
policy.affinity_tag = tag;
return thread_policy_set(tp, THREAD_AFFINITY_POLICY,
(thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT);
}
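/*
 * Illustrative sketch (not compiled): a DLIL worker thread that shares
 * an affinity set with its peers can tag itself on first run. The
 * surrounding function here is hypothetical, but dlth_affinity and
 * dlth_affinity_tag are the fields used for this purpose elsewhere in
 * this file.
 *
 *	static void
 *	example_worker_func(void *v, wait_result_t w)
 *	{
 *		struct dlil_threading_info *inp = v;
 *
 *		if (inp->dlth_affinity) {
 *			(void) dlil_affinity_set(current_thread(),
 *			    inp->dlth_affinity_tag);
 *		}
 *		...
 *	}
 */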
void
dlil_incr_pending_thread_count(void)
{
LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
lck_mtx_lock(&dlil_thread_sync_lock);
dlil_pending_thread_cnt++;
lck_mtx_unlock(&dlil_thread_sync_lock);
}
void
dlil_decr_pending_thread_count(void)
{
LCK_MTX_ASSERT(&dlil_thread_sync_lock, LCK_MTX_ASSERT_NOTOWNED);
lck_mtx_lock(&dlil_thread_sync_lock);
VERIFY(dlil_pending_thread_cnt > 0);
dlil_pending_thread_cnt--;
if (dlil_pending_thread_cnt == 0) {
wakeup(&dlil_pending_thread_cnt);
}
lck_mtx_unlock(&dlil_thread_sync_lock);
}
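/*
 * A spawner that must wait for all pending DLIL threads to finish
 * starting up could, as a sketch that follows the sleep/wakeup
 * convention above, block on the counter:
 *
 *	lck_mtx_lock(&dlil_thread_sync_lock);
 *	while (dlil_pending_thread_cnt != 0) {
 *		(void) msleep(&dlil_pending_thread_cnt,
 *		    &dlil_thread_sync_lock, (PZERO - 1),
 *		    "dlil_pending_thread", NULL);
 *	}
 *	lck_mtx_unlock(&dlil_thread_sync_lock);
 *
 * dlil_decr_pending_thread_count() issues the matching wakeup() when
 * the count drains to zero.
 */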
boolean_t
packet_has_vlan_tag(struct mbuf * m)
{
u_int tag = 0;
if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
tag = EVL_VLANOFTAG(m->m_pkthdr.vlan_tag);
if (tag == 0) {
/* the packet is just priority-tagged, clear the bit */
m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
}
}
return tag != 0;
}
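/*
 * Worked example: for a tag of 0x6001, EVL_VLANOFTAG() yields VLAN ID 1
 * (priority 3 lives in the upper PCP bits), so this returns TRUE. For a
 * priority-only tag such as 0x6000 the VLAN ID is 0, in which case the
 * CSUM_VLAN_TAG_VALID bit is cleared and FALSE is returned.
 */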
/*
* Monitor functions.
*/
void
if_flt_monitor_busy(struct ifnet *ifp)
{
LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
++ifp->if_flt_busy;
VERIFY(ifp->if_flt_busy != 0);
}
void
if_flt_monitor_unbusy(struct ifnet *ifp)
{
if_flt_monitor_leave(ifp);
}
void
if_flt_monitor_enter(struct ifnet *ifp)
{
LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
while (ifp->if_flt_busy) {
++ifp->if_flt_waiters;
(void) msleep(&ifp->if_flt_head, &ifp->if_flt_lock,
(PZERO - 1), "if_flt_monitor", NULL);
}
if_flt_monitor_busy(ifp);
}
void
if_flt_monitor_leave(struct ifnet *ifp)
{
LCK_MTX_ASSERT(&ifp->if_flt_lock, LCK_MTX_ASSERT_OWNED);
VERIFY(ifp->if_flt_busy != 0);
--ifp->if_flt_busy;
if (ifp->if_flt_busy == 0 && ifp->if_flt_waiters > 0) {
ifp->if_flt_waiters = 0;
wakeup(&ifp->if_flt_head);
}
}
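/*
 * Illustrative pattern (sketch): a walker of the interface filter list
 * marks it busy so it can drop if_flt_lock while calling out, and a
 * modifier uses if_flt_monitor_enter()/if_flt_monitor_leave() to wait
 * for walkers to drain first:
 *
 *	lck_mtx_lock_spin(&ifp->if_flt_lock);
 *	if_flt_monitor_busy(ifp);
 *	lck_mtx_unlock(&ifp->if_flt_lock);
 *	... walk ifp->if_flt_head, calling filters ...
 *	lck_mtx_lock_spin(&ifp->if_flt_lock);
 *	if_flt_monitor_unbusy(ifp);
 *	lck_mtx_unlock(&ifp->if_flt_lock);
 */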
struct dlil_ifnet *
dlif_ifnet_alloc(void)
{
return kalloc_type(struct dlil_ifnet, Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
void
dlif_ifnet_free(struct dlil_ifnet *ifnet)
{
if (ifnet != NULL) {
kfree_type(struct dlil_ifnet, ifnet);
}
}
struct ifnet_filter *
dlif_filt_alloc(void)
{
return kalloc_type(struct ifnet_filter, Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
void
dlif_filt_free(struct ifnet_filter *filt)
{
if (filt != NULL) {
kfree_type(struct ifnet_filter, filt);
}
}
struct if_proto *
dlif_proto_alloc(void)
{
return kalloc_type(struct if_proto, Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
void
dlif_proto_free(struct if_proto *ifproto)
{
if (ifproto != NULL) {
kfree_type(struct if_proto, ifproto);
}
}
struct tcpstat_local *
dlif_tcpstat_alloc(void)
{
return kalloc_type(struct tcpstat_local, Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
void
dlif_tcpstat_free(struct tcpstat_local *if_tcp_stat)
{
if (if_tcp_stat != NULL) {
kfree_type(struct tcpstat_local, if_tcp_stat);
}
}
struct udpstat_local *
dlif_udpstat_alloc(void)
{
return kalloc_type(struct udpstat_local, Z_WAITOK | Z_ZERO | Z_NOFAIL);
}
void
dlif_udpstat_free(struct udpstat_local *if_udp_stat)
{
if (if_udp_stat != NULL) {
kfree_type(struct udpstat_local, if_udp_stat);
}
}
struct ifaddr *
dlil_alloc_lladdr(struct ifnet *ifp, const struct sockaddr_dl *ll_addr)
{
struct ifaddr *ifa, *oifa = NULL;
struct sockaddr_dl *addr_sdl, *mask_sdl;
char workbuf[IFNAMSIZ * 2];
int namelen, masklen, socksize;
struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_EXCLUSIVE);
VERIFY(ll_addr == NULL || ll_addr->sdl_alen == ifp->if_addrlen);
namelen = scnprintf(workbuf, sizeof(workbuf), "%s",
if_name(ifp));
masklen = offsetof(struct sockaddr_dl, sdl_data[0])
+ ((namelen > 0) ? namelen : 0);
socksize = masklen + ifp->if_addrlen;
#define ROUNDUP(a) (1 + (((a) - 1) | (sizeof (u_int32_t) - 1)))
if ((u_int32_t)socksize < sizeof(struct sockaddr_dl)) {
socksize = sizeof(struct sockaddr_dl);
}
socksize = ROUNDUP(socksize);
#undef ROUNDUP
ifa = ifp->if_lladdr;
if (socksize > DLIL_SDLMAXLEN ||
(ifa != NULL && ifa != &dl_if->dl_if_lladdr.ifa)) {
/*
 * Rare, but in the event that the link address requires
 * more storage space than DLIL_SDLMAXLEN, allocate the
 * largest possible storage for address and mask, so that
 * the same space can be reused when if_addrlen later
 * grows or shrinks.
*/
struct dl_if_lladdr_xtra_space *__single dl_if_lladdr_ext;
if (ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa) {
dl_if_lladdr_ext = zalloc_permanent(
sizeof(*dl_if_lladdr_ext), ZALIGN(struct ifaddr));
ifa = &dl_if_lladdr_ext->ifa;
ifa_lock_init(ifa);
ifa_initref(ifa);
/* Don't set IFD_ALLOC, as this is permanent */
ifa->ifa_debug = IFD_LINK;
} else {
dl_if_lladdr_ext = __unsafe_forge_single(
struct dl_if_lladdr_xtra_space*, ifa);
ifa = &dl_if_lladdr_ext->ifa;
}
IFA_LOCK(ifa);
/* address and mask sockaddr_dl locations */
bzero(dl_if_lladdr_ext->addr_sdl_bytes,
sizeof(dl_if_lladdr_ext->addr_sdl_bytes));
bzero(dl_if_lladdr_ext->mask_sdl_bytes,
sizeof(dl_if_lladdr_ext->mask_sdl_bytes));
addr_sdl = SDL(dl_if_lladdr_ext->addr_sdl_bytes);
mask_sdl = SDL(dl_if_lladdr_ext->mask_sdl_bytes);
} else {
VERIFY(ifa == NULL || ifa == &dl_if->dl_if_lladdr.ifa);
/*
* Use the storage areas for address and mask within the
* dlil_ifnet structure. This is the most common case.
*/
if (ifa == NULL) {
ifa = &dl_if->dl_if_lladdr.ifa;
ifa_lock_init(ifa);
ifa_initref(ifa);
/* Don't set IFD_ALLOC, as this is permanent */
ifa->ifa_debug = IFD_LINK;
}
IFA_LOCK(ifa);
/* address and mask sockaddr_dl locations */
bzero(dl_if->dl_if_lladdr.addr_sdl_bytes,
sizeof(dl_if->dl_if_lladdr.addr_sdl_bytes));
bzero(dl_if->dl_if_lladdr.mask_sdl_bytes,
sizeof(dl_if->dl_if_lladdr.mask_sdl_bytes));
addr_sdl = SDL(dl_if->dl_if_lladdr.addr_sdl_bytes);
mask_sdl = SDL(dl_if->dl_if_lladdr.mask_sdl_bytes);
}
if (ifp->if_lladdr != ifa) {
oifa = ifp->if_lladdr;
ifp->if_lladdr = ifa;
}
VERIFY(ifa->ifa_debug == IFD_LINK);
ifa->ifa_ifp = ifp;
ifa->ifa_rtrequest = link_rtrequest;
ifa->ifa_addr = SA(addr_sdl);
addr_sdl->sdl_len = (u_char)socksize;
addr_sdl->sdl_family = AF_LINK;
if (namelen > 0) {
bcopy(workbuf, addr_sdl->sdl_data, min(namelen,
sizeof(addr_sdl->sdl_data)));
addr_sdl->sdl_nlen = (u_char)namelen;
} else {
addr_sdl->sdl_nlen = 0;
}
addr_sdl->sdl_index = ifp->if_index;
addr_sdl->sdl_type = ifp->if_type;
if (ll_addr != NULL) {
addr_sdl->sdl_alen = ll_addr->sdl_alen;
bcopy(CONST_LLADDR(ll_addr), LLADDR(addr_sdl), addr_sdl->sdl_alen);
} else {
addr_sdl->sdl_alen = 0;
}
ifa->ifa_netmask = SA(mask_sdl);
mask_sdl->sdl_len = (u_char)masklen;
while (namelen > 0) {
mask_sdl->sdl_data[--namelen] = 0xff;
}
IFA_UNLOCK(ifa);
if (oifa != NULL) {
ifa_remref(oifa);
}
return ifa;
}
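/*
 * Worked sizing example (typical sockaddr_dl layout assumed): for an
 * interface named "en0" with a 6-byte Ethernet address, namelen is 3,
 * masklen is offsetof(struct sockaddr_dl, sdl_data[0]) + 3, and
 * socksize is masklen + 6, bumped to at least
 * sizeof(struct sockaddr_dl) and then rounded up to a 4-byte boundary.
 * The name and the link-layer address share sdl_data[], and the
 * netmask's 0xff bytes cover only the name portion, so route lookups
 * match on the interface name.
 */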
__private_extern__ int
dlil_alloc_local_stats(struct ifnet *ifp)
{
int ret = EINVAL;
if (ifp == NULL) {
goto end;
}
if (ifp->if_tcp_stat == NULL && ifp->if_udp_stat == NULL) {
ifp->if_tcp_stat = dlif_tcpstat_alloc();
ifp->if_udp_stat = dlif_udpstat_alloc();
VERIFY(IS_P2ALIGNED(ifp->if_tcp_stat, sizeof(u_int64_t)) &&
IS_P2ALIGNED(ifp->if_udp_stat, sizeof(u_int64_t)));
ret = 0;
}
if (ifp->if_ipv4_stat == NULL) {
ifp->if_ipv4_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO);
}
if (ifp->if_ipv6_stat == NULL) {
ifp->if_ipv6_stat = kalloc_type(struct if_tcp_ecn_stat, Z_WAITOK | Z_ZERO);
}
end:
if (ifp != NULL && ret != 0) {
if (ifp->if_tcp_stat != NULL) {
dlif_tcpstat_free(ifp->if_tcp_stat);
ifp->if_tcp_stat = NULL;
}
if (ifp->if_udp_stat != NULL) {
dlif_udpstat_free(ifp->if_udp_stat);
ifp->if_udp_stat = NULL;
}
/* The macro kfree_type sets the passed pointer to NULL */
if (ifp->if_ipv4_stat != NULL) {
kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv4_stat);
}
if (ifp->if_ipv6_stat != NULL) {
kfree_type(struct if_tcp_ecn_stat, ifp->if_ipv6_stat);
}
}
return ret;
}
errno_t
dlil_if_ref(struct ifnet *ifp)
{
struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
if (dl_if == NULL) {
return EINVAL;
}
lck_mtx_lock_spin(&dl_if->dl_if_lock);
++dl_if->dl_if_refcnt;
if (dl_if->dl_if_refcnt == 0) {
panic("%s: wraparound refcnt for ifp=%p", __func__, ifp);
/* NOTREACHED */
}
lck_mtx_unlock(&dl_if->dl_if_lock);
return 0;
}
errno_t
dlil_if_free(struct ifnet *ifp)
{
struct dlil_ifnet *dl_if = (struct dlil_ifnet *)ifp;
bool need_release = false;
if (dl_if == NULL) {
return EINVAL;
}
lck_mtx_lock_spin(&dl_if->dl_if_lock);
switch (dl_if->dl_if_refcnt) {
case 0:
panic("%s: negative refcnt for ifp=%p", __func__, ifp);
/* NOTREACHED */
break;
case 1:
if ((ifp->if_refflags & IFRF_EMBRYONIC) != 0) {
need_release = true;
}
break;
default:
break;
}
--dl_if->dl_if_refcnt;
lck_mtx_unlock(&dl_if->dl_if_lock);
if (need_release) {
_dlil_if_release(ifp, true);
}
return 0;
}
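/*
 * Sketch of the intended pairing: a caller that must keep the ifnet
 * from being recycled across an unlocked section takes a reference and
 * drops it when done.
 *
 *	if (dlil_if_ref(ifp) == 0) {
 *		... use ifp without holding dl_if_lock ...
 *		(void) dlil_if_free(ifp);
 *	}
 *
 * The final release of an interface still marked IFRF_EMBRYONIC also
 * triggers _dlil_if_release() to recycle its name storage.
 */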
void
_dlil_if_release(ifnet_t ifp, bool clear_in_use)
{
struct dlil_ifnet *dlifp = (struct dlil_ifnet *)ifp;
VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_count) > 0);
if (!(ifp->if_xflags & IFXF_ALLOC_KPI)) {
VERIFY(OSDecrementAtomic64(&net_api_stats.nas_ifnet_alloc_os_count) > 0);
}
ifnet_lock_exclusive(ifp);
kfree_data_counted_by(ifp->if_broadcast.ptr, ifp->if_broadcast.length);
lck_mtx_lock(&dlifp->dl_if_lock);
/* Copy the if name to the dedicated storage */
ifp->if_name = tsnprintf(dlifp->dl_if_namestorage, sizeof(dlifp->dl_if_namestorage),
"%s", ifp->if_name);
/* Reset external name (name + unit) */
ifp->if_xname = tsnprintf(dlifp->dl_if_xnamestorage, sizeof(dlifp->dl_if_xnamestorage),
"%s?", ifp->if_name);
if (clear_in_use) {
ASSERT((dlifp->dl_if_flags & DLIF_INUSE) != 0);
dlifp->dl_if_flags &= ~DLIF_INUSE;
}
lck_mtx_unlock(&dlifp->dl_if_lock);
ifnet_lock_done(ifp);
}
__private_extern__ void
dlil_if_release(ifnet_t ifp)
{
_dlil_if_release(ifp, false);
}
void
if_proto_ref(struct if_proto *proto)
{
os_atomic_inc(&proto->refcount, relaxed);
}
void
if_proto_free(struct if_proto *proto)
{
u_int32_t oldval;
struct ifnet *ifp = proto->ifp;
u_int32_t proto_family = proto->protocol_family;
struct kev_dl_proto_data ev_pr_data;
oldval = os_atomic_dec_orig(&proto->refcount, relaxed);
if (oldval > 1) {
return;
}
if (proto->proto_kpi == kProtoKPI_v1) {
if (proto->kpi.v1.detached) {
proto->kpi.v1.detached(ifp, proto->protocol_family);
}
}
if (proto->proto_kpi == kProtoKPI_v2) {
if (proto->kpi.v2.detached) {
proto->kpi.v2.detached(ifp, proto->protocol_family);
}
}
/*
 * Clean up routes that may still be in the routing table for that
* interface/protocol pair.
*/
if_rtproto_del(ifp, proto_family);
ifnet_lock_shared(ifp);
/* No more references on this; the protocol must have been detached */
VERIFY(proto->detached);
/*
 * The reserved field carries the number of protocols still attached
* (subject to change)
*/
ev_pr_data.proto_family = proto_family;
ev_pr_data.proto_remaining_count = dlil_ifp_protolist(ifp, NULL, 0);
ifnet_lock_done(ifp);
dlil_post_msg(ifp, KEV_DL_SUBCLASS, KEV_DL_PROTO_DETACHED,
(struct net_event_data *)&ev_pr_data,
sizeof(struct kev_dl_proto_data), FALSE);
if (ev_pr_data.proto_remaining_count == 0) {
/*
* The protocol count has gone to zero, mark the interface down.
* This used to be done by configd.KernelEventMonitor, but that
* is inherently prone to races (rdar://problem/30810208).
*/
(void) ifnet_set_flags(ifp, 0, IFF_UP);
(void) ifnet_ioctl(ifp, 0, SIOCSIFFLAGS, NULL);
dlil_post_sifflags_msg(ifp);
}
dlif_proto_free(proto);
}
__private_extern__ u_int32_t
dlil_ifp_protolist(struct ifnet *ifp, protocol_family_t *list __counted_by(list_count),
u_int32_t list_count)
{
u_int32_t count = 0;
int i;
ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
if (ifp->if_proto_hash == NULL) {
goto done;
}
for (i = 0; i < PROTO_HASH_SLOTS; i++) {
if_proto_ref_t proto;
SLIST_FOREACH(proto, &ifp->if_proto_hash[i], next_hash) {
if (list != NULL && count < list_count) {
list[count] = proto->protocol_family;
}
count++;
}
}
done:
return count;
}
__private_extern__ u_int32_t
if_get_protolist(struct ifnet * ifp, u_int32_t *__counted_by(count) protolist, u_int32_t count)
{
u_int32_t actual_count;
ifnet_lock_shared(ifp);
actual_count = dlil_ifp_protolist(ifp, protolist, count);
ifnet_lock_done(ifp);
return actual_count;
}
__private_extern__ void
if_free_protolist(u_int32_t *list)
{
kfree_data_addr(list);
}
boolean_t
dlil_is_native_netif_nexus(ifnet_t ifp)
{
return (ifp->if_eflags & IFEF_SKYWALK_NATIVE) && ifp->if_na != NULL;
}
/*
* Caller must already be holding ifnet lock.
*/
struct if_proto *
find_attached_proto(struct ifnet *ifp, u_int32_t protocol_family)
{
struct if_proto *proto = NULL;
u_int32_t i = proto_hash_value(protocol_family);
ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
if (ifp->if_proto_hash != NULL) {
proto = SLIST_FIRST(&ifp->if_proto_hash[i]);
}
while (proto != NULL && proto->protocol_family != protocol_family) {
proto = SLIST_NEXT(proto, next_hash);
}
if (proto != NULL) {
if_proto_ref(proto);
}
return proto;
}
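/*
 * Typical usage (sketch): look up under the ifnet lock, then drop the
 * reference with if_proto_free() once the protocol is no longer needed.
 *
 *	ifnet_lock_shared(ifp);
 *	proto = find_attached_proto(ifp, PF_INET);
 *	ifnet_lock_done(ifp);
 *	if (proto != NULL) {
 *		... use proto ...
 *		if_proto_free(proto);
 *	}
 */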
/*
* Clat routines.
*/
/*
 * This routine returns nonzero when CLAT translation is needed, i.e. when
 * the destination address is not a loopback, link-local, multicast or
 * broadcast address.
*/
int
dlil_is_clat_needed(protocol_family_t proto_family, mbuf_t m)
{
int ret = 0;
switch (proto_family) {
case PF_INET: {
struct ip *iph = mtod(m, struct ip *);
if (CLAT46_NEEDED(ntohl(iph->ip_dst.s_addr))) {
ret = 1;
}
break;
}
case PF_INET6: {
struct ip6_hdr *ip6h = mtod(m, struct ip6_hdr *);
if ((size_t)m_pktlen(m) >= sizeof(struct ip6_hdr) &&
CLAT64_NEEDED(&ip6h->ip6_dst)) {
ret = 1;
}
break;
}
}
return ret;
}
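/*
 * An output path would typically combine this check with the
 * translator below, along these lines (sketch):
 *
 *	if (proto_family == PF_INET && IS_INTF_CLAT46(ifp) &&
 *	    dlil_is_clat_needed(proto_family, m)) {
 *		error = dlil_clat46(ifp, &proto_family, &m);
 *	}
 *
 * On success, proto_family has been updated to PF_INET6 and m may
 * point to a different mbuf.
 */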
/*
 * @brief This routine translates an IPv4 packet to IPv6,
 * updates the protocol checksum, and also translates the
 * ICMP type/code along with the inner header.
 *
 * @param ifp Pointer to the interface.
 * @param proto_family Pointer to the protocol family; it is updated if the
 * function performs the translation successfully.
 * @param m Pointer to the pointer to the packet. Needed because this
 * routine can end up replacing the mbuf with a different one.
*
* @return 0 on success or else a negative value.
*/
errno_t
dlil_clat46(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
VERIFY(*proto_family == PF_INET);
VERIFY(IS_INTF_CLAT46(ifp));
pbuf_t pbuf_store, *pbuf = NULL;
struct ip *iph = NULL;
struct in_addr osrc, odst;
uint8_t proto = 0;
struct in6_addr src_storage = {};
struct in6_addr *src = NULL;
struct sockaddr_in6 dstsock = {};
int error = 0;
uint16_t off = 0;
uint16_t tot_len = 0;
uint16_t ip_id_val = 0;
uint16_t ip_frag_off = 0;
boolean_t is_frag = FALSE;
boolean_t is_first_frag = TRUE;
boolean_t is_last_frag = TRUE;
/*
* Ensure that the incoming mbuf chain contains a valid
* IPv4 header in contiguous memory, or exit early.
*/
if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip) ||
((size_t)(*m)->m_len < sizeof(struct ip) &&
(*m = m_pullup(*m, sizeof(struct ip))) == NULL)) {
ip6stat.ip6s_clat464_in_tooshort_drop++;
return -1;
}
iph = mtod(*m, struct ip *);
osrc = iph->ip_src;
odst = iph->ip_dst;
proto = iph->ip_p;
off = (uint16_t)(iph->ip_hl << 2);
ip_id_val = iph->ip_id;
ip_frag_off = ntohs(iph->ip_off) & IP_OFFMASK;
tot_len = ntohs(iph->ip_len);
/* Validate that mbuf contains IP payload equal to `iph->ip_len' */
if ((size_t)(*m)->m_pkthdr.len < tot_len) {
ip6stat.ip6s_clat464_in_tooshort_drop++;
return -1;
}
pbuf_init_mbuf(&pbuf_store, *m, ifp);
pbuf = &pbuf_store;
/*
 * For fragments other than the first, we only need to
 * adjust the checksum. For 4-to-6 translation, the
 * fragmentation header gets appended after the protocol
 * translation.
*/
if (ntohs(iph->ip_off) & ~(IP_DF | IP_RF)) {
is_frag = TRUE;
/* If the offset is not zero, it is not first frag */
if (ip_frag_off != 0) {
is_first_frag = FALSE;
}
/* If IP_MF is set, then it is not last frag */
if (ntohs(iph->ip_off) & IP_MF) {
is_last_frag = FALSE;
}
}
/*
* Translate IPv4 destination to IPv6 destination by using the
* prefixes learned through prior PLAT discovery.
*/
if ((error = nat464_synthesize_ipv6(ifp, &odst, &dstsock.sin6_addr)) != 0) {
ip6stat.ip6s_clat464_out_v6synthfail_drop++;
goto cleanup;
}
dstsock.sin6_len = sizeof(struct sockaddr_in6);
dstsock.sin6_family = AF_INET6;
/*
 * Retrieve the local IPv6 CLAT46 address reserved for stateless
* translation.
*/
src = in6_selectsrc_core(&dstsock, 0, ifp, 0, &src_storage, NULL, &error,
NULL, NULL, TRUE);
if (src == NULL) {
ip6stat.ip6s_clat464_out_nov6addr_drop++;
error = -1;
goto cleanup;
}
/*
* Translate the IP header part first.
* NOTE: `nat464_translate_46' handles the situation where the value
* `off' is past the end of the mbuf chain that is associated with
* the pbuf, in a graceful manner.
*/
error = (nat464_translate_46(pbuf, off, iph->ip_tos, iph->ip_p,
iph->ip_ttl, src_storage, dstsock.sin6_addr, tot_len) == NT_NAT64) ? 0 : -1;
iph = NULL; /* Invalidate iph as pbuf has been modified */
if (error != 0) {
ip6stat.ip6s_clat464_out_46transfail_drop++;
goto cleanup;
}
/*
* Translate protocol header, update checksum, checksum flags
* and related fields.
*/
error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc, (struct nat464_addr *)&odst,
proto, PF_INET, PF_INET6, NT_OUT, !is_first_frag) == NT_NAT64) ? 0 : -1;
if (error != 0) {
ip6stat.ip6s_clat464_out_46proto_transfail_drop++;
goto cleanup;
}
/* Now insert the IPv6 fragment header */
if (is_frag) {
error = nat464_insert_frag46(pbuf, ip_id_val, ip_frag_off, is_last_frag);
if (error != 0) {
ip6stat.ip6s_clat464_out_46frag_transfail_drop++;
goto cleanup;
}
}
cleanup:
if (pbuf_is_valid(pbuf)) {
*m = pbuf->pb_mbuf;
pbuf->pb_mbuf = NULL;
pbuf_destroy(pbuf);
} else {
error = -1;
*m = NULL;
ip6stat.ip6s_clat464_out_invalpbuf_drop++;
}
if (error == 0) {
*proto_family = PF_INET6;
ip6stat.ip6s_clat464_out_success++;
}
return error;
}
/*
 * @brief This routine translates an incoming IPv6 packet to IPv4,
 * updates the protocol checksum, and also translates the ICMPv6
 * outer and inner headers.
*
* @return 0 on success or else a negative value.
*/
errno_t
dlil_clat64(ifnet_t ifp, protocol_family_t *proto_family, mbuf_t *m)
{
VERIFY(*proto_family == PF_INET6);
VERIFY(IS_INTF_CLAT46(ifp));
struct ip6_hdr *ip6h = NULL;
struct in6_addr osrc, odst;
uint8_t proto = 0;
struct in6_ifaddr *ia6_clat_dst = NULL;
struct in_ifaddr *ia4_clat_dst = NULL;
struct in_addr *dst = NULL;
struct in_addr src;
int error = 0;
uint32_t off = 0;
u_int64_t tot_len = 0;
uint8_t tos = 0;
boolean_t is_first_frag = TRUE;
/*
* Ensure that the incoming mbuf chain contains a valid
* IPv6 header in contiguous memory, or exit early.
*/
if ((size_t)(*m)->m_pkthdr.len < sizeof(struct ip6_hdr) ||
((size_t)(*m)->m_len < sizeof(struct ip6_hdr) &&
(*m = m_pullup(*m, sizeof(struct ip6_hdr))) == NULL)) {
ip6stat.ip6s_clat464_in_tooshort_drop++;
return -1;
}
ip6h = mtod(*m, struct ip6_hdr *);
/* Validate that mbuf contains IP payload equal to ip6_plen */
if ((size_t)(*m)->m_pkthdr.len < ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr)) {
ip6stat.ip6s_clat464_in_tooshort_drop++;
return -1;
}
osrc = ip6h->ip6_src;
odst = ip6h->ip6_dst;
/*
* Retrieve the local CLAT46 reserved IPv6 address.
* Let the packet pass if we don't find one, as the flag
* may get set before IPv6 configuration has taken place.
*/
ia6_clat_dst = in6ifa_ifpwithflag(ifp, IN6_IFF_CLAT46);
if (ia6_clat_dst == NULL) {
goto done;
}
/*
 * Check if the original destination in the packet is the same as the
 * reserved CLAT46 IPv6 address.
*/
if (IN6_ARE_ADDR_EQUAL(&odst, &ia6_clat_dst->ia_addr.sin6_addr)) {
bool translate = false;
pbuf_t pbuf_store, *pbuf = NULL;
pbuf_init_mbuf(&pbuf_store, *m, ifp);
pbuf = &pbuf_store;
/*
* Retrieve the local CLAT46 IPv4 address reserved for stateless
* translation.
*/
ia4_clat_dst = inifa_ifpclatv4(ifp);
if (ia4_clat_dst == NULL) {
ifa_remref(&ia6_clat_dst->ia_ifa);
ip6stat.ip6s_clat464_in_nov4addr_drop++;
error = -1;
goto cleanup;
}
ifa_remref(&ia6_clat_dst->ia_ifa);
/* Translate IPv6 src to IPv4 src by removing the NAT64 prefix */
dst = &ia4_clat_dst->ia_addr.sin_addr;
error = nat464_synthesize_ipv4(ifp, &osrc, &src, &translate);
if (error != 0) {
ip6stat.ip6s_clat464_in_v4synthfail_drop++;
error = -1;
goto cleanup;
}
if (!translate) {
/* no translation required */
if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
/* only allow icmpv6 */
ip6stat.ip6s_clat464_in_v4synthfail_drop++;
error = -1;
}
goto cleanup;
}
ip6h = pbuf->pb_data;
off = sizeof(struct ip6_hdr);
proto = ip6h->ip6_nxt;
tos = (ntohl(ip6h->ip6_flow) >> 20) & 0xff;
tot_len = ntohs(ip6h->ip6_plen) + sizeof(struct ip6_hdr);
/*
* Translate the IP header and update the fragmentation
* header if needed
*/
error = (nat464_translate_64(pbuf, off, tos, &proto,
ip6h->ip6_hlim, src, *dst, tot_len, &is_first_frag) == NT_NAT64) ?
0 : -1;
ip6h = NULL; /* Invalidate ip6h as pbuf has been changed */
if (error != 0) {
ip6stat.ip6s_clat464_in_64transfail_drop++;
goto cleanup;
}
/*
* Translate protocol header, update checksum, checksum flags
* and related fields.
*/
error = (nat464_translate_proto(pbuf, (struct nat464_addr *)&osrc,
(struct nat464_addr *)&odst, proto, PF_INET6, PF_INET,
NT_IN, !is_first_frag) == NT_NAT64) ? 0 : -1;
if (error != 0) {
ip6stat.ip6s_clat464_in_64proto_transfail_drop++;
goto cleanup;
}
cleanup:
if (ia4_clat_dst != NULL) {
ifa_remref(&ia4_clat_dst->ia_ifa);
}
if (pbuf_is_valid(pbuf)) {
*m = pbuf->pb_mbuf;
pbuf->pb_mbuf = NULL;
pbuf_destroy(pbuf);
} else {
error = -1;
ip6stat.ip6s_clat464_in_invalpbuf_drop++;
}
if (error == 0 && translate) {
*proto_family = PF_INET;
ip6stat.ip6s_clat464_in_success++;
}
} /* CLAT traffic */
done:
return error;
}
/*
* Thread management
*/
void
dlil_clean_threading_info(struct dlil_threading_info *inp)
{
lck_mtx_destroy(&inp->dlth_lock, inp->dlth_lock_grp);
lck_grp_free(inp->dlth_lock_grp);
inp->dlth_lock_grp = NULL;
inp->dlth_flags = 0;
inp->dlth_wtot = 0;
bzero(inp->dlth_name_storage, sizeof(inp->dlth_name_storage));
inp->dlth_name = NULL;
inp->dlth_ifp = NULL;
VERIFY(qhead(&inp->dlth_pkts) == NULL && qempty(&inp->dlth_pkts));
qlimit(&inp->dlth_pkts) = 0;
bzero(&inp->dlth_stats, sizeof(inp->dlth_stats));
VERIFY(!inp->dlth_affinity);
inp->dlth_thread = THREAD_NULL;
inp->dlth_strategy = NULL;
VERIFY(inp->dlth_driver_thread == THREAD_NULL);
VERIFY(inp->dlth_poller_thread == THREAD_NULL);
VERIFY(inp->dlth_affinity_tag == 0);
#if IFNET_INPUT_SANITY_CHK
inp->dlth_pkts_cnt = 0;
#endif /* IFNET_INPUT_SANITY_CHK */
}
/*
* Lock management
*/
static errno_t
_dlil_get_lock_assertion_type(ifnet_lock_assert_t what, unsigned int *type)
{
switch (what) {
case IFNET_LCK_ASSERT_EXCLUSIVE:
*type = LCK_RW_ASSERT_EXCLUSIVE;
return 0;
case IFNET_LCK_ASSERT_SHARED:
*type = LCK_RW_ASSERT_SHARED;
return 0;
case IFNET_LCK_ASSERT_OWNED:
*type = LCK_RW_ASSERT_HELD;
return 0;
case IFNET_LCK_ASSERT_NOTOWNED:
/* nothing to do here for RW lock; bypass assert */
return ENOENT;
default:
panic("bad ifnet assert type: %d", what);
/* NOTREACHED */
}
}
__private_extern__ void
dlil_if_lock(void)
{
lck_mtx_lock(&dlil_ifnet_lock);
}
__private_extern__ void
dlil_if_unlock(void)
{
lck_mtx_unlock(&dlil_ifnet_lock);
}
__private_extern__ void
dlil_if_lock_assert(void)
{
LCK_MTX_ASSERT(&dlil_ifnet_lock, LCK_MTX_ASSERT_OWNED);
}
__private_extern__ void
ifnet_head_lock_assert(ifnet_lock_assert_t what)
{
unsigned int type = 0;
if (_dlil_get_lock_assertion_type(what, &type) == 0) {
LCK_RW_ASSERT(&ifnet_head_lock, type);
}
}
__private_extern__ void
ifnet_lock_assert(struct ifnet *ifp, ifnet_lock_assert_t what)
{
#if !MACH_ASSERT
#pragma unused(ifp)
#endif
unsigned int type = 0;
if (_dlil_get_lock_assertion_type(what, &type) == 0) {
LCK_RW_ASSERT(&ifp->if_lock, type);
}
}
__private_extern__ void
ifnet_lock_shared(struct ifnet *ifp)
{
lck_rw_lock_shared(&ifp->if_lock);
}
__private_extern__ void
ifnet_lock_exclusive(struct ifnet *ifp)
{
lck_rw_lock_exclusive(&ifp->if_lock);
}
__private_extern__ void
ifnet_lock_done(struct ifnet *ifp)
{
lck_rw_done(&ifp->if_lock);
}
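/*
 * These are thin wrappers around ifp->if_lock; a reader takes the
 * shared flavor around short field accesses (sketch):
 *
 *	ifnet_lock_shared(ifp);
 *	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_SHARED);
 *	... read ifnet state ...
 *	ifnet_lock_done(ifp);
 */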
#if INET
__private_extern__ void
if_inetdata_lock_shared(struct ifnet *ifp)
{
lck_rw_lock_shared(&ifp->if_inetdata_lock);
}
__private_extern__ void
if_inetdata_lock_exclusive(struct ifnet *ifp)
{
lck_rw_lock_exclusive(&ifp->if_inetdata_lock);
}
__private_extern__ void
if_inetdata_lock_done(struct ifnet *ifp)
{
lck_rw_done(&ifp->if_inetdata_lock);
}
#endif /* INET */
__private_extern__ void
if_inet6data_lock_shared(struct ifnet *ifp)
{
lck_rw_lock_shared(&ifp->if_inet6data_lock);
}
__private_extern__ void
if_inet6data_lock_exclusive(struct ifnet *ifp)
{
lck_rw_lock_exclusive(&ifp->if_inet6data_lock);
}
__private_extern__ void
if_inet6data_lock_done(struct ifnet *ifp)
{
lck_rw_done(&ifp->if_inet6data_lock);
}
__private_extern__ void
ifnet_head_lock_shared(void)
{
lck_rw_lock_shared(&ifnet_head_lock);
}
__private_extern__ void
ifnet_head_lock_exclusive(void)
{
lck_rw_lock_exclusive(&ifnet_head_lock);
}
__private_extern__ void
ifnet_head_done(void)
{
lck_rw_done(&ifnet_head_lock);
}
__private_extern__ void
ifnet_head_assert_exclusive(void)
{
LCK_RW_ASSERT(&ifnet_head_lock, LCK_RW_ASSERT_EXCLUSIVE);
}
static errno_t
if_mcasts_update_common(struct ifnet * ifp, bool sync)
{
errno_t err;
if (sync) {
err = ifnet_ioctl(ifp, 0, SIOCADDMULTI, NULL);
if (err == EAFNOSUPPORT) {
err = 0;
}
} else {
ifnet_ioctl_async(ifp, SIOCADDMULTI);
err = 0;
}
DLIL_PRINTF("%s: %s %d suspended link-layer multicast membership(s) "
"(err=%d)\n", if_name(ifp),
(err == 0 ? "successfully restored" : "failed to restore"),
ifp->if_updatemcasts, err);
/* just return success */
return 0;
}
errno_t
if_mcasts_update_async(struct ifnet *ifp)
{
return if_mcasts_update_common(ifp, false);
}
errno_t
if_mcasts_update(struct ifnet *ifp)
{
return if_mcasts_update_common(ifp, true);
}