/*
* Copyright (c) 2015-2022 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
#define _IP_VHL
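/* _IP_VHL makes <netinet/ip.h> define IP_VHL_V(), used by get_af_from_mbuf() */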
#include <skywalk/os_skywalk_private.h>
#include <skywalk/nexus/netif/nx_netif.h>
#include <skywalk/nexus/flowswitch/nx_flowswitch.h>
#include <net/ethernet.h>
#include <net/pktap.h>
#include <sys/kdebug.h>
#include <sys/sdt.h>
#define DBG_FUNC_NX_NETIF_HOST_ENQUEUE \
SKYWALKDBG_CODE(DBG_SKYWALK_NETIF, 2)
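/* trace code used to bracket per-kring enqueue work in nx_netif_host_output() */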
static void nx_netif_host_catch_tx(struct nexus_adapter *, bool);
static inline struct __kern_packet*
nx_netif_mbuf_to_kpkt(struct nexus_adapter *, struct mbuf *);
#define SK_IFCAP_CSUM (IFCAP_HWCSUM|IFCAP_CSUM_PARTIAL|IFCAP_CSUM_ZERO_INVERT)
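/*
 * Returns true if the host adapter should perform software GSO on its
 * TX path: native netif only, plumbed under a flowswitch, no hardware
 * TSO, and TX aggregation enabled.
 */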
static bool
nx_netif_host_is_gso_needed(struct nexus_adapter *na)
{
struct nx_netif *nif = ((struct nexus_netif_adapter *)na)->nifna_netif;
/*
* Don't enable for Compat netif.
*/
if (na->na_type != NA_NETIF_HOST) {
return false;
}
/*
* Don't enable if netif is not plumbed under a flowswitch.
*/
if (!NA_KERNEL_ONLY(na)) {
return false;
}
	/*
	 * Don't enable if HW TSO is enabled.
	 */
if (((nif->nif_hwassist & IFNET_TSO_IPV4) != 0) ||
((nif->nif_hwassist & IFNET_TSO_IPV6) != 0)) {
return false;
}
/*
* Don't enable if TX aggregation is disabled.
*/
if (sk_fsw_tx_agg_tcp == 0) {
return false;
}
return true;
}
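/*
 * Save or restore the driver's capability flags around Skywalk activation:
 * on activate, stash the driver's hwassist/capability bits and advertise
 * only what the Skywalk layer provides on top of them; on deactivate,
 * restore the originals.
 */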
static void
nx_netif_host_adjust_if_capabilities(struct nexus_adapter *na, bool activate)
{
struct nx_netif *nif = ((struct nexus_netif_adapter *)na)->nifna_netif;
struct ifnet *ifp = na->na_ifp;
ifnet_lock_exclusive(ifp);
if (activate) {
/* XXX: adi@apple.com - disable TSO and LRO for now */
nif->nif_hwassist = ifp->if_hwassist;
nif->nif_capabilities = ifp->if_capabilities;
nif->nif_capenable = ifp->if_capenable;
ifp->if_hwassist &= ~(IFNET_CHECKSUMF | IFNET_TSOF);
ifp->if_capabilities &= ~(SK_IFCAP_CSUM | IFCAP_TSO);
ifp->if_capenable &= ~(SK_IFCAP_CSUM | IFCAP_TSO);
/*
* Re-enable the capabilities which Skywalk layer provides:
*
* Native driver: a copy from packet to mbuf always occurs
* for each inbound and outbound packet; if hardware
* does not support csum offload, we leverage combined
* copy and checksum, and thus advertise IFNET_CSUM_PARTIAL.
* We also always enable 16KB jumbo mbuf support.
*
* Compat driver: inbound and outbound mbufs don't incur a
* copy, and so leave the driver advertised flags alone.
*/
if (NA_KERNEL_ONLY(na)) {
if (na->na_type == NA_NETIF_HOST) { /* native */
ifp->if_hwassist |=
IFNET_MULTIPAGES | (nif->nif_hwassist &
(IFNET_CHECKSUMF | IFNET_TSOF));
ifp->if_capabilities |=
(nif->nif_capabilities &
(SK_IFCAP_CSUM | IFCAP_TSO));
ifp->if_capenable |=
(nif->nif_capenable &
(SK_IFCAP_CSUM | IFCAP_TSO));
/*
* If hardware doesn't support IP and TCP/UDP csum offload,
* advertise IFNET_CSUM_PARTIAL.
*/
if ((ifp->if_hwassist & IFNET_UDP_TCP_TX_CHECKSUMF) !=
IFNET_UDP_TCP_TX_CHECKSUMF) {
ifp->if_hwassist |= IFNET_CSUM_PARTIAL | IFNET_CSUM_ZERO_INVERT;
ifp->if_capabilities |= IFCAP_CSUM_PARTIAL | IFCAP_CSUM_ZERO_INVERT;
ifp->if_capenable |= IFCAP_CSUM_PARTIAL | IFCAP_CSUM_ZERO_INVERT;
}
if (sk_fsw_tx_agg_tcp != 0) {
ifp->if_hwassist |= IFNET_TSOF;
ifp->if_capabilities |= IFCAP_TSO;
ifp->if_capenable |= IFCAP_TSO;
}
if (!nx_netif_host_is_gso_needed(na)) {
if_set_eflags(ifp, IFEF_SENDLIST);
}
} else { /* compat */
ifp->if_hwassist |=
(nif->nif_hwassist &
(IFNET_CHECKSUMF | IFNET_TSOF));
ifp->if_capabilities |=
(nif->nif_capabilities &
(SK_IFCAP_CSUM | IFCAP_TSO));
ifp->if_capenable |=
(nif->nif_capenable &
(SK_IFCAP_CSUM | IFCAP_TSO));
}
}
} else {
if (NA_KERNEL_ONLY(na) && na->na_type == NA_NETIF_HOST) {
if_clear_eflags(ifp, IFEF_SENDLIST);
}
/* Unset any capabilities previously set by Skywalk */
ifp->if_hwassist &= ~(IFNET_CHECKSUMF | IFNET_MULTIPAGES);
ifp->if_capabilities &= ~SK_IFCAP_CSUM;
ifp->if_capenable &= ~SK_IFCAP_CSUM;
if ((sk_fsw_tx_agg_tcp != 0) &&
(na->na_type == NA_NETIF_HOST)) {
ifp->if_hwassist &= ~IFNET_TSOF;
ifp->if_capabilities &= ~IFCAP_TSO;
ifp->if_capenable &= ~IFCAP_TSO;
}
/* Restore driver original flags */
ifp->if_hwassist |= (nif->nif_hwassist &
(IFNET_CHECKSUMF | IFNET_TSOF | IFNET_MULTIPAGES));
ifp->if_capabilities |=
(nif->nif_capabilities & (SK_IFCAP_CSUM | IFCAP_TSO));
ifp->if_capenable |=
(nif->nif_capenable & (SK_IFCAP_CSUM | IFCAP_TSO));
}
ifnet_lock_done(ifp);
}
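/* na_activate callback for netif host adapters */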
int
nx_netif_host_na_activate(struct nexus_adapter *na, na_activate_mode_t mode)
{
struct ifnet *ifp = na->na_ifp;
int error = 0;
ASSERT(na->na_type == NA_NETIF_HOST ||
na->na_type == NA_NETIF_COMPAT_HOST);
ASSERT(na->na_flags & NAF_HOST_ONLY);
SK_DF(SK_VERB_NETIF, "na \"%s\" (0x%llx) %s", na->na_name,
SK_KVA(na), na_activate_mode2str(mode));
switch (mode) {
case NA_ACTIVATE_MODE_ON:
VERIFY(SKYWALK_CAPABLE(ifp));
nx_netif_host_adjust_if_capabilities(na, true);
		/*
		 * Make Skywalk control the packet steering.
		 * Don't intercept tx packets if this is a netif compat
		 * adapter attached to a flowswitch.
		 */
nx_netif_host_catch_tx(na, true);
os_atomic_or(&na->na_flags, NAF_ACTIVE, relaxed);
break;
case NA_ACTIVATE_MODE_DEFUNCT:
VERIFY(SKYWALK_CAPABLE(ifp));
break;
case NA_ACTIVATE_MODE_OFF:
/* Release packet steering control. */
nx_netif_host_catch_tx(na, false);
/*
* Note that here we cannot assert SKYWALK_CAPABLE()
* as we're called in the destructor path.
*/
os_atomic_andnot(&na->na_flags, NAF_ACTIVE, relaxed);
nx_netif_host_adjust_if_capabilities(na, false);
break;
default:
VERIFY(0);
/* NOTREACHED */
__builtin_unreachable();
}
return error;
}
/* na_krings_create callback for netif host adapters */
int
nx_netif_host_krings_create(struct nexus_adapter *na, struct kern_channel *ch)
{
int ret;
SK_LOCK_ASSERT_HELD();
ASSERT(na->na_type == NA_NETIF_HOST ||
na->na_type == NA_NETIF_COMPAT_HOST);
ASSERT(na->na_flags & NAF_HOST_ONLY);
ret = na_rings_mem_setup(na, FALSE, ch);
if (ret == 0) {
struct __kern_channel_ring *kring;
uint32_t i;
/* drop by default until fully bound */
if (NA_KERNEL_ONLY(na)) {
na_kr_drop(na, TRUE);
}
for (i = 0; i < na_get_nrings(na, NR_RX); i++) {
kring = &NAKR(na, NR_RX)[i];
/* initialize the nx_mbq for the sw rx ring */
nx_mbq_safe_init(kring, &kring->ckr_rx_queue,
NX_MBQ_NO_LIMIT, &nexus_mbq_lock_group,
&nexus_lock_attr);
SK_DF(SK_VERB_NETIF,
"na \"%s\" (0x%llx) initialized host kr \"%s\" "
"(0x%llx) krflags 0x%b", na->na_name, SK_KVA(na),
kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
CKRF_BITS);
}
}
return ret;
}
/*
 * Destructor for netif host adapters; they also have an mbuf queue
 * on each ring connected to the host, so we need to purge those first.
 */
void
nx_netif_host_krings_delete(struct nexus_adapter *na, struct kern_channel *ch,
boolean_t defunct)
{
struct __kern_channel_ring *kring;
uint32_t i;
SK_LOCK_ASSERT_HELD();
ASSERT(na->na_type == NA_NETIF_HOST ||
na->na_type == NA_NETIF_COMPAT_HOST);
ASSERT(na->na_flags & NAF_HOST_ONLY);
if (NA_KERNEL_ONLY(na)) {
na_kr_drop(na, TRUE);
}
for (i = 0; i < na_get_nrings(na, NR_RX); i++) {
struct nx_mbq *q;
kring = &NAKR(na, NR_RX)[i];
q = &kring->ckr_rx_queue;
SK_DF(SK_VERB_NETIF,
"na \"%s\" (0x%llx) destroy host kr \"%s\" (0x%llx) "
"krflags 0x%b with qlen %u", na->na_name, SK_KVA(na),
kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
CKRF_BITS, nx_mbq_len(q));
nx_mbq_purge(q);
if (!defunct) {
nx_mbq_safe_destroy(q);
}
}
na_rings_mem_teardown(na, ch, defunct);
}
/* kring->ckr_na_sync callback for the host rx ring */
int
nx_netif_host_na_rxsync(struct __kern_channel_ring *kring,
struct proc *p, uint32_t flags)
{
#pragma unused(kring, p, flags)
return 0;
}
/*
* kring->ckr_na_sync callback for the host tx ring.
*/
int
nx_netif_host_na_txsync(struct __kern_channel_ring *kring, struct proc *p,
uint32_t flags)
{
#pragma unused(kring, p, flags)
return 0;
}
int
nx_netif_host_na_special(struct nexus_adapter *na, struct kern_channel *ch,
struct chreq *chr, nxspec_cmd_t spec_cmd)
{
ASSERT(na->na_type == NA_NETIF_HOST ||
na->na_type == NA_NETIF_COMPAT_HOST);
return nx_netif_na_special_common(na, ch, chr, spec_cmd);
}
/*
 * Intercept the packet steering routine in the tx path, so that we
 * can decide which queue is used for an mbuf. The second argument is
 * true to intercept, false to restore.
 */
static void
nx_netif_host_catch_tx(struct nexus_adapter *na, bool activate)
{
struct ifnet *ifp = na->na_ifp;
int err = 0;
ASSERT(na->na_type == NA_NETIF_HOST ||
na->na_type == NA_NETIF_COMPAT_HOST);
ASSERT(na->na_flags & NAF_HOST_ONLY);
	/*
	 * The common case is NA_KERNEL_ONLY, i.e. the netif is plumbed
	 * below the flowswitch. For the TXSTART compat driver and legacy
	 * models, don't intercept the DLIL output handler, since in those
	 * models packets from both the BSD stack and the flowswitch are
	 * enqueued directly to the classq via ifnet_enqueue().
	 *
	 * Otherwise, it's the uncommon case where a user channel is
	 * opened directly to the netif. Here we either intercept
	 * or restore the DLIL output handler.
	 */
if (activate) {
if (__improbable(!NA_KERNEL_ONLY(na))) {
return;
}
/*
* For native drivers only, intercept if_output();
* for compat, leave it alone since we don't need
* to perform any mbuf-pkt conversion.
*/
if (na->na_type == NA_NETIF_HOST) {
err = ifnet_set_output_handler(ifp,
nx_netif_host_is_gso_needed(na) ?
netif_gso_dispatch : nx_netif_host_output);
VERIFY(err == 0);
}
} else {
if (__improbable(!NA_KERNEL_ONLY(na))) {
return;
}
/*
* Restore original if_output() for native drivers.
*/
if (na->na_type == NA_NETIF_HOST) {
ifnet_reset_output_handler(ifp);
}
}
}
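/*
 * Infer the address family of an mbuf by locating pkt_hdr within the
 * mbuf chain and reading the IP version nibble it points at.
 */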
static int
get_af_from_mbuf(struct mbuf *m)
{
/*
* -fbounds-safety: Although m_pkthdr.pkt_hdr is a void * without
* annotations, here we can just mark the uint8_t *pkt_hdr as __single
	 * because we don't do any arithmetic and the only place we dereference
* it is to read the ip version, where having the bounds of a single
* 8-bit size is enough.
*/
uint8_t *__single pkt_hdr;
uint8_t ipv;
struct mbuf *m0;
int af;
pkt_hdr = m->m_pkthdr.pkt_hdr;
for (m0 = m; m0 != NULL; m0 = m0->m_next) {
if (pkt_hdr >= (uint8_t *)m0->m_data &&
pkt_hdr < (uint8_t *)m0->m_data + m0->m_len) {
break;
}
}
if (m0 == NULL) {
DTRACE_SKYWALK1(bad__pkthdr, struct mbuf *, m);
af = AF_UNSPEC;
goto done;
}
ipv = IP_VHL_V(*pkt_hdr);
if (ipv == 4) {
af = AF_INET;
} else if (ipv == 6) {
af = AF_INET6;
} else {
af = AF_UNSPEC;
}
done:
DTRACE_SKYWALK2(mbuf__af, int, af, struct mbuf *, m);
return af;
}
/*
* if_output() callback called by dlil_output() to handle mbufs coming out
* of the host networking stack. The mbuf will get converted to a packet,
* and enqueued to the classq of a Skywalk native interface.
*/
int
nx_netif_host_output(struct ifnet *ifp, struct mbuf *m_chain)
{
struct nx_netif *nif = NA(ifp)->nifna_netif;
struct __kern_channel_ring *currentkring = NULL;
struct kern_nexus *nx = nif->nif_nx;
struct nexus_adapter *hwna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_DEV);
struct nexus_adapter *hostna = nx_port_get_na(nx, NEXUS_PORT_NET_IF_HOST);
struct netif_stats *nifs = &NX_NETIF_PRIVATE(hwna->na_nx)->nif_stats;
struct mbuf *m_head = m_chain, *m = NULL, *drop_list = NULL, *free_list = NULL;
struct __kern_packet *pkt_chain_head, *pkt_chain_tail;
struct netif_qset *__single qset = NULL;
struct pktq pkt_q;
uint64_t qset_id;
bool qset_id_valid = false;
boolean_t pkt_drop = FALSE;
uint32_t n_pkts = 0, n_bytes = 0;
errno_t error = 0;
ASSERT(ifp->if_eflags & IFEF_SKYWALK_NATIVE);
ASSERT(hostna->na_type == NA_NETIF_HOST);
KPKTQ_INIT(&pkt_q);
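	/*
	 * Convert each mbuf in the chain to a packet, then either enqueue
	 * it right away or batch it in pkt_q for a single chain enqueue
	 * further below.
	 */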
while (m_head) {
struct __kern_channel_ring *kring;
pkt_drop = FALSE;
m = m_head;
m_head = m_head->m_nextpkt;
m->m_nextpkt = NULL;
uint32_t sc_idx = MBUF_SCIDX(m_get_service_class(m));
struct __kern_packet *kpkt;
		/*
		 * nx_netif_host_catch_tx() steers output packets here only
		 * for native interfaces; we must never get here for compat.
		 */
ASSERT(sc_idx < KPKT_SC_MAX_CLASSES);
kring = &hwna->na_tx_rings[hwna->na_kring_svc_lut[sc_idx]];
if (currentkring != kring) {
if (currentkring != NULL) {
KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_END), SK_KVA(currentkring),
error);
}
currentkring = kring;
KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_START), SK_KVA(currentkring));
}
if (__improbable(!NA_IS_ACTIVE(hwna) || !NA_IS_ACTIVE(hostna))) {
STATS_INC(nifs, NETIF_STATS_DROP_NA_INACTIVE);
SK_ERR("\"%s\" (0x%llx) not in skywalk mode anymore",
hwna->na_name, SK_KVA(hwna));
error = ENXIO;
pkt_drop = TRUE;
goto out;
}
/*
* Drop if the kring no longer accepts packets.
*/
if (__improbable(KR_DROP(&hostna->na_rx_rings[0]) || KR_DROP(kring))) {
STATS_INC(nifs, NETIF_STATS_DROP_KRDROP_MODE);
/* not a serious error, so no need to be chatty here */
SK_DF(SK_VERB_NETIF,
"kr \"%s\" (0x%llx) krflags 0x%b or %s in drop mode",
kring->ckr_name, SK_KVA(kring), kring->ckr_flags,
CKRF_BITS, ifp->if_xname);
error = ENXIO;
pkt_drop = TRUE;
goto out;
}
if (__improbable(((unsigned)m_pktlen(m) + ifp->if_tx_headroom) >
kring->ckr_max_pkt_len)) { /* too long for us */
STATS_INC(nifs, NETIF_STATS_DROP_BADLEN);
SK_ERR("\"%s\" (0x%llx) from_host, drop packet size %u > %u",
hwna->na_name, SK_KVA(hwna), m_pktlen(m),
kring->ckr_max_pkt_len);
pkt_drop = TRUE;
goto out;
}
/*
* Convert mbuf to packet and enqueue it.
*/
kpkt = nx_netif_mbuf_to_kpkt(hwna, m);
if (kpkt == NULL) {
error = ENOBUFS;
pkt_drop = TRUE;
goto out;
}
if ((m->m_pkthdr.pkt_flags & PKTF_SKIP_PKTAP) == 0 &&
pktap_total_tap_count != 0) {
int af = get_af_from_mbuf(m);
if (af != AF_UNSPEC) {
nx_netif_pktap_output(ifp, af, kpkt);
}
}
if (NX_LLINK_PROV(nif->nif_nx) &&
ifp->if_traffic_rule_count > 0 &&
!qset_id_valid &&
nxctl_inet_traffic_rule_find_qset_id_with_pkt(ifp->if_xname,
kpkt, &qset_id) == 0) {
qset_id_valid = true;
			/*
			 * This always returns a qset: if the qset id is
			 * invalid, the default qset is returned.
			 */
qset = nx_netif_find_qset(nif, qset_id);
ASSERT(qset != NULL);
}
if (qset != NULL) {
kpkt->pkt_qset_idx = qset->nqs_idx;
}
if (!netif_chain_enqueue_enabled(ifp)) {
if (qset != NULL) {
error = ifnet_enqueue_ifcq_pkt(ifp,
qset->nqs_ifcq, kpkt,
false, &pkt_drop);
nx_netif_qset_release(&qset);
} else {
/* callee consumes packet */
error = ifnet_enqueue_pkt(ifp, kpkt, false, &pkt_drop);
}
if (pkt_drop) {
STATS_INC(nifs, NETIF_STATS_TX_DROP_ENQ_AQM);
}
} else {
KPKTQ_ENQUEUE(&pkt_q, kpkt);
n_pkts++;
n_bytes += m->m_pkthdr.len;
}
out:
/* always free mbuf (even in the success case) */
m->m_nextpkt = free_list;
free_list = m;
if (__improbable(pkt_drop)) {
STATS_INC(nifs, NETIF_STATS_DROP);
}
if (__improbable(error)) {
break;
}
}
if (currentkring != NULL) {
KDBG((SK_KTRACE_NETIF_HOST_ENQUEUE | DBG_FUNC_END), SK_KVA(currentkring),
error);
}
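	/* flush the batched packets as a single chain */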
if (__probable(!KPKTQ_EMPTY(&pkt_q))) {
pkt_chain_head = KPKTQ_FIRST(&pkt_q);
pkt_chain_tail = KPKTQ_LAST(&pkt_q);
if (qset != NULL) {
error = ifnet_enqueue_ifcq_pkt_chain(ifp, qset->nqs_ifcq,
pkt_chain_head, pkt_chain_tail, n_pkts, n_bytes, false, &pkt_drop);
nx_netif_qset_release(&qset);
} else {
/* callee consumes packet */
error = ifnet_enqueue_pkt_chain(ifp, pkt_chain_head, pkt_chain_tail,
n_pkts, n_bytes, false, &pkt_drop);
}
if (pkt_drop) {
STATS_ADD(nifs, NETIF_STATS_TX_DROP_ENQ_AQM, n_pkts);
STATS_ADD(nifs, NETIF_STATS_DROP, n_pkts);
}
}
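	/* on error, count and free the unconverted remainder of the chain */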
if (error) {
drop_list = m_head;
while (m_head != NULL) {
m_head = m_head->m_nextpkt;
STATS_INC(nifs, NETIF_STATS_DROP);
}
m_freem_list(drop_list);
}
m_freem_list(free_list);
netif_transmit(ifp, NETIF_XMIT_FLAG_HOST);
return error;
}
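/*
 * Compute the L2 header length, i.e. the offset of pkt_hdr from the
 * start of the mbuf chain's data.
 */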
static inline int
get_l2_hlen(struct mbuf *m, uint8_t *l2len)
{
	/*
	 * -fbounds-safety: Although m_pkthdr.pkt_hdr is a void * without
	 * annotations, here we mark char *pkt_hdr as __single because we
	 * never dereference this pointer; we only use it for comparisons
	 * and offset arithmetic.
	 */
char *__single pkt_hdr;
struct mbuf *m0;
uint64_t len = 0;
pkt_hdr = m->m_pkthdr.pkt_hdr;
for (m0 = m; m0 != NULL; m0 = m0->m_next) {
if (pkt_hdr >= m_mtod_current(m0) &&
pkt_hdr < m_mtod_current(m0) + m0->m_len) {
break;
}
len += m0->m_len;
}
if (m0 == NULL) {
DTRACE_SKYWALK2(bad__pkthdr, struct mbuf *, m, char *, pkt_hdr);
return EINVAL;
}
len += (pkt_hdr - m_mtod_current(m0));
if (len > UINT8_MAX) {
DTRACE_SKYWALK2(bad__l2len, struct mbuf *, m, uint64_t, len);
return EINVAL;
}
*l2len = (uint8_t)len;
return 0;
}
#if SK_LOG
/* Hoisted out of line to reduce kernel stack footprint */
SK_LOG_ATTRIBUTE
static void
nx_netif_mbuf_to_kpkt_log(struct __kern_packet *kpkt, uint32_t len,
uint32_t poff)
{
uint8_t *baddr;
uint32_t pkt_len;
MD_BUFLET_ADDR_ABS(kpkt, baddr);
pkt_len = __packet_get_real_data_length(kpkt);
SK_DF(SK_VERB_HOST | SK_VERB_TX, "mlen %u dplen %u"
" hr %u l2 %u poff %u", len, kpkt->pkt_length,
kpkt->pkt_headroom, kpkt->pkt_l2_len, poff);
SK_DF(SK_VERB_HOST | SK_VERB_TX | SK_VERB_DUMP, "%s",
sk_dump("buf", baddr, pkt_len, 128, NULL, 0));
}
#endif /* SK_LOG */
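/*
 * Convert an mbuf chain to a single Skywalk packet: allocate a packet
 * covering the TX headroom plus the mbuf payload from the arena's TX
 * pool, copy the data (folding in a partial checksum if requested),
 * record the headroom and L2 header length, then finalize the packet.
 */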
static inline struct __kern_packet *
nx_netif_mbuf_to_kpkt(struct nexus_adapter *na, struct mbuf *m)
{
struct netif_stats *nifs = &NX_NETIF_PRIVATE(na->na_nx)->nif_stats;
struct nexus_netif_adapter *nifna = NIFNA(na);
struct nx_netif *nif = nifna->nifna_netif;
uint16_t poff = na->na_ifp->if_tx_headroom;
uint32_t len;
struct kern_pbufpool *pp;
struct __kern_packet *kpkt;
kern_packet_t ph;
boolean_t copysum;
uint8_t l2hlen;
int err;
pp = skmem_arena_nexus(na->na_arena)->arn_tx_pp;
ASSERT((pp != NULL) && (pp->pp_md_type == NEXUS_META_TYPE_PACKET) &&
(pp->pp_md_subtype == NEXUS_META_SUBTYPE_RAW));
ASSERT(!PP_HAS_TRUNCATED_BUF(pp));
len = m_pktlen(m);
VERIFY((poff + len) <= (PP_BUF_SIZE_DEF(pp) * pp->pp_max_frags));
/* alloc packet */
ph = pp_alloc_packet_by_size(pp, poff + len, SKMEM_NOSLEEP);
if (__improbable(ph == 0)) {
STATS_INC(nifs, NETIF_STATS_DROP_NOMEM_PKT);
SK_DF(SK_VERB_MEM,
"%s(%d) pp \"%s\" (0x%llx) has no more "
"packet for %s", sk_proc_name_address(current_proc()),
sk_proc_pid(current_proc()), pp->pp_name, SK_KVA(pp),
if_name(na->na_ifp));
return NULL;
}
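	/*
	 * If the mbuf carries a partial checksum request (CSUM_PARTIAL with
	 * CSUM_DATA_VALID), have the copy routine compute the 16-bit sum
	 * starting at csum_tx_start as part of the copy (combined
	 * copy-and-checksum).
	 */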
copysum = ((m->m_pkthdr.csum_flags & (CSUM_DATA_VALID |
CSUM_PARTIAL)) == (CSUM_DATA_VALID | CSUM_PARTIAL));
STATS_INC(nifs, NETIF_STATS_TX_COPY_MBUF);
if (copysum) {
STATS_INC(nifs, NETIF_STATS_TX_COPY_SUM);
}
kpkt = SK_PTR_ADDR_KPKT(ph);
kpkt->pkt_link_flags = 0;
nif->nif_pkt_copy_from_mbuf(NR_TX, ph, poff, m, 0, len,
copysum, m->m_pkthdr.csum_tx_start);
kpkt->pkt_headroom = (uint8_t)poff;
if ((err = get_l2_hlen(m, &l2hlen)) == 0) {
kpkt->pkt_l2_len = l2hlen;
} else {
kpkt->pkt_l2_len = 0;
}
/* finalize the packet */
METADATA_ADJUST_LEN(kpkt, 0, poff);
err = __packet_finalize(ph);
VERIFY(err == 0);
#if SK_LOG
if (__improbable((sk_verbose & SK_VERB_HOST) != 0) && kpkt != NULL) {
nx_netif_mbuf_to_kpkt_log(kpkt, len, poff);
}
#endif /* SK_LOG */
return kpkt;
}