/*
* Copyright (c) 2015-2023 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <errno.h>
#include <os/atomic_private.h>
#include <skywalk/os_skywalk_private.h>
#include <skywalk/os_packet_private.h>
#ifndef LIBSYSCALL_INTERFACE
#error "LIBSYSCALL_INTERFACE not defined"
#endif /* !LIBSYSCALL_INTERFACE */
/*
* Defined here as we don't have Libc
*/
extern int __getpid(void);
extern int __kill(int pid, int signum, int posix);
extern int __exit(int) __attribute__((noreturn));
static ring_id_t _ring_id(struct ch_info *cinfo, const ring_id_type_t type);
static void os_channel_info2attr(struct channel *chd, channel_attr_t cha);
static int _flowadv_id_equal(struct __flowadv_entry *, uuid_t);
/*
 * This is pretty much what an inlined memcmp() would do for UUID
 * comparison; since we don't have access to memcmp() here, we
 * handle the comparison manually.
 */
#define UUID_COMPARE(a, b) \
(a[0] == b[0] && a[1] == b[1] && a[2] == b[2] && a[3] == b[3] && \
a[4] == b[4] && a[5] == b[5] && a[6] == b[6] && a[7] == b[7] && \
a[8] == b[8] && a[9] == b[9] && a[10] == b[10] && a[11] == b[11] && \
a[12] == b[12] && a[13] == b[13] && a[14] == b[14] && a[15] == b[15])
#define _SLOT_INDEX(_chrd, _slot) \
((slot_idx_t)((_slot - (_chrd)->chrd_slot_desc)))
#define _SLOT_DESC(_chrd, _idx) \
(SLOT_DESC_USD(&(_chrd)->chrd_slot_desc[_idx]))
#define _METADATA(_chrd, _ring, _midx) \
((void *)((_chrd)->chrd_md_base_addr + \
((_midx) * (_ring)->ring_md_size) + METADATA_PREAMBLE_SZ))
#define _SLOT_METADATA(_chrd, _ring, _idx) \
_METADATA(_chrd, _ring, _SLOT_DESC(_chrd, _idx)->sd_md_idx)
#define _SLOT_METADATA_IDX_VERIFY(_chrd, _md, _midx) do { \
if (__improbable((_md) != _METADATA((_chrd), (_chrd)->chrd_ring, \
(_midx))) && !_CHANNEL_RING_IS_DEFUNCT(_chrd)) { \
SK_ABORT_WITH_CAUSE("bad packet handle", (_midx)); \
/* NOTREACHED */ \
__builtin_unreachable(); \
} \
} while (0)
#define _BFT_INDEX(_chrd, _bft) (_bft)->buf_bft_idx_reg
#define _SLOT_BFT_METADATA(_chrd, _ring, _idx) \
_CHANNEL_RING_BFT(_chrd, _ring, _SLOT_DESC(_chrd, _idx)->sd_md_idx)
#define _SLOT_BFT_METADATA_IDX_VERIFY(_chrd, _md, _midx) do { \
if (__improbable((mach_vm_address_t)(_md) != \
_CHANNEL_RING_BFT((_chrd), (_chrd)->chrd_ring, (_midx))) && \
!_CHANNEL_RING_IS_DEFUNCT(_chrd)) { \
SK_ABORT_WITH_CAUSE("bad buflet handle", (_midx)); \
/* NOTREACHED */ \
__builtin_unreachable(); \
} \
} while (0)
#define _SLOT_DESC_VERIFY(_chrd, _sdp) do { \
if (__improbable(!SD_VALID_METADATA(_sdp)) && \
!_CHANNEL_RING_IS_DEFUNCT(_chrd)) { \
SK_ABORT("Slot descriptor has no metadata"); \
/* NOTREACHED */ \
__builtin_unreachable(); \
} \
} while (0)
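/*
 * The metadata redzone is the object's offset from the metadata region
 * base XOR'ed with the kernel-provided cookie; a mismatch indicates a
 * corrupted region or a stale metadata handle.
 */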
#define _METADATA_VERIFY(_chrd, _md) do { \
if (__improbable(METADATA_PREAMBLE(_md)->mdp_redzone != \
(((mach_vm_address_t)(_md) - (_chrd)->chrd_md_base_addr) ^ \
__os_ch_md_redzone_cookie)) && \
!_CHANNEL_RING_IS_DEFUNCT(_chrd)) { \
SK_ABORT_WITH_CAUSE("Metadata redzone corrupted", \
METADATA_PREAMBLE(_md)->mdp_redzone); \
/* NOTREACHED */ \
__builtin_unreachable(); \
} \
} while (0)
#define _PKT_BUFCNT_VERIFY(_chrd, _bcnt, _bmax) do { \
if (__improbable((_chrd)->chrd_max_bufs < (_bmax))) { \
SK_ABORT_WITH_CAUSE("Invalid max bufcnt", (_bmax)); \
/* NOTREACHED */ \
__builtin_unreachable(); \
} \
if (__improbable((_bcnt) > (_bmax))) { \
SK_ABORT_WITH_CAUSE("Invalid bufcnt", (_bcnt)); \
/* NOTREACHED */ \
__builtin_unreachable(); \
} \
} while (0)
#define _ABORT_MSGSZ 1024
#define _SCHEMA_VER_VERIFY(_chd) do { \
/* ensure all stores are globally visible */ \
os_atomic_thread_fence(seq_cst); \
if (CHD_SCHEMA(_chd)->csm_ver != CSM_CURRENT_VERSION) { \
char *_msg = malloc(_ABORT_MSGSZ); \
uint32_t _ver = (uint32_t)CHD_SCHEMA(_chd)->csm_ver; \
/* we're stuck with %x and %s formatters */ \
(void) _mach_snprintf(_msg, _ABORT_MSGSZ, \
"Schema region version mismatch: 0x%x != 0x%x\n" \
"Kernel version: %s - did you forget to install " \
"a matching libsystem_kernel.dylib?\n" \
"Kernel UUID: %x%x%x%x-%x%x-%x%x-%x%x-%x%x%x%x%x%x", \
_ver, (uint32_t)CSM_CURRENT_VERSION, \
CHD_SCHEMA(_chd)->csm_kern_name, \
CHD_SCHEMA(_chd)->csm_kern_uuid[0], \
CHD_SCHEMA(_chd)->csm_kern_uuid[1], \
CHD_SCHEMA(_chd)->csm_kern_uuid[2], \
CHD_SCHEMA(_chd)->csm_kern_uuid[3], \
CHD_SCHEMA(_chd)->csm_kern_uuid[4], \
CHD_SCHEMA(_chd)->csm_kern_uuid[5], \
CHD_SCHEMA(_chd)->csm_kern_uuid[6], \
CHD_SCHEMA(_chd)->csm_kern_uuid[7], \
CHD_SCHEMA(_chd)->csm_kern_uuid[8], \
CHD_SCHEMA(_chd)->csm_kern_uuid[9], \
CHD_SCHEMA(_chd)->csm_kern_uuid[10], \
CHD_SCHEMA(_chd)->csm_kern_uuid[11], \
CHD_SCHEMA(_chd)->csm_kern_uuid[12], \
CHD_SCHEMA(_chd)->csm_kern_uuid[13], \
CHD_SCHEMA(_chd)->csm_kern_uuid[14], \
CHD_SCHEMA(_chd)->csm_kern_uuid[15]); \
SK_ABORT_DYNAMIC(_msg); \
/* NOTREACHED */ \
__builtin_unreachable(); \
} \
} while (0)
#define _SLOT_ATTACH_METADATA(_usd, _md_idx) do { \
(_usd)->sd_md_idx = (_md_idx); \
(_usd)->sd_flags |= SD_IDX_VALID; \
} while (0)
#define _SLOT_DETACH_METADATA(_usd) do { \
(_usd)->sd_md_idx = OBJ_IDX_NONE; \
(_usd)->sd_flags &= ~SD_IDX_VALID; \
} while (0)
#define _CHANNEL_OFFSET(_type, _ptr, _offset) \
((_type)(void *)((uintptr_t)(_ptr) + (_offset)))
#define _CHANNEL_SCHEMA(_base, _off) \
_CHANNEL_OFFSET(struct __user_channel_schema *, _base, _off)
#define _CHANNEL_RING_DEF_BUF(_chrd, _ring, _idx) \
((_chrd)->chrd_def_buf_base_addr + \
((_idx) * (_ring)->ring_def_buf_size))
#define _CHANNEL_RING_LARGE_BUF(_chrd, _ring, _idx) \
((_chrd)->chrd_large_buf_base_addr + \
((_idx) * (_ring)->ring_large_buf_size))
#define _CHANNEL_RING_BUF(_chrd, _ring, _bft) \
BUFLET_HAS_LARGE_BUF(_bft) ? \
_CHANNEL_RING_LARGE_BUF(_chrd, _ring, (_bft)->buf_idx) : \
_CHANNEL_RING_DEF_BUF(_chrd, _ring, (_bft)->buf_idx)
#define _CHANNEL_RING_BFT(_chrd, _ring, _idx) \
((_chrd)->chrd_bft_base_addr + ((_idx) * (_ring)->ring_bft_size))
#define _CHANNEL_RING_NEXT(_ring, _cur) \
(__improbable((_cur) + 1 == (_ring)->ring_num_slots) ? 0 : (_cur) + 1)
#define _CHANNEL_RING_IS_DEFUNCT(_chrd) \
(!(*(_chrd)->chrd_csm_flags & CSM_ACTIVE))
#define _CHANNEL_IS_DEFUNCT(_chd) \
(!(CHD_SCHEMA(_chd)->csm_flags & CSM_ACTIVE))
#define _CH_PKT_GET_FIRST_BUFLET(_pkt, _bft, _chrd, _ring) do { \
if (__probable((_pkt)->pkt_qum_buf.buf_idx != OBJ_IDX_NONE)) { \
(_bft) = &(_pkt)->pkt_qum_buf; \
} else if ((_pkt)->pkt_qum_buf.buf_nbft_idx != OBJ_IDX_NONE) { \
(_bft) = _CHANNEL_RING_BFT(_chrd, _ring, \
(_pkt)->pkt_qum_buf.buf_nbft_idx); \
} else { \
(_bft) = NULL; \
} \
} while (0)
/*
 * A per-process copy of the channel metadata redzone cookie.
 */
__attribute__((visibility("hidden")))
static uint64_t __os_ch_md_redzone_cookie = 0;
__attribute__((always_inline, visibility("hidden")))
static inline uint32_t
_num_tx_rings(struct ch_info *ci)
{
ring_id_t first, last;
first = _ring_id(ci, CHANNEL_FIRST_TX_RING);
last = _ring_id(ci, CHANNEL_LAST_TX_RING);
return (last - first) + 1;
}
__attribute__((always_inline, visibility("hidden")))
static inline uint32_t
_num_rx_rings(struct ch_info *ci)
{
ring_id_t first, last;
first = _ring_id(ci, CHANNEL_FIRST_RX_RING);
last = _ring_id(ci, CHANNEL_LAST_RX_RING);
return (last - first) + 1;
}
__attribute__((always_inline, visibility("hidden")))
static inline uint32_t
_num_allocator_rings(const struct __user_channel_schema *csm)
{
return csm->csm_allocator_ring_pairs << 1;
}
__attribute__((visibility("hidden")))
static void
os_channel_init_ring(struct channel_ring_desc *chrd,
struct channel *chd, uint32_t ring_index)
{
struct __user_channel_schema *csm = CHD_SCHEMA(chd);
struct __user_channel_ring *ring = NULL;
struct __slot_desc *sd = NULL;
nexus_meta_type_t md_type;
nexus_meta_subtype_t md_subtype;
ring = _CHANNEL_OFFSET(struct __user_channel_ring *, csm,
csm->csm_ring_ofs[ring_index].ring_off);
sd = _CHANNEL_OFFSET(struct __slot_desc *, csm,
csm->csm_ring_ofs[ring_index].sd_off);
md_type = csm->csm_md_type;
md_subtype = csm->csm_md_subtype;
if (ring == NULL || sd == NULL) {
SK_ABORT("Channel schema not valid");
/* NOTREACHED */
__builtin_unreachable();
} else if (!(md_type == NEXUS_META_TYPE_QUANTUM ||
md_type == NEXUS_META_TYPE_PACKET)) {
SK_ABORT_WITH_CAUSE("Metadata type unknown", md_type);
/* NOTREACHED */
__builtin_unreachable();
} else if (!(md_subtype == NEXUS_META_SUBTYPE_PAYLOAD ||
md_subtype == NEXUS_META_SUBTYPE_RAW)) {
SK_ABORT_WITH_CAUSE("Metadata subtype unknown", md_subtype);
/* NOTREACHED */
__builtin_unreachable();
}
chrd->chrd_slot_desc = sd;
chrd->chrd_csm_flags = &chd->chd_schema->csm_flags;
/* const overrides */
*(struct channel **)(uintptr_t)&chrd->chrd_channel = chd;
*(struct __user_channel_ring **)(uintptr_t)&chrd->chrd_ring = ring;
*(nexus_meta_type_t *)(uintptr_t)&chrd->chrd_md_type = md_type;
*(nexus_meta_subtype_t *)(uintptr_t)&chrd->chrd_md_subtype = md_subtype;
*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_shmem_base_addr =
CHD_INFO(chd)->cinfo_mem_base;
*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_def_buf_base_addr =
(mach_vm_address_t)((uintptr_t)ring + ring->ring_def_buf_base);
*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_md_base_addr =
(mach_vm_address_t)((uintptr_t)ring + ring->ring_md_base);
*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_sd_base_addr =
(mach_vm_address_t)((uintptr_t)ring + ring->ring_sd_base);
*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_bft_base_addr =
(mach_vm_address_t)((uintptr_t)ring + ring->ring_bft_base);
*(mach_vm_address_t *)(uintptr_t)&chrd->chrd_large_buf_base_addr =
(mach_vm_address_t)((uintptr_t)ring + ring->ring_large_buf_base);
*(uint32_t *)(uintptr_t)&chrd->chrd_max_bufs =
CHD_PARAMS(chd)->nxp_max_frags;
}
__attribute__((always_inline, visibility("hidden")))
static inline mach_vm_address_t
_initialize_metadata_address(const channel_ring_t chrd,
struct __user_quantum *q, uint16_t *bdoff)
{
int i;
struct __user_buflet *ubft0;
const struct __user_channel_ring *ring = chrd->chrd_ring;
switch (chrd->chrd_md_type) {
case NEXUS_META_TYPE_PACKET: {
struct __user_buflet *ubft, *pbft;
struct __user_packet *p = (struct __user_packet *)q;
uint16_t bcnt = p->pkt_bufs_cnt;
uint16_t bmax = p->pkt_bufs_max;
_CASSERT(sizeof(p->pkt_qum_buf.buf_addr) ==
sizeof(mach_vm_address_t));
/*
* In the event of a defunct, we'd be accessing zero-filled
* memory and end up with 0 for bcnt or bmax.
*/
if (__improbable((bcnt == 0) || (bmax == 0))) {
if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
SK_ABORT("bad bufcnt");
/* NOTREACHED */
__builtin_unreachable();
}
return 0;
}
_PKT_BUFCNT_VERIFY(chrd, bcnt, bmax);
_CH_PKT_GET_FIRST_BUFLET(p, ubft, chrd, ring);
if (__improbable(ubft == NULL)) {
SK_ABORT("bad packet: no buflet");
/* NOTREACHED */
__builtin_unreachable();
}
/*
* special handling for empty packet buflet.
*/
if (__improbable(p->pkt_qum_buf.buf_idx == OBJ_IDX_NONE)) {
*__DECONST(mach_vm_address_t *,
&p->pkt_qum_buf.buf_addr) = 0;
*__DECONST(mach_vm_address_t *,
&p->pkt_qum_buf.buf_nbft_addr) =
(mach_vm_address_t)ubft;
}
ubft0 = ubft;
for (i = 0; (i < bcnt) && (ubft != NULL); i++) {
pbft = ubft;
if (__probable(pbft->buf_idx != OBJ_IDX_NONE)) {
*(mach_vm_address_t *)(uintptr_t)
&(pbft->buf_addr) = _CHANNEL_RING_BUF(chrd,
ring, pbft);
} else {
*(mach_vm_address_t *)(uintptr_t)
&(pbft->buf_addr) = 0;
}
if (pbft->buf_nbft_idx != OBJ_IDX_NONE) {
ubft = _CHANNEL_RING_BFT(chrd, ring,
pbft->buf_nbft_idx);
} else {
ubft = NULL;
}
*__DECONST(mach_vm_address_t *, &pbft->buf_nbft_addr) =
(mach_vm_address_t)ubft;
}
if (__improbable(pbft->buf_nbft_idx != OBJ_IDX_NONE)) {
if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
SK_ABORT("non terminating buflet chain");
/* NOTREACHED */
__builtin_unreachable();
}
return 0;
}
if (__improbable(i != bcnt)) {
SK_ABORT_WITH_CAUSE("invalid buflet count", bcnt);
/* NOTREACHED */
__builtin_unreachable();
}
break;
}
default:
ubft0 = &q->qum_buf[0];
_CASSERT(sizeof(q->qum_buf[0].buf_addr) ==
sizeof(mach_vm_address_t));
/* immutable: compute pointers from the index */
*(mach_vm_address_t *)(uintptr_t)&ubft0->buf_addr =
_CHANNEL_RING_BUF(chrd, ring, ubft0);
break;
}
/* return address and offset of the first buffer */
*bdoff = ubft0->buf_doff;
return ubft0->buf_addr;
}
/*
* _slot_index_is_valid
* - verify that the slot index is within valid bounds
* - if the head is less than (or equal to) the tail (case A below)
* head <= valid < tail
* - if the head is greater than the tail (case B below)
* valid < tail
* or
* head <= valid < num_slots
*
* case A: x x x x x x x H o o o o o T x x x x x x
* case B: o o o o o T x x x x H o o o o o o o o o
*
* 'H' - head
* 'T' - tail
* 'x' - invalid
* 'o' - valid
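 *
 * For example, in case B as drawn (20 slots, T at index 5 and H at
 * index 10), the valid indices are 0-4 and 10-19; the head itself is
 * the first valid slot.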
*/
__attribute__((always_inline, visibility("hidden")))
static inline int
_slot_index_is_valid(const struct __user_channel_ring *ring, slot_idx_t idx)
{
int is_valid = 0;
if (ring->ring_head <= ring->ring_tail) {
if (__probable(idx >= ring->ring_head && idx < ring->ring_tail)) {
is_valid = 1;
}
} else {
if (__probable(idx < ring->ring_tail ||
(idx >= ring->ring_head && idx < ring->ring_num_slots))) {
is_valid = 1;
}
}
return is_valid;
}
channel_t
os_channel_create_extended(const uuid_t uuid, const nexus_port_t port,
const ring_dir_t dir, const ring_id_t ring, const channel_attr_t cha)
{
uint32_t num_tx_rings, num_rx_rings, num_allocator_rings;
uint32_t ring_offset, ring_index, num_event_rings, num_large_buf_alloc_rings;
struct __user_channel_schema *ucs;
struct channel *chd = NULL;
struct ch_info *ci = NULL;
struct ch_init init;
int i, fd = -1;
int err = 0;
size_t chd_sz;
SK_ALIGN64_CASSERT(struct ch_info, cinfo_mem_map_size);
switch (dir) {
case CHANNEL_DIR_TX_RX:
case CHANNEL_DIR_TX:
case CHANNEL_DIR_RX:
break;
default:
err = EINVAL;
goto done;
}
ci = malloc(CHD_INFO_SIZE);
if (ci == NULL) {
err = errno = ENOMEM;
goto done;
}
bzero(ci, CHD_INFO_SIZE);
bzero(&init, sizeof(init));
init.ci_version = CHANNEL_INIT_CURRENT_VERSION;
if (cha != NULL) {
if (cha->cha_exclusive != 0) {
init.ci_ch_mode |= CHMODE_EXCLUSIVE;
}
if (cha->cha_user_packet_pool != 0) {
init.ci_ch_mode |= CHMODE_USER_PACKET_POOL;
}
if (cha->cha_nexus_defunct_ok != 0) {
init.ci_ch_mode |= CHMODE_DEFUNCT_OK;
}
if (cha->cha_enable_event_ring != 0) {
/* User packet pool is required for event rings */
if (cha->cha_user_packet_pool == 0) {
err = EINVAL;
goto done;
}
init.ci_ch_mode |= CHMODE_EVENT_RING;
}
if (cha->cha_monitor != 0) {
if (dir == CHANNEL_DIR_TX_RX) {
init.ci_ch_mode |= CHMODE_MONITOR;
} else if (dir == CHANNEL_DIR_TX) {
init.ci_ch_mode |= CHMODE_MONITOR_TX;
} else if (dir == CHANNEL_DIR_RX) {
init.ci_ch_mode |= CHMODE_MONITOR_RX;
}
if (cha->cha_monitor == CHANNEL_MONITOR_NO_COPY) {
init.ci_ch_mode |= CHMODE_MONITOR_NO_COPY;
}
}
if (cha->cha_filter != 0) {
init.ci_ch_mode |= CHMODE_FILTER;
}
if (cha->cha_low_latency != 0) {
init.ci_ch_mode |= CHMODE_LOW_LATENCY;
}
init.ci_key_len = cha->cha_key_len;
init.ci_key = cha->cha_key;
init.ci_tx_lowat = cha->cha_tx_lowat;
init.ci_rx_lowat = cha->cha_rx_lowat;
}
init.ci_ch_ring_id = ring;
init.ci_nx_port = port;
bcopy(uuid, init.ci_nx_uuid, sizeof(uuid_t));
fd = __channel_open(&init, sizeof(init));
if (fd == -1) {
err = errno;
goto done;
}
err = __channel_get_info(fd, ci, CHD_INFO_SIZE);
if (err != 0) {
err = errno;
goto done;
}
ucs = _CHANNEL_SCHEMA(ci->cinfo_mem_base, ci->cinfo_schema_offset);
num_tx_rings = _num_tx_rings(ci); /* # of channel tx rings */
num_rx_rings = _num_rx_rings(ci); /* # of channel rx rings */
num_allocator_rings = _num_allocator_rings(ucs);
num_event_rings = ucs->csm_num_event_rings;
num_large_buf_alloc_rings = ucs->csm_large_buf_alloc_rings;
/*
 * If the user requested packet allocation mode for the channel,
 * check that the channel was opened in packet allocation mode
 * and that the allocator rings were created.
 */
if ((init.ci_ch_mode & CHMODE_USER_PACKET_POOL) &&
((num_allocator_rings < 2) ||
!(ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL))) {
err = errno = ENXIO;
goto done;
}
if ((init.ci_ch_mode & CHMODE_EVENT_RING) && ((num_event_rings == 0) ||
!(ci->cinfo_ch_mode & CHMODE_EVENT_RING))) {
err = errno = ENXIO;
goto done;
}
chd_sz = CHD_SIZE(num_tx_rings + num_rx_rings + num_allocator_rings +
num_event_rings + num_large_buf_alloc_rings);
chd = malloc(chd_sz);
if (chd == NULL) {
err = errno = ENOMEM;
goto done;
}
bzero(chd, chd_sz);
chd->chd_fd = fd;
chd->chd_guard = init.ci_guard;
/* claim ch_info (will be freed along with the channel itself) */
CHD_INFO(chd) = ci;
ci = NULL;
/* const override */
*(struct __user_channel_schema **)(uintptr_t)&chd->chd_schema = ucs;
/* make sure we're running on the right kernel */
_SCHEMA_VER_VERIFY(chd);
*(nexus_meta_type_t *)&chd->chd_md_type = CHD_SCHEMA(chd)->csm_md_type;
*(nexus_meta_subtype_t *)&chd->chd_md_subtype =
CHD_SCHEMA(chd)->csm_md_subtype;
if (CHD_SCHEMA(chd)->csm_stats_ofs != 0) {
*(void **)(uintptr_t)&chd->chd_nx_stats =
_CHANNEL_OFFSET(void *, CHD_INFO(chd)->cinfo_mem_base,
CHD_SCHEMA(chd)->csm_stats_ofs);
}
if (CHD_SCHEMA(chd)->csm_flowadv_ofs != 0) {
*(struct __flowadv_entry **)(uintptr_t)&chd->chd_nx_flowadv =
_CHANNEL_OFFSET(struct __flowadv_entry *,
CHD_INFO(chd)->cinfo_mem_base,
CHD_SCHEMA(chd)->csm_flowadv_ofs);
}
if (CHD_SCHEMA(chd)->csm_nexusadv_ofs != 0) {
struct __kern_nexus_adv_metadata *adv_md;
*(struct __kern_nexus_adv_metadata **)
(uintptr_t)&chd->chd_nx_adv =
_CHANNEL_OFFSET(struct __kern_nexus_adv_metadata *,
CHD_INFO(chd)->cinfo_mem_base,
CHD_SCHEMA(chd)->csm_nexusadv_ofs);
adv_md = CHD_NX_ADV_MD(chd);
if (adv_md->knam_version != NX_ADVISORY_MD_CURRENT_VERSION &&
!_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("nexus advisory metadata version"
" mismatch", NX_ADVISORY_MD_CURRENT_VERSION);
/* NOTREACHED */
__builtin_unreachable();
}
if (chd->chd_nx_adv->knam_type == NEXUS_ADVISORY_TYPE_NETIF) {
struct netif_nexus_advisory *netif_adv;
netif_adv = CHD_NX_ADV_NETIF(adv_md);
if (netif_adv->nna_version !=
NX_NETIF_ADVISORY_CURRENT_VERSION &&
!_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("nexus advisory "
"version mismatch for netif",
NX_NETIF_ADVISORY_CURRENT_VERSION);
/* NOTREACHED */
__builtin_unreachable();
}
} else if (chd->chd_nx_adv->knam_type ==
NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
struct sk_nexusadv *fsw_adv;
fsw_adv = CHD_NX_ADV_FSW(adv_md);
if (fsw_adv->nxadv_ver !=
NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION &&
!_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("nexus advisory "
"version mismatch for flowswitch",
NX_FLOWSWITCH_ADVISORY_CURRENT_VERSION);
/* NOTREACHED */
__builtin_unreachable();
}
} else if (!_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("nexus advisory metadata type"
" unknown", NX_ADVISORY_MD_CURRENT_VERSION);
/* NOTREACHED */
__builtin_unreachable();
}
}
if (cha != NULL) {
os_channel_info2attr(chd, cha);
}
ring_offset = 0;
for (i = 0; i < num_tx_rings; i++) {
ring_index = ring_offset + i;
os_channel_init_ring(&chd->chd_rings[ring_index], chd,
ring_index);
}
ring_offset += num_tx_rings;
for (i = 0; i < num_rx_rings; i++) {
ring_index = ring_offset + i;
os_channel_init_ring(&chd->chd_rings[ring_index], chd,
ring_index);
}
ring_offset += num_rx_rings;
for (i = 0; i < num_allocator_rings; i++) {
ring_index = ring_offset + i;
os_channel_init_ring(&chd->chd_rings[ring_index], chd,
ring_index);
}
ring_offset += num_allocator_rings;
for (i = 0; i < num_event_rings; i++) {
ring_index = ring_offset + i;
os_channel_init_ring(&chd->chd_rings[ring_index], chd,
ring_index);
}
ring_offset += num_event_rings;
for (i = 0; i < num_large_buf_alloc_rings; i++) {
ring_index = ring_offset + i;
os_channel_init_ring(&chd->chd_rings[ring_index], chd,
ring_index);
}
if (init.ci_ch_mode & CHMODE_USER_PACKET_POOL) {
chd->chd_sync_flags = CHANNEL_SYNCF_ALLOC | CHANNEL_SYNCF_FREE;
*__DECONST(uint8_t *, &chd->chd_alloc_ring_idx) =
num_tx_rings + num_rx_rings;
if (num_allocator_rings > 2) {
chd->chd_sync_flags |= CHANNEL_SYNCF_ALLOC_BUF;
*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
chd->chd_alloc_ring_idx + 1;
*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
chd->chd_buf_alloc_ring_idx + 1;
*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
chd->chd_free_ring_idx + 1;
} else {
*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
CHD_RING_IDX_NONE;
*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
CHD_RING_IDX_NONE;
*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
chd->chd_alloc_ring_idx + 1;
}
if (num_large_buf_alloc_rings > 0) {
*__DECONST(uint8_t *, &chd->chd_large_buf_alloc_ring_idx) =
num_tx_rings + num_rx_rings + num_allocator_rings +
num_event_rings;
} else {
*__DECONST(uint8_t *, &chd->chd_large_buf_alloc_ring_idx) =
CHD_RING_IDX_NONE;
}
} else {
*__DECONST(uint8_t *, &chd->chd_alloc_ring_idx) =
CHD_RING_IDX_NONE;
*__DECONST(uint8_t *, &chd->chd_free_ring_idx) =
CHD_RING_IDX_NONE;
*__DECONST(uint8_t *, &chd->chd_buf_alloc_ring_idx) =
CHD_RING_IDX_NONE;
*__DECONST(uint8_t *, &chd->chd_buf_free_ring_idx) =
CHD_RING_IDX_NONE;
*__DECONST(uint8_t *, &chd->chd_large_buf_alloc_ring_idx) =
CHD_RING_IDX_NONE;
}
if (__os_ch_md_redzone_cookie == 0) {
__os_ch_md_redzone_cookie =
CHD_SCHEMA(chd)->csm_md_redzone_cookie;
}
/* ensure all stores are globally visible */
os_atomic_thread_fence(seq_cst);
done:
if (err != 0) {
if (fd != -1) {
(void) guarded_close_np(fd, &init.ci_guard);
}
if (chd != NULL) {
if (CHD_INFO(chd) != NULL) {
free(CHD_INFO(chd));
CHD_INFO(chd) = NULL;
}
free(chd);
chd = NULL;
}
if (ci != NULL) {
free(ci);
ci = NULL;
}
errno = err;
}
return chd;
}
channel_t
os_channel_create(const uuid_t uuid, const nexus_port_t port)
{
return os_channel_create_extended(uuid, port, CHANNEL_DIR_TX_RX,
CHANNEL_RING_ID_ANY, NULL);
}
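/*
 * Illustrative sketch (not compiled): the minimal channel lifecycle,
 * assuming the caller has already obtained a nexus instance UUID and
 * port elsewhere.
 */
#if 0
static void
example_channel_lifecycle(const uuid_t nx_uuid, const nexus_port_t nx_port)
{
	channel_t ch;

	/* open a bidirectional channel on any ring pair */
	ch = os_channel_create(nx_uuid, nx_port);
	if (ch == NULL) {
		return;         /* errno holds the failure reason */
	}
	/* the guarded fd can be used with kqueue for readiness events */
	(void) os_channel_get_fd(ch);

	/* ... I/O via os_channel_tx_ring()/os_channel_rx_ring() ... */

	os_channel_destroy(ch);
}
#endif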
int
os_channel_get_fd(const channel_t chd)
{
return chd->chd_fd;
}
int
os_channel_read_attr(const channel_t chd, channel_attr_t cha)
{
int err;
if ((err = __channel_get_info(chd->chd_fd, CHD_INFO(chd),
CHD_INFO_SIZE)) == 0) {
os_channel_info2attr(chd, cha);
}
return err;
}
int
os_channel_write_attr(const channel_t chd, channel_attr_t cha)
{
int err = 0;
if (CHD_INFO(chd)->cinfo_tx_lowat.cet_unit !=
cha->cha_tx_lowat.cet_unit ||
CHD_INFO(chd)->cinfo_tx_lowat.cet_value !=
cha->cha_tx_lowat.cet_value) {
if ((err = __channel_set_opt(chd->chd_fd, CHOPT_TX_LOWAT_THRESH,
&cha->cha_tx_lowat, sizeof(cha->cha_tx_lowat))) != 0) {
goto done;
}
/* update local copy */
CHD_INFO(chd)->cinfo_tx_lowat = cha->cha_tx_lowat;
}
if (CHD_INFO(chd)->cinfo_rx_lowat.cet_unit !=
cha->cha_rx_lowat.cet_unit ||
CHD_INFO(chd)->cinfo_rx_lowat.cet_value !=
cha->cha_rx_lowat.cet_value) {
if ((err = __channel_set_opt(chd->chd_fd, CHOPT_RX_LOWAT_THRESH,
&cha->cha_rx_lowat, sizeof(cha->cha_rx_lowat))) != 0) {
goto done;
}
/* update local copy */
CHD_INFO(chd)->cinfo_rx_lowat = cha->cha_rx_lowat;
}
done:
return err;
}
int
os_channel_read_nexus_extension_info(const channel_t chd, nexus_type_t *nt,
uint64_t *ext)
{
struct nxprov_params *nxp;
nxp = &CHD_INFO(chd)->cinfo_nxprov_params;
if (nt != NULL) {
*nt = nxp->nxp_type;
}
if (ext != NULL) {
*ext = (uint64_t)nxp->nxp_extensions;
}
return 0;
}
int
os_channel_sync(const channel_t chd, const sync_mode_t mode)
{
if (__improbable(mode != CHANNEL_SYNC_TX && mode != CHANNEL_SYNC_RX)) {
return EINVAL;
}
return __channel_sync(chd->chd_fd, mode,
(mode == CHANNEL_SYNC_TX) ? chd->chd_sync_flags :
(chd->chd_sync_flags &
~(CHANNEL_SYNCF_ALLOC | CHANNEL_SYNCF_ALLOC_BUF)));
}
void
os_channel_destroy(channel_t chd)
{
if (chd->chd_fd != -1) {
(void) guarded_close_np(chd->chd_fd, &chd->chd_guard);
}
if (CHD_INFO(chd) != NULL) {
free(CHD_INFO(chd));
CHD_INFO(chd) = NULL;
}
free(chd);
}
int
os_channel_is_defunct(channel_t chd)
{
return _CHANNEL_IS_DEFUNCT(chd);
}
__attribute__((always_inline, visibility("hidden")))
static inline ring_id_t
_ring_id(struct ch_info *cinfo, const ring_id_type_t type)
{
ring_id_t rid = CHANNEL_RING_ID_ANY; /* make it crash */
switch (type) {
case CHANNEL_FIRST_TX_RING:
rid = cinfo->cinfo_first_tx_ring;
break;
case CHANNEL_LAST_TX_RING:
rid = cinfo->cinfo_last_tx_ring;
break;
case CHANNEL_FIRST_RX_RING:
rid = cinfo->cinfo_first_rx_ring;
break;
case CHANNEL_LAST_RX_RING:
rid = cinfo->cinfo_last_rx_ring;
break;
}
return rid;
}
ring_id_t
os_channel_ring_id(const channel_t chd, const ring_id_type_t type)
{
return _ring_id(CHD_INFO(chd), type);
}
channel_ring_t
os_channel_tx_ring(const channel_t chd, const ring_id_t rid)
{
struct ch_info *ci = CHD_INFO(chd);
if (__improbable((ci->cinfo_ch_ring_id != CHANNEL_RING_ID_ANY &&
ci->cinfo_ch_ring_id != rid) ||
rid < _ring_id(ci, CHANNEL_FIRST_TX_RING) ||
rid > _ring_id(ci, CHANNEL_LAST_TX_RING))) {
return NULL;
}
return &chd->chd_rings[rid - _ring_id(ci, CHANNEL_FIRST_TX_RING)];
}
channel_ring_t
os_channel_rx_ring(const channel_t chd, const ring_id_t rid)
{
struct ch_info *ci = CHD_INFO(chd);
if (__improbable((ci->cinfo_ch_ring_id != CHANNEL_RING_ID_ANY &&
ci->cinfo_ch_ring_id != rid) ||
rid < _ring_id(ci, CHANNEL_FIRST_RX_RING) ||
rid > _ring_id(ci, CHANNEL_LAST_RX_RING))) {
return NULL;
}
return &chd->chd_rings[_num_tx_rings(ci) + /* add tx rings */
(rid - _ring_id(ci, CHANNEL_FIRST_RX_RING))];
}
/*
 * Return 1 if we have pending transmissions in the tx ring. When
 * everything is complete, ring->ring_head == ring->ring_khead.
 */
int
os_channel_pending(const channel_ring_t chrd)
{
struct __user_channel_ring *ring =
__DECONST(struct __user_channel_ring *, chrd->chrd_ring);
return ring->ring_head != ring->ring_khead;
}
uint64_t
os_channel_ring_sync_time(const channel_ring_t chrd)
{
return chrd->chrd_ring->ring_sync_time;
}
uint64_t
os_channel_ring_notify_time(const channel_ring_t chrd)
{
return chrd->chrd_ring->ring_notify_time;
}
uint32_t
os_channel_available_slot_count(const channel_ring_t chrd)
{
const struct __user_channel_ring *ring = chrd->chrd_ring;
uint32_t count;
int n;
if (ring->ring_kind == CR_KIND_TX) {
n = ring->ring_head - ring->ring_khead;
if (n < 0) {
n += ring->ring_num_slots;
}
count = (ring->ring_num_slots - n - 1);
} else {
n = ring->ring_tail - ring->ring_head;
if (n < 0) {
n += ring->ring_num_slots;
}
count = n;
}
return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? 0 : count;
}
int
os_channel_advance_slot(channel_ring_t chrd, const channel_slot_t slot)
{
struct __user_channel_ring *ring =
__DECONST(struct __user_channel_ring *, chrd->chrd_ring);
slot_idx_t idx;
int err;
idx = _SLOT_INDEX(chrd, slot);
if (__probable(_slot_index_is_valid(ring, idx))) {
ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
err = 0;
} else {
err = (_CHANNEL_RING_IS_DEFUNCT(chrd) ? ENXIO : EINVAL);
}
return err;
}
channel_slot_t
os_channel_get_next_slot(const channel_ring_t chrd, const channel_slot_t slot0,
slot_prop_t *prop)
{
const struct __user_channel_ring *ring = chrd->chrd_ring;
const struct __slot_desc *slot;
slot_idx_t idx;
if (__probable(slot0 != NULL)) {
idx = _SLOT_INDEX(chrd, slot0);
if (__probable(_slot_index_is_valid(ring, idx))) {
idx = _CHANNEL_RING_NEXT(ring, idx);
} else if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
/* slot is out of bounds */
SK_ABORT_WITH_CAUSE("Index out of bounds in gns", idx);
/* NOTREACHED */
__builtin_unreachable();
} else {
/*
* In case of a defunct, pretend as if we've
* advanced to the last slot; this will result
* in a NULL slot below.
*/
idx = ring->ring_tail;
}
} else {
idx = ring->ring_head;
}
if (__probable(idx != ring->ring_tail)) {
slot = &chrd->chrd_slot_desc[idx];
} else {
/* we just advanced to the last slot */
slot = NULL;
}
if (__probable(slot != NULL)) {
uint16_t ring_kind = ring->ring_kind;
struct __user_quantum *q;
mach_vm_address_t baddr;
uint16_t bdoff;
if (__improbable((ring_kind == CR_KIND_TX) &&
(CHD_INFO(chrd->chrd_channel)->cinfo_ch_mode &
CHMODE_USER_PACKET_POOL))) {
if (SD_VALID_METADATA(SLOT_DESC_USD(slot))) {
SK_ABORT_WITH_CAUSE("Tx slot has attached "
"metadata", idx);
/* NOTREACHED */
__builtin_unreachable();
}
if (prop != NULL) {
prop->sp_len = 0;
prop->sp_flags = 0;
prop->sp_buf_ptr = 0;
prop->sp_mdata_ptr = 0;
}
return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ?
NULL : (channel_slot_t)slot;
}
_SLOT_DESC_VERIFY(chrd, SLOT_DESC_USD(slot));
q = _SLOT_METADATA(chrd, ring, idx);
_METADATA_VERIFY(chrd, q);
baddr = _initialize_metadata_address(chrd, q, &bdoff);
if (__improbable(baddr == 0)) {
return NULL;
}
/* No multi-buflet support for the slot-based interface */
if (__probable(prop != NULL)) {
/* immutable: slot index */
prop->sp_idx = idx;
prop->sp_flags = 0;
prop->sp_buf_ptr = baddr + bdoff;
prop->sp_mdata_ptr = q;
/* reset slot length if this is to be used for tx */
prop->sp_len = (ring_kind == CR_KIND_TX) ?
ring->ring_def_buf_size : q->qum_len;
}
}
return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ?
NULL : (channel_slot_t)slot;
}
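/*
 * Illustrative sketch (not compiled): the canonical slot-based TX loop
 * for a channel opened without the user packet pool. fill_frame() is a
 * hypothetical stand-in for the caller's own frame construction; it
 * returns the number of bytes written.
 */
#if 0
static int
example_tx_fill(channel_t ch, channel_ring_t txring)
{
	channel_slot_t prev = NULL, slot;
	slot_prop_t prop;

	while ((slot = os_channel_get_next_slot(txring, prev, &prop)) !=
	    NULL) {
		/* sp_buf_ptr points at the slot's buffer; sp_len is its size */
		prop.sp_len = fill_frame((void *)(uintptr_t)prop.sp_buf_ptr,
		    prop.sp_len);
		os_channel_set_slot_properties(txring, slot, &prop);
		prev = slot;
	}
	if (prev == NULL) {
		return ENOBUFS;         /* no free slots in the ring */
	}
	/* move the head past the last filled slot, then tell the kernel */
	(void) os_channel_advance_slot(txring, prev);
	return os_channel_sync(ch, CHANNEL_SYNC_TX);
}
#endif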
void
os_channel_set_slot_properties(const channel_ring_t chrd,
const channel_slot_t slot, const slot_prop_t *prop)
{
const struct __user_channel_ring *ring = chrd->chrd_ring;
slot_idx_t idx = _SLOT_INDEX(chrd, slot);
if (__probable(_slot_index_is_valid(ring, idx))) {
struct __user_quantum *q;
_METADATA_VERIFY(chrd, prop->sp_mdata_ptr);
_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
/*
 * In the event of a defunct, we'd be accessing zero-filled
 * memory; this is fine since we ignore all changes made to the
 * region at that time.
 */
q = _SLOT_METADATA(chrd, ring, idx);
q->qum_len = prop->sp_len;
switch (chrd->chrd_md_type) {
case NEXUS_META_TYPE_PACKET: {
struct __user_packet *p = (struct __user_packet *)q;
/* No multi-buflet support for the slot-based interface */
p->pkt_qum_buf.buf_dlen = prop->sp_len;
p->pkt_qum_buf.buf_doff = 0;
break;
}
default:
q->qum_buf[0].buf_dlen = prop->sp_len;
q->qum_buf[0].buf_doff = 0;
break;
}
} else if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
/* slot is out of bounds */
SK_ABORT_WITH_CAUSE("Index out of bounds in ssp", idx);
/* NOTREACHED */
__builtin_unreachable();
}
}
packet_t
os_channel_slot_get_packet(const channel_ring_t chrd, const channel_slot_t slot)
{
const struct __user_channel_ring *ring = chrd->chrd_ring;
struct __user_quantum *q = NULL;
if (__probable(slot != NULL)) {
slot_idx_t idx = _SLOT_INDEX(chrd, slot);
if (__improbable(!_slot_index_is_valid(ring, idx)) &&
!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
/* slot is out of bounds */
SK_ABORT_WITH_CAUSE("Index out of bounds in sgp", idx);
/* NOTREACHED */
__builtin_unreachable();
}
if (__probable(SD_VALID_METADATA(_SLOT_DESC(chrd, idx)))) {
obj_idx_t midx;
q = _SLOT_METADATA(chrd, ring, idx);
_METADATA_VERIFY(chrd, q);
/*
 * In the event of a defunct, we'd be accessing
 * zero-filled memory and end up with 0 for midx;
 * this is fine since we ignore all changes made
 * to the region at that time.
 */
midx = METADATA_IDX(q);
_SLOT_METADATA_IDX_VERIFY(chrd, q, midx);
}
}
return (q == NULL) ? 0 :
SK_PTR_ENCODE(q, chrd->chrd_md_type, chrd->chrd_md_subtype);
}
void *
os_channel_get_stats_region(const channel_t chd, const channel_stats_id_t id)
{
void *sp = CHD_NX_STATS(chd);
struct __nx_stats_fsw *nxs_fsw;
void *ptr = NULL;
/* we currently deal only with flowswitch */
if (sp == NULL ||
CHD_SCHEMA(chd)->csm_stats_type != NEXUS_STATS_TYPE_FSW) {
return NULL;
}
nxs_fsw = sp;
switch (id) {
case CHANNEL_STATS_ID_IP:
ptr = &nxs_fsw->nxs_ipstat;
break;
case CHANNEL_STATS_ID_IP6:
ptr = &nxs_fsw->nxs_ip6stat;
break;
case CHANNEL_STATS_ID_TCP:
ptr = &nxs_fsw->nxs_tcpstat;
break;
case CHANNEL_STATS_ID_UDP:
ptr = &nxs_fsw->nxs_udpstat;
break;
case CHANNEL_STATS_ID_QUIC:
ptr = &nxs_fsw->nxs_quicstat;
break;
default:
ptr = NULL;
break;
}
return ptr;
}
void *
os_channel_get_advisory_region(const channel_t chd)
{
struct __kern_nexus_adv_metadata *adv_md;
/*
 * To be backward compatible, this API only returns
 * the advisory region for the flowswitch.
 */
adv_md = CHD_NX_ADV_MD(chd);
if (adv_md == NULL ||
adv_md->knam_type != NEXUS_ADVISORY_TYPE_FLOWSWITCH) {
return NULL;
}
return CHD_NX_ADV_FSW(adv_md);
}
__attribute__((always_inline, visibility("hidden")))
static inline int
_flowadv_id_equal(struct __flowadv_entry *fe, uuid_t id)
{
/*
 * Anticipate a nicely (8-byte) aligned UUID from the
 * caller; the one in fae_id is always 8-byte aligned.
 */
if (__probable(IS_P2ALIGNED(id, sizeof(uint64_t)))) {
uint64_t *id_64 = (uint64_t *)(uintptr_t)id;
return fe->fae_id_64[0] == id_64[0] &&
fe->fae_id_64[1] == id_64[1];
} else if (__probable(IS_P2ALIGNED(id, sizeof(uint32_t)))) {
uint32_t *id_32 = (uint32_t *)(uintptr_t)id;
return fe->fae_id_32[0] == id_32[0] &&
fe->fae_id_32[1] == id_32[1] &&
fe->fae_id_32[2] == id_32[2] &&
fe->fae_id_32[3] == id_32[3];
}
return UUID_COMPARE(fe->fae_id, id);
}
int
os_channel_flow_admissible(const channel_ring_t chrd, uuid_t flow_id,
const flowadv_idx_t flow_index)
{
const struct __user_channel_ring *ring = chrd->chrd_ring;
const struct channel *chd = chrd->chrd_channel;
struct __flowadv_entry *fe = CHD_NX_FLOWADV(chd);
/*
 * Currently, flow advisory is on a per-nexus port basis.
 * To anticipate future requirements, we take the ring
 * as a parameter instead, even though for now we use it
 * only to check that this is a TX ring.
 */
if (__improbable(CHD_NX_FLOWADV(chd) == NULL)) {
return ENXIO;
} else if (__improbable(ring->ring_kind != CR_KIND_TX ||
flow_index >= CHD_PARAMS(chd)->nxp_flowadv_max)) {
return EINVAL;
}
/*
* Rather than checking if the UUID is all zeroes, check
* against fae_flags since the presence of FLOWADV_VALID
* means fae_id is non-zero. This avoids another round of
* comparison against zeroes.
*/
fe = &CHD_NX_FLOWADV(chd)[flow_index];
if (__improbable(fe->fae_flags == 0 || !_flowadv_id_equal(fe, flow_id))) {
return ENOENT;
}
return __improbable((fe->fae_flags & FLOWADVF_SUSPENDED) != 0) ?
ENOBUFS: 0;
}
int
os_channel_flow_adv_get_ce_count(const channel_ring_t chrd, uuid_t flow_id,
const flowadv_idx_t flow_index, uint32_t *ce_cnt, uint32_t *pkt_cnt)
{
const struct __user_channel_ring *ring = chrd->chrd_ring;
const struct channel *chd = chrd->chrd_channel;
struct __flowadv_entry *fe = CHD_NX_FLOWADV(chd);
/*
 * Currently, flow advisory is on a per-nexus port basis.
 * To anticipate future requirements, we take the ring
 * as a parameter instead, even though for now we use it
 * only to check that this is a TX ring.
 */
if (__improbable(CHD_NX_FLOWADV(chd) == NULL)) {
return ENXIO;
} else if (__improbable(ring->ring_kind != CR_KIND_TX ||
flow_index >= CHD_PARAMS(chd)->nxp_flowadv_max)) {
return EINVAL;
}
/*
* Rather than checking if the UUID is all zeroes, check
* against fae_flags since the presence of FLOWADV_VALID
* means fae_id is non-zero. This avoids another round of
* comparison against zeroes.
*/
fe = &CHD_NX_FLOWADV(chd)[flow_index];
if (__improbable(fe->fae_flags == 0 || !_flowadv_id_equal(fe, flow_id))) {
return ENOENT;
}
*ce_cnt = fe->fae_ce_cnt;
*pkt_cnt = fe->fae_pkt_cnt;
return 0;
}
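/*
 * Illustrative sketch (not compiled): gate transmissions on the flow
 * advisory table. flow_uuid and flow_idx are assumed to have been
 * obtained when the flow was set up.
 */
#if 0
static int
example_flow_can_send(channel_ring_t txring, uuid_t flow_uuid,
    flowadv_idx_t flow_idx)
{
	int err = os_channel_flow_admissible(txring, flow_uuid, flow_idx);

	/* ENOBUFS means the flow is currently suspended; retry later */
	return err == 0;
}
#endif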
channel_attr_t
os_channel_attr_create(void)
{
struct channel_attr *cha;
cha = malloc(sizeof(*cha));
if (cha != NULL) {
bzero(cha, sizeof(*cha));
}
return cha;
}
channel_attr_t
os_channel_attr_clone(const channel_attr_t cha)
{
struct channel_attr *ncha;
ncha = os_channel_attr_create();
if (ncha != NULL && cha != NULL) {
bcopy(cha, ncha, sizeof(*ncha));
ncha->cha_key = NULL;
ncha->cha_key_len = 0;
if (cha->cha_key != NULL && cha->cha_key_len != 0 &&
os_channel_attr_set_key(ncha, cha->cha_key,
cha->cha_key_len) != 0) {
os_channel_attr_destroy(ncha);
ncha = NULL;
}
}
return ncha;
}
int
os_channel_attr_set(const channel_attr_t cha, const channel_attr_type_t type,
const uint64_t value)
{
int err = 0;
switch (type) {
case CHANNEL_ATTR_TX_RINGS:
case CHANNEL_ATTR_RX_RINGS:
case CHANNEL_ATTR_TX_SLOTS:
case CHANNEL_ATTR_RX_SLOTS:
case CHANNEL_ATTR_SLOT_BUF_SIZE:
case CHANNEL_ATTR_SLOT_META_SIZE:
case CHANNEL_ATTR_NEXUS_EXTENSIONS:
case CHANNEL_ATTR_NEXUS_MHINTS:
case CHANNEL_ATTR_NEXUS_IFINDEX:
case CHANNEL_ATTR_NEXUS_STATS_SIZE:
case CHANNEL_ATTR_NEXUS_FLOWADV_MAX:
case CHANNEL_ATTR_NEXUS_META_TYPE:
case CHANNEL_ATTR_NEXUS_META_SUBTYPE:
case CHANNEL_ATTR_NEXUS_CHECKSUM_OFFLOAD:
case CHANNEL_ATTR_NEXUS_ADV_SIZE:
case CHANNEL_ATTR_MAX_FRAGS:
case CHANNEL_ATTR_NUM_BUFFERS:
case CHANNEL_ATTR_LARGE_BUF_SIZE:
err = ENOTSUP;
break;
case CHANNEL_ATTR_EXCLUSIVE:
cha->cha_exclusive = (uint32_t)value;
break;
case CHANNEL_ATTR_NO_AUTO_SYNC:
if (value == 0) {
err = ENOTSUP;
}
break;
case CHANNEL_ATTR_MONITOR:
switch (value) {
case CHANNEL_MONITOR_OFF:
case CHANNEL_MONITOR_NO_COPY:
case CHANNEL_MONITOR_COPY:
cha->cha_monitor = (uint32_t)value;
goto done;
}
err = EINVAL;
break;
case CHANNEL_ATTR_TX_LOWAT_UNIT:
case CHANNEL_ATTR_RX_LOWAT_UNIT:
switch (value) {
case CHANNEL_THRESHOLD_UNIT_BYTES:
case CHANNEL_THRESHOLD_UNIT_SLOTS:
if (type == CHANNEL_ATTR_TX_LOWAT_UNIT) {
cha->cha_tx_lowat.cet_unit =
(channel_threshold_unit_t)value;
} else {
cha->cha_rx_lowat.cet_unit =
(channel_threshold_unit_t)value;
}
goto done;
}
err = EINVAL;
break;
case CHANNEL_ATTR_TX_LOWAT_VALUE:
cha->cha_tx_lowat.cet_value = (uint32_t)value;
break;
case CHANNEL_ATTR_RX_LOWAT_VALUE:
cha->cha_rx_lowat.cet_value = (uint32_t)value;
break;
case CHANNEL_ATTR_USER_PACKET_POOL:
cha->cha_user_packet_pool = (value != 0);
break;
case CHANNEL_ATTR_NEXUS_DEFUNCT_OK:
cha->cha_nexus_defunct_ok = (value != 0);
break;
case CHANNEL_ATTR_FILTER:
cha->cha_filter = (uint32_t)value;
break;
case CHANNEL_ATTR_EVENT_RING:
cha->cha_enable_event_ring = (value != 0);
break;
case CHANNEL_ATTR_LOW_LATENCY:
cha->cha_low_latency = (value != 0);
break;
default:
err = EINVAL;
break;
}
done:
return err;
}
int
os_channel_attr_set_key(const channel_attr_t cha, const void *key,
const uint32_t key_len)
{
int err = 0;
if ((key == NULL && key_len != 0) || (key != NULL && key_len == 0) ||
(key_len != 0 && key_len > NEXUS_MAX_KEY_LEN)) {
err = EINVAL;
goto done;
}
cha->cha_key_len = 0;
if (key_len == 0 && cha->cha_key != NULL) {
free(cha->cha_key);
cha->cha_key = NULL;
} else if (key != NULL && key_len != 0) {
if (cha->cha_key != NULL) {
free(cha->cha_key);
}
if ((cha->cha_key = malloc(key_len)) == NULL) {
err = ENOMEM;
goto done;
}
cha->cha_key_len = key_len;
bcopy(key, cha->cha_key, key_len);
}
done:
return err;
}
int
os_channel_attr_get(const channel_attr_t cha, const channel_attr_type_t type,
uint64_t *value)
{
int err = 0;
switch (type) {
case CHANNEL_ATTR_TX_RINGS:
*value = cha->cha_tx_rings;
break;
case CHANNEL_ATTR_RX_RINGS:
*value = cha->cha_rx_rings;
break;
case CHANNEL_ATTR_TX_SLOTS:
*value = cha->cha_tx_slots;
break;
case CHANNEL_ATTR_RX_SLOTS:
*value = cha->cha_rx_slots;
break;
case CHANNEL_ATTR_SLOT_BUF_SIZE:
*value = cha->cha_buf_size;
break;
case CHANNEL_ATTR_SLOT_META_SIZE:
*value = cha->cha_meta_size;
break;
case CHANNEL_ATTR_NEXUS_STATS_SIZE:
*value = cha->cha_stats_size;
break;
case CHANNEL_ATTR_NEXUS_FLOWADV_MAX:
*value = cha->cha_flowadv_max;
break;
case CHANNEL_ATTR_EXCLUSIVE:
*value = cha->cha_exclusive;
break;
case CHANNEL_ATTR_NO_AUTO_SYNC:
*value = 1;
break;
case CHANNEL_ATTR_MONITOR:
*value = cha->cha_monitor;
break;
case CHANNEL_ATTR_TX_LOWAT_UNIT:
*value = cha->cha_tx_lowat.cet_unit;
break;
case CHANNEL_ATTR_TX_LOWAT_VALUE:
*value = cha->cha_tx_lowat.cet_value;
break;
case CHANNEL_ATTR_RX_LOWAT_UNIT:
*value = cha->cha_rx_lowat.cet_unit;
break;
case CHANNEL_ATTR_RX_LOWAT_VALUE:
*value = cha->cha_rx_lowat.cet_value;
break;
case CHANNEL_ATTR_NEXUS_TYPE:
*value = cha->cha_nexus_type;
break;
case CHANNEL_ATTR_NEXUS_EXTENSIONS:
*value = cha->cha_nexus_extensions;
break;
case CHANNEL_ATTR_NEXUS_MHINTS:
*value = cha->cha_nexus_mhints;
break;
case CHANNEL_ATTR_NEXUS_IFINDEX:
*value = cha->cha_nexus_ifindex;
break;
case CHANNEL_ATTR_NEXUS_META_TYPE:
*value = cha->cha_nexus_meta_type;
break;
case CHANNEL_ATTR_NEXUS_META_SUBTYPE:
*value = cha->cha_nexus_meta_subtype;
break;
case CHANNEL_ATTR_NEXUS_CHECKSUM_OFFLOAD:
*value = cha->cha_nexus_checksum_offload;
break;
case CHANNEL_ATTR_USER_PACKET_POOL:
*value = (cha->cha_user_packet_pool != 0);
break;
case CHANNEL_ATTR_NEXUS_ADV_SIZE:
*value = cha->cha_nexusadv_size;
break;
case CHANNEL_ATTR_NEXUS_DEFUNCT_OK:
*value = cha->cha_nexus_defunct_ok;
break;
case CHANNEL_ATTR_EVENT_RING:
*value = (cha->cha_enable_event_ring != 0);
break;
case CHANNEL_ATTR_MAX_FRAGS:
*value = cha->cha_max_frags;
break;
case CHANNEL_ATTR_NUM_BUFFERS:
*value = cha->cha_num_buffers;
break;
case CHANNEL_ATTR_LOW_LATENCY:
*value = (cha->cha_low_latency != 0);
break;
case CHANNEL_ATTR_LARGE_BUF_SIZE:
*value = cha->cha_large_buf_size;
break;
default:
err = EINVAL;
break;
}
return err;
}
int
os_channel_attr_get_key(const channel_attr_t cha, void *key,
uint32_t *key_len)
{
int err = 0;
if (key_len == NULL) {
err = EINVAL;
goto done;
} else if (key == NULL || cha->cha_key == NULL) {
*key_len = (cha->cha_key != NULL) ? cha->cha_key_len : 0;
goto done;
}
if (*key_len >= cha->cha_key_len) {
bcopy(cha->cha_key, key, cha->cha_key_len);
*key_len = cha->cha_key_len;
} else {
err = ENOMEM;
}
done:
return err;
}
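/*
 * Illustrative sketch (not compiled): open a channel with the user
 * packet pool and the event ring enabled via the attribute API; recall
 * that the event ring requires the user packet pool.
 */
#if 0
static channel_t
example_open_with_upp(const uuid_t nx_uuid, const nexus_port_t nx_port)
{
	channel_attr_t cha = os_channel_attr_create();
	channel_t ch = NULL;

	if (cha == NULL) {
		return NULL;
	}
	if (os_channel_attr_set(cha, CHANNEL_ATTR_USER_PACKET_POOL, 1) == 0 &&
	    os_channel_attr_set(cha, CHANNEL_ATTR_EVENT_RING, 1) == 0) {
		ch = os_channel_create_extended(nx_uuid, nx_port,
		    CHANNEL_DIR_TX_RX, CHANNEL_RING_ID_ANY, cha);
	}
	os_channel_attr_destroy(cha);
	return ch;
}
#endif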
__attribute__((visibility("hidden")))
static void
os_channel_info2attr(struct channel *chd, channel_attr_t cha)
{
struct ch_info *cinfo = CHD_INFO(chd);
/* Save these first before we wipe out the attribute */
uint32_t cha_key_len = cha->cha_key_len;
void *cha_key = cha->cha_key;
uint32_t caps;
_CASSERT(NEXUS_META_TYPE_INVALID == CHANNEL_NEXUS_META_TYPE_INVALID);
_CASSERT(NEXUS_META_TYPE_QUANTUM == CHANNEL_NEXUS_META_TYPE_QUANTUM);
_CASSERT(NEXUS_META_TYPE_PACKET == CHANNEL_NEXUS_META_TYPE_PACKET);
_CASSERT(NEXUS_META_SUBTYPE_INVALID ==
CHANNEL_NEXUS_META_SUBTYPE_INVALID);
_CASSERT(NEXUS_META_SUBTYPE_PAYLOAD ==
CHANNEL_NEXUS_META_SUBTYPE_PAYLOAD);
_CASSERT(NEXUS_META_SUBTYPE_RAW == CHANNEL_NEXUS_META_SUBTYPE_RAW);
bzero(cha, sizeof(*cha));
cha->cha_tx_rings = CHD_PARAMS(chd)->nxp_tx_rings;
cha->cha_rx_rings = CHD_PARAMS(chd)->nxp_rx_rings;
cha->cha_tx_slots = CHD_PARAMS(chd)->nxp_tx_slots;
cha->cha_rx_slots = CHD_PARAMS(chd)->nxp_rx_slots;
cha->cha_buf_size = CHD_PARAMS(chd)->nxp_buf_size;
cha->cha_meta_size = CHD_PARAMS(chd)->nxp_meta_size;
cha->cha_stats_size = CHD_PARAMS(chd)->nxp_stats_size;
cha->cha_flowadv_max = CHD_PARAMS(chd)->nxp_flowadv_max;
cha->cha_exclusive = !!(cinfo->cinfo_ch_mode & CHMODE_EXCLUSIVE);
cha->cha_user_packet_pool = !!(cinfo->cinfo_ch_mode &
CHMODE_USER_PACKET_POOL);
cha->cha_nexus_defunct_ok = !!(cinfo->cinfo_ch_mode &
CHMODE_DEFUNCT_OK);
cha->cha_nexusadv_size = CHD_PARAMS(chd)->nxp_nexusadv_size;
if (cinfo->cinfo_ch_mode & CHMODE_MONITOR) {
cha->cha_monitor =
(cinfo->cinfo_ch_mode & CHMODE_MONITOR_NO_COPY) ?
CHANNEL_MONITOR_NO_COPY : CHANNEL_MONITOR_COPY;
} else {
cha->cha_monitor = CHANNEL_MONITOR_OFF;
}
cha->cha_key_len = cha_key_len;
cha->cha_key = cha_key;
cha->cha_tx_lowat = cinfo->cinfo_tx_lowat;
cha->cha_rx_lowat = cinfo->cinfo_rx_lowat;
cha->cha_nexus_type = CHD_PARAMS(chd)->nxp_type;
cha->cha_nexus_extensions = CHD_PARAMS(chd)->nxp_extensions;
cha->cha_nexus_mhints = CHD_PARAMS(chd)->nxp_mhints;
cha->cha_nexus_ifindex = CHD_PARAMS(chd)->nxp_ifindex;
cha->cha_nexus_meta_type = chd->chd_md_type;
cha->cha_nexus_meta_subtype = chd->chd_md_subtype;
cha->cha_enable_event_ring =
(cinfo->cinfo_ch_mode & CHMODE_EVENT_RING) != 0;
cha->cha_low_latency =
(cinfo->cinfo_ch_mode & CHMODE_LOW_LATENCY) != 0;
caps = CHD_PARAMS(chd)->nxp_capabilities;
if (caps & NXPCAP_CHECKSUM_PARTIAL) {
cha->cha_nexus_checksum_offload =
CHANNEL_NEXUS_CHECKSUM_PARTIAL;
} else {
cha->cha_nexus_checksum_offload = 0;
}
cha->cha_max_frags = CHD_PARAMS(chd)->nxp_max_frags;
cha->cha_num_buffers = cinfo->cinfo_num_bufs;
cha->cha_large_buf_size = CHD_PARAMS(chd)->nxp_large_buf_size;
}
void
os_channel_attr_destroy(channel_attr_t cha)
{
if (cha->cha_key != NULL) {
free(cha->cha_key);
cha->cha_key = NULL;
}
free(cha);
}
static int
os_channel_packet_alloc_common(const channel_t chd, packet_t *ph, bool large)
{
struct __user_channel_ring *ring;
struct channel_ring_desc *chrd;
struct __user_quantum *q;
slot_idx_t idx;
mach_vm_address_t baddr;
uint16_t bdoff;
struct ch_info *ci = CHD_INFO(chd);
if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
return ENOTSUP;
}
if (__improbable(large &&
chd->chd_large_buf_alloc_ring_idx == CHD_RING_IDX_NONE)) {
return ENOTSUP;
}
chrd = &chd->chd_rings[large ?
chd->chd_large_buf_alloc_ring_idx : chd->chd_alloc_ring_idx];
ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
idx = ring->ring_head;
if (__improbable(idx == ring->ring_tail)) {
/*
 * do a sync to get more packets; since we are paying the
 * cost of a syscall, sync the free ring as well.
 */
int err;
sync_flags_t flags;
if (large) {
flags = (chd->chd_sync_flags &
~(CHANNEL_SYNCF_ALLOC_BUF | CHANNEL_SYNCF_ALLOC)) |
CHANNEL_SYNCF_LARGE_ALLOC;
} else {
flags = chd->chd_sync_flags & ~CHANNEL_SYNCF_ALLOC_BUF;
}
err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP, flags);
if (__improbable(err != 0)) {
if (!_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("packet pool alloc "
"sync failed", err);
/* NOTREACHED */
__builtin_unreachable();
}
return err;
}
}
if (__improbable(idx == ring->ring_tail)) {
return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
ENXIO : ENOMEM;
}
_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
q = _SLOT_METADATA(chrd, ring, idx);
_METADATA_VERIFY(chrd, q);
*ph = SK_PTR_ENCODE(q, chrd->chrd_md_type, chrd->chrd_md_subtype);
_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));
/*
 * Initialize the metadata buffer address. In the event of a
 * defunct, we'd be accessing zero-filled memory; this is fine
 * since we ignore all changes made to the region at that time.
 */
baddr = _initialize_metadata_address(chrd, q, &bdoff);
if (__improbable(baddr == 0)) {
return ENXIO;
}
ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
int
os_channel_packet_alloc(const channel_t chd, packet_t *ph)
{
return os_channel_packet_alloc_common(chd, ph, false);
}
int
os_channel_large_packet_alloc(const channel_t chd, packet_t *ph)
{
return os_channel_packet_alloc_common(chd, ph, true);
}
int
os_channel_packet_free(const channel_t chd, packet_t ph)
{
struct __user_channel_ring *ring;
struct channel_ring_desc *chrd;
slot_idx_t idx;
obj_idx_t midx;
struct ch_info *ci = CHD_INFO(chd);
if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
return ENOTSUP;
}
chrd = &chd->chd_rings[chd->chd_free_ring_idx];
ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
idx = ring->ring_head;
if (__improbable(idx == ring->ring_tail)) {
/*
 * do a sync to reclaim space in the free ring
 */
int err;
err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
CHANNEL_SYNCF_FREE);
if (__improbable(err != 0) && !_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("packet pool free "
"sync failed", err);
/* NOTREACHED */
__builtin_unreachable();
}
}
if (__improbable(idx == ring->ring_tail) && !_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT("no free ring space");
/* NOTREACHED */
__builtin_unreachable();
}
/*
* In the event of a defunct, midx will be 0 and we'll end up
* attaching it to the slot; this is fine since we ignore all
* changes made to the slot descriptors at that time.
*/
midx = METADATA_IDX(QUM_ADDR(ph));
_SLOT_METADATA_IDX_VERIFY(chrd, QUM_ADDR(ph), midx);
_SLOT_ATTACH_METADATA(_SLOT_DESC(chrd, idx), midx);
ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
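/*
 * Illustrative sketch (not compiled): allocate a packet from the user
 * packet pool, attach it to a TX slot and advance the ring. Assumes the
 * channel was created with the user packet pool enabled; building and
 * finalizing the packet via the os_packet API is elided.
 */
#if 0
static int
example_upp_tx_one(channel_t ch, channel_ring_t txring)
{
	channel_slot_t slot;
	packet_t pkt;
	int err;

	err = os_channel_packet_alloc(ch, &pkt);
	if (err != 0) {
		return err;
	}
	/* ... populate and finalize pkt here ... */
	slot = os_channel_get_next_slot(txring, NULL, NULL);
	if (slot == NULL) {
		(void) os_channel_packet_free(ch, pkt);
		return ENOBUFS;
	}
	err = os_channel_slot_attach_packet(txring, slot, pkt);
	if (err == 0) {
		err = os_channel_advance_slot(txring, slot);
	}
	return (err != 0) ? err : os_channel_sync(ch, CHANNEL_SYNC_TX);
}
#endif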
int
os_channel_slot_attach_packet(const channel_ring_t chrd,
const channel_slot_t slot, packet_t ph)
{
slot_idx_t idx;
obj_idx_t midx;
if (__improbable((chrd->chrd_channel->chd_info->cinfo_ch_mode &
CHMODE_USER_PACKET_POOL) == 0)) {
return ENOTSUP;
}
if (__improbable(!__packet_is_finalized(ph))) {
if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
SK_ABORT("packet not finalized");
/* NOTREACHED */
__builtin_unreachable();
}
goto done;
}
idx = _SLOT_INDEX(chrd, slot);
if (__improbable(!_slot_index_is_valid(chrd->chrd_ring, idx))) {
if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
SK_ABORT_WITH_CAUSE("Invalid slot", slot);
/* NOTREACHED */
__builtin_unreachable();
}
goto done;
}
if (__improbable(SD_VALID_METADATA(SLOT_DESC_USD(slot)))) {
if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
SK_ABORT_WITH_CAUSE("Slot has attached packet", slot);
/* NOTREACHED */
__builtin_unreachable();
}
goto done;
}
/*
* In the event of a defunct, midx will be 0 and we'll end up
* attaching it to the slot; this is fine since we ignore all
* changes made to the slot descriptors at that time.
*/
midx = METADATA_IDX(QUM_ADDR(ph));
_SLOT_METADATA_IDX_VERIFY(chrd, QUM_ADDR(ph), midx);
_SLOT_ATTACH_METADATA(SLOT_DESC_USD(slot), midx);
done:
return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
int
os_channel_slot_detach_packet(const channel_ring_t chrd,
const channel_slot_t slot, packet_t ph)
{
slot_idx_t idx;
if (__improbable((chrd->chrd_channel->chd_info->cinfo_ch_mode &
CHMODE_USER_PACKET_POOL) == 0)) {
return ENOTSUP;
}
idx = _SLOT_INDEX(chrd, slot);
if (__improbable(!_slot_index_is_valid(chrd->chrd_ring, idx))) {
if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
SK_ABORT_WITH_CAUSE("Invalid slot", slot);
/* NOTREACHED */
__builtin_unreachable();
}
goto done;
}
if (__improbable(!SD_VALID_METADATA(SLOT_DESC_USD(slot)))) {
if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
SK_ABORT_WITH_CAUSE("Slot has no attached packet",
slot);
/* NOTREACHED */
__builtin_unreachable();
}
goto done;
}
if (__improbable(ph != SK_PTR_ENCODE(_SLOT_METADATA(chrd,
chrd->chrd_ring, idx), chrd->chrd_md_type,
chrd->chrd_md_subtype))) {
if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
SK_ABORT("packet handle mismatch");
/* NOTREACHED */
__builtin_unreachable();
}
goto done;
}
if (__improbable(!__packet_is_finalized(ph))) {
if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
SK_ABORT("packet not finalized");
/* NOTREACHED */
__builtin_unreachable();
}
goto done;
}
/*
* In the event of a defunct, we ignore any changes made to
* the slot descriptors, and so doing this is harmless.
*/
_SLOT_DETACH_METADATA(SLOT_DESC_USD(slot));
done:
return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}
__attribute__((visibility("hidden")))
static inline int
os_channel_purge_packet_alloc_ring_common(const channel_t chd, bool large)
{
struct __user_channel_ring *ring;
struct channel_ring_desc *chrd;
uint32_t curr_ws;
slot_idx_t idx;
packet_t ph;
int npkts, err;
chrd = &chd->chd_rings[large ?
chd->chd_large_buf_alloc_ring_idx : chd->chd_alloc_ring_idx];
ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
idx = ring->ring_head;
/* calculate the number of packets in the alloc pool */
npkts = ring->ring_tail - idx;
if (npkts < 0) {
npkts += ring->ring_num_slots;
}
curr_ws = ring->ring_alloc_ws;
while ((uint32_t)npkts-- > curr_ws) {
struct __user_quantum *q;
_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
q = _SLOT_METADATA(chrd, ring, idx);
_METADATA_VERIFY(chrd, q);
ph = SK_PTR_ENCODE(q, chrd->chrd_md_type,
chrd->chrd_md_subtype);
_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));
/*
 * Initialize the metadata buffer address. In the event of a
 * defunct, we'd be accessing zero-filled memory; this is fine
 * since we ignore all changes made to the region at that time.
 */
if (chrd->chrd_md_type == NEXUS_META_TYPE_PACKET) {
struct __user_packet *p = (struct __user_packet *)q;
uint16_t bcnt = p->pkt_bufs_cnt;
uint16_t bmax = p->pkt_bufs_max;
if (__improbable((bcnt == 0) || (bmax == 0))) {
if (!_CHANNEL_RING_IS_DEFUNCT(chrd)) {
SK_ABORT("pkt pool purge, bad bufcnt");
/* NOTREACHED */
__builtin_unreachable();
} else {
return ENXIO;
}
}
/*
* alloc ring will not have multi-buflet packets.
*/
_PKT_BUFCNT_VERIFY(chrd, bcnt, 1);
}
*(mach_vm_address_t *) (uintptr_t)&q->qum_buf[0].buf_addr =
_CHANNEL_RING_BUF(chrd, ring, &q->qum_buf[0]);
idx = _CHANNEL_RING_NEXT(ring, idx);
ring->ring_head = idx;
err = os_channel_packet_free(chd, ph);
if (__improbable(err != 0)) {
if (!_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("packet pool purge "
"free failed", err);
/* NOTREACHED */
__builtin_unreachable();
}
return err;
}
}
return 0;
}
__attribute__((visibility("hidden")))
static inline int
os_channel_purge_packet_alloc_ring(const channel_t chd)
{
return os_channel_purge_packet_alloc_ring_common(chd, false);
}
__attribute__((visibility("hidden")))
static inline int
os_channel_purge_large_packet_alloc_ring(const channel_t chd)
{
return os_channel_purge_packet_alloc_ring_common(chd, true);
}
__attribute__((visibility("hidden")))
static inline int
os_channel_purge_buflet_alloc_ring(const channel_t chd)
{
struct __user_channel_ring *ring;
struct channel_ring_desc *chrd;
uint32_t curr_ws;
slot_idx_t idx;
int nbfts, err;
chrd = &chd->chd_rings[chd->chd_buf_alloc_ring_idx];
ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
idx = ring->ring_head;
/* calculate the number of buflets in the alloc pool */
nbfts = ring->ring_tail - idx;
if (nbfts < 0) {
nbfts += ring->ring_num_slots;
}
curr_ws = ring->ring_alloc_ws;
while ((uint32_t)nbfts-- > curr_ws) {
struct __user_buflet *ubft;
obj_idx_t nbft_idx;
_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
ubft = _SLOT_BFT_METADATA(chrd, ring, idx);
_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));
/*
* Initialize the buflet metadata buffer address.
*/
*(mach_vm_address_t *)(uintptr_t)&(ubft->buf_addr) =
_CHANNEL_RING_BUF(chrd, ring, ubft);
if (__improbable(ubft->buf_addr == 0)) {
SK_ABORT_WITH_CAUSE("buflet with NULL buffer",
ubft->buf_idx);
/* NOTREACHED */
__builtin_unreachable();
}
nbft_idx = ubft->buf_nbft_idx;
if (__improbable(nbft_idx != OBJ_IDX_NONE)) {
if (_CHANNEL_IS_DEFUNCT(chd)) {
return ENXIO;
} else {
SK_ABORT_WITH_CAUSE("buflet with invalid nidx",
nbft_idx);
/* NOTREACHED */
__builtin_unreachable();
}
}
idx = _CHANNEL_RING_NEXT(ring, idx);
ring->ring_head = idx;
err = os_channel_buflet_free(chd, ubft);
if (__improbable(err != 0)) {
if (!_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("buflet pool purge "
"free failed", err);
/* NOTREACHED */
__builtin_unreachable();
}
return err;
}
}
return 0;
}
int
os_channel_packet_pool_purge(const channel_t chd)
{
struct ch_info *ci = CHD_INFO(chd);
int err;
if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
return ENOTSUP;
}
err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
((chd->chd_sync_flags & ~CHANNEL_SYNCF_FREE) | CHANNEL_SYNCF_PURGE));
if (__improbable(err != 0)) {
if (!_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("packet pool purge sync failed",
err);
/* NOTREACHED */
__builtin_unreachable();
}
return err;
}
err = os_channel_purge_packet_alloc_ring(chd);
if (__improbable(err != 0)) {
return err;
}
if (chd->chd_large_buf_alloc_ring_idx != CHD_RING_IDX_NONE) {
err = os_channel_purge_large_packet_alloc_ring(chd);
if (__improbable(err != 0)) {
return err;
}
}
if (_num_allocator_rings(CHD_SCHEMA(chd)) > 2) {
err = os_channel_purge_buflet_alloc_ring(chd);
if (__improbable(err != 0)) {
return err;
}
}
err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP, CHANNEL_SYNCF_FREE);
if (__improbable(err != 0)) {
if (!_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("packet pool free sync failed",
err);
/* NOTREACHED */
__builtin_unreachable();
}
return err;
}
return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
int
os_channel_get_next_event_handle(const channel_t chd,
os_channel_event_handle_t *ehandle, os_channel_event_type_t *etype,
uint32_t *nevents)
{
struct __kern_channel_event_metadata *emd;
struct __user_channel_ring *ring;
struct channel_ring_desc *chrd;
struct __user_quantum *qum;
mach_vm_address_t baddr;
uint16_t bdoff;
slot_idx_t idx;
struct __user_channel_schema *csm = CHD_SCHEMA(chd);
struct ch_info *ci = CHD_INFO(chd);
if (__improbable((ehandle == NULL) || (etype == NULL) ||
(nevents == NULL))) {
return EINVAL;
}
if (__improbable((ci->cinfo_ch_mode & CHMODE_EVENT_RING) == 0)) {
return ENOTSUP;
}
*ehandle = NULL;
chrd = &chd->chd_rings[_num_tx_rings(ci) + _num_rx_rings(ci) +
_num_allocator_rings(csm)];
ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
idx = ring->ring_head;
if (__improbable(idx == ring->ring_tail)) {
return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
ENXIO : ENODATA;
}
_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
qum = _SLOT_METADATA(chrd, ring, idx);
_METADATA_VERIFY(chrd, qum);
_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));
baddr = _initialize_metadata_address(chrd, qum, &bdoff);
if (__improbable(baddr == 0)) {
return ENXIO;
}
*ehandle = SK_PTR_ENCODE(qum, chrd->chrd_md_type,
chrd->chrd_md_subtype);
emd = (void *)(baddr + bdoff);
*etype = emd->emd_etype;
*nevents = emd->emd_nevents;
ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
int
os_channel_event_free(const channel_t chd, os_channel_event_handle_t ehandle)
{
return os_channel_packet_free(chd, (packet_t)ehandle);
}
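/*
 * Illustrative sketch (not compiled): drain the event ring. Decoding
 * the individual events inside the handle's buffer is done with the
 * os_channel_event API and is elided here.
 */
#if 0
static void
example_drain_events(channel_t ch)
{
	os_channel_event_handle_t eh;
	os_channel_event_type_t etype;
	uint32_t nevents;

	while (os_channel_get_next_event_handle(ch, &eh, &etype,
	    &nevents) == 0) {
		/* ... process nevents events of type etype ... */
		(void) os_channel_event_free(ch, eh);
	}
}
#endif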
int
os_channel_get_interface_advisory(const channel_t chd,
struct ifnet_interface_advisory *advisory)
{
struct __kern_netif_intf_advisory *intf_adv;
struct __kern_nexus_adv_metadata *adv_md;
nexus_advisory_type_t adv_type;
/*
* Interface advisory is only supported for netif and flowswitch.
*/
adv_md = CHD_NX_ADV_MD(chd);
if (adv_md == NULL) {
return ENOENT;
}
adv_type = adv_md->knam_type;
if (__improbable(adv_type != NEXUS_ADVISORY_TYPE_NETIF &&
adv_type != NEXUS_ADVISORY_TYPE_FLOWSWITCH)) {
return _CHANNEL_IS_DEFUNCT(chd) ? ENXIO : ENOENT;
}
if (adv_type == NEXUS_ADVISORY_TYPE_NETIF) {
intf_adv = &(CHD_NX_ADV_NETIF(adv_md))->__kern_intf_adv;
} else {
intf_adv = &(CHD_NX_ADV_FSW(adv_md))->_nxadv_intf_adv;
}
if (intf_adv->cksum != os_cpu_copy_in_cksum(&intf_adv->adv, advisory,
sizeof(*advisory), 0)) {
return _CHANNEL_IS_DEFUNCT(chd) ? ENXIO : EAGAIN;
}
return 0;
}
int
os_channel_configure_interface_advisory(const channel_t chd, boolean_t enable)
{
uint32_t value = enable;
return __channel_set_opt(chd->chd_fd, CHOPT_IF_ADV_CONF,
&value, sizeof(value));
}
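/*
 * Illustrative sketch (not compiled): enable interface advisories and
 * read a consistent snapshot, retrying while the checksum over the
 * advisory region indicates a torn (concurrently updated) copy.
 */
#if 0
static int
example_read_if_advisory(channel_t ch, struct ifnet_interface_advisory *adv)
{
	int err = os_channel_configure_interface_advisory(ch, TRUE);

	if (err != 0) {
		return err;
	}
	do {
		err = os_channel_get_interface_advisory(ch, adv);
	} while (err == EAGAIN);
	return err;
}
#endif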
int
os_channel_buflet_alloc(const channel_t chd, buflet_t *bft)
{
struct __user_channel_ring *ring;
struct channel_ring_desc *chrd;
struct __user_buflet *ubft;
obj_idx_t nbft_idx;
slot_idx_t idx;
struct ch_info *ci = CHD_INFO(chd);
if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
return ENOTSUP;
}
if (__improbable(_num_allocator_rings(CHD_SCHEMA(chd)) < 4)) {
return ENOTSUP;
}
chrd = &chd->chd_rings[chd->chd_buf_alloc_ring_idx];
ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
idx = ring->ring_head;
if (__improbable(idx == ring->ring_tail)) {
/*
 * do a sync to get more buflets
 */
int err;
err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
CHANNEL_SYNCF_ALLOC_BUF | CHANNEL_SYNCF_FREE);
if (__improbable(err != 0)) {
if (!_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("buflet pool alloc "
"sync failed", err);
/* NOTREACHED */
__builtin_unreachable();
}
return err;
}
}
if (__improbable(idx == ring->ring_tail)) {
return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ?
ENXIO : ENOMEM;
}
_SLOT_DESC_VERIFY(chrd, _SLOT_DESC(chrd, idx));
ubft = _SLOT_BFT_METADATA(chrd, ring, idx);
_SLOT_DETACH_METADATA(_SLOT_DESC(chrd, idx));
/*
* Initialize the buflet metadata buffer address.
*/
*(mach_vm_address_t *)(uintptr_t)&(ubft->buf_addr) =
_CHANNEL_RING_BUF(chrd, ring, ubft);
if (__improbable(ubft->buf_addr == 0)) {
SK_ABORT_WITH_CAUSE("buflet alloc with NULL buffer",
ubft->buf_idx);
/* NOTREACHED */
__builtin_unreachable();
}
nbft_idx = ubft->buf_nbft_idx;
if (__improbable(nbft_idx != OBJ_IDX_NONE)) {
if (_CHANNEL_IS_DEFUNCT(chd)) {
return ENXIO;
} else {
SK_ABORT_WITH_CAUSE("buflet alloc with invalid nidx",
nbft_idx);
/* NOTREACHED */
__builtin_unreachable();
}
}
ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
*bft = ubft;
return __improbable(_CHANNEL_IS_DEFUNCT(chd)) ? ENXIO : 0;
}
int
os_channel_buflet_free(const channel_t chd, buflet_t ubft)
{
struct __user_channel_ring *ring;
struct channel_ring_desc *chrd;
slot_idx_t idx;
obj_idx_t midx;
struct ch_info *ci = CHD_INFO(chd);
if (__improbable((ci->cinfo_ch_mode & CHMODE_USER_PACKET_POOL) == 0)) {
return ENOTSUP;
}
if (__improbable(_num_allocator_rings(CHD_SCHEMA(chd)) < 4)) {
return ENOTSUP;
}
chrd = &chd->chd_rings[chd->chd_buf_free_ring_idx];
ring = __DECONST(struct __user_channel_ring *, chrd->chrd_ring);
idx = ring->ring_head;
if (__improbable(idx == ring->ring_tail)) {
/*
 * do a sync to reclaim space in the free ring
 */
int err;
err = __channel_sync(chd->chd_fd, CHANNEL_SYNC_UPP,
CHANNEL_SYNCF_FREE);
if (__improbable(err != 0) && !_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT_WITH_CAUSE("buflet pool free "
"sync failed", err);
/* NOTREACHED */
__builtin_unreachable();
}
}
if (__improbable(idx == ring->ring_tail) && !_CHANNEL_IS_DEFUNCT(chd)) {
SK_ABORT("no ring space in buflet free ring");
/* NOTREACHED */
__builtin_unreachable();
}
midx = _BFT_INDEX(chrd, ubft);
_SLOT_BFT_METADATA_IDX_VERIFY(chrd, ubft, midx);
_SLOT_ATTACH_METADATA(_SLOT_DESC(chrd, idx), midx);
ring->ring_head = _CHANNEL_RING_NEXT(ring, idx);
return __improbable(_CHANNEL_RING_IS_DEFUNCT(chrd)) ? ENXIO : 0;
}