/*
 * Copyright (c) 2016-2023 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Once a packet is classified, it goes through checks to see if there
 * is a matching flow entry in the flow table.  The key used to search
 * the entry is composed of the fields contained in struct flow_key.
 *
 * Flow entry insertion into and deletion from the flow table, on
 * behalf of the owning client process, require the use of the
 * rule ID (UUID) as the search key.
 *
 * Because of the above, each flow entry simultaneously exists in two
 * respective trees: flow_entry_tree and flow_entry_id_tree.
 *
 * Using a single RW lock to protect the two trees is simple, but
 * data path performance suffers during flow insertion and deletion,
 * especially as the number of client processes and flows grows.
 *
 * To solve that, we deploy the following scheme:
 *
 * Given that the flow_entry_tree is searched on a per-packet basis,
 * we break it down into a series of trees, each one contained within
 * a flow_bucket structure.  The hash of the flow_key determines the
 * index of the flow_bucket to search the flow_entry_tree from.
 *
 * The flow_entry_id_tree is searched on each flow insertion and
 * deletion, and similarly we break it down into a series of trees,
 * each contained within a flow_owner_bucket structure. We use the
 * client process ID (pid_t) to determine the bucket index.
 *
 * Each flow_bucket and flow_owner_bucket structure is dynamically
 * created and aligned on a CPU cache-line boundary.  The number of
 * buckets is determined by the client module at the time the flow
 * manager context is initialized.  The alignment avoids false
 * sharing, especially given that each bucket has its own RW lock.
 */
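
/*
 * Illustrative sketch of the scheme above.  The modulo mapping and
 * the n_flow_buckets variable shown here are assumptions for
 * illustration only; the actual index computation is private to the
 * flow manager:
 *
 *	// data path: the flow_key hash selects the flow_bucket
 *	idx = flow_key_hash(&key) % n_flow_buckets;
 *
 *	// control path: the owning pid selects the flow_owner_bucket
 *	fob = flow_mgr_get_fob_by_pid(fm, pid);
 */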

#ifndef _SKYWALK_NEXUS_FLOWSWITCH_FLOW_FLOWVAR_H_
#define _SKYWALK_NEXUS_FLOWSWITCH_FLOW_FLOWVAR_H_

#ifdef BSD_KERNEL_PRIVATE
#include <skywalk/core/skywalk_var.h>
#include <skywalk/lib/cuckoo_hashtable.h>
#include <skywalk/namespace/netns.h>
#include <skywalk/namespace/protons.h>
#include <skywalk/packet/packet_var.h>
#include <net/flowhash.h>
#include <netinet/ip.h>
#include <netinet/in_stat.h>
#include <netinet/ip6.h>
#include <sys/eventhandler.h>

RB_HEAD(flow_owner_tree, flow_owner);

struct flow_owner_bucket {
	decl_lck_mtx_data(, fob_lock);
	struct flow_owner_tree  fob_owner_head;
	uint16_t                fob_busy_flags;
	uint16_t                fob_open_waiters;
	uint16_t                fob_close_waiters;
	uint16_t                fob_dtor_waiters;
	const size_t            fob_idx;
};

#define FOBF_OPEN_BUSY          0x1     /* flow open monitor */
#define FOBF_CLOSE_BUSY         0x2     /* flow close monitor */
#define FOBF_DEAD               0x4     /* no longer usable */

#define FOB_LOCK(_fob)                  \
	lck_mtx_lock(&(_fob)->fob_lock)
#define FOB_LOCK_SPIN(_fob)             \
	lck_mtx_lock_spin(&(_fob)->fob_lock)
#define FOB_LOCK_CONVERT(_fob)          \
	lck_mtx_convert_spin(&(_fob)->fob_lock)
#define FOB_TRY_LOCK(_fob)              \
	lck_mtx_try_lock(&(_fob)->fob_lock)
#define FOB_LOCK_ASSERT_HELD(_fob)      \
	LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_OWNED)
#define FOB_LOCK_ASSERT_NOTHELD(_fob)   \
	LCK_MTX_ASSERT(&(_fob)->fob_lock, LCK_MTX_ASSERT_NOTOWNED)
#define FOB_UNLOCK(_fob)                \
	lck_mtx_unlock(&(_fob)->fob_lock)
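
/*
 * Typical usage sketch (hypothetical caller; assumes lookups are done
 * with the bucket lock held, and elides error handling):
 *
 *	struct flow_owner_bucket *fob = flow_mgr_get_fob_by_pid(fm, pid);
 *	struct flow_owner *fo;
 *
 *	FOB_LOCK(fob);
 *	fo = flow_owner_find_by_pid(fob, pid, context, false);
 *	...
 *	FOB_UNLOCK(fob);
 */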

RB_HEAD(flow_entry_id_tree, flow_entry);

#define FLOW_PROCESS_NAME_LENGTH        24

struct flow_owner {
	RB_ENTRY(flow_owner)    fo_link;
	struct flow_entry_id_tree fo_flow_entry_id_head;
	const struct flow_owner_bucket *fo_bucket;
	void                    *fo_context;
	pid_t                   fo_pid;
	bool                    fo_nx_port_pid_bound;
	bool                    fo_nx_port_destroyed;
	bool                    fo_low_latency;
	nexus_port_t            fo_nx_port;
	uuid_t                  fo_key;

	struct nexus_adapter *  const fo_nx_port_na;
	struct nx_flowswitch *  const fo_fsw;

	/*
	 * Array of bitmaps to manage the flow advisory table indices.
	 * Currently we are restricting a flow owner to a single nexus
	 * port, so this structure is effectively managing the flow advisory
	 * indices for a port.
	 */
	bitmap_t                *__counted_by(fo_num_flowadv_bmaps)fo_flowadv_bmap;
	uint32_t                fo_flowadv_max;
	uint32_t                fo_num_flowadv;
	uint32_t                fo_num_flowadv_bmaps;

	/* for debugging */
	char                    fo_name[FLOW_PROCESS_NAME_LENGTH];
};

#define FO_BUCKET(_fo)  \
	__DECONST(struct flow_owner_bucket *, (_fo)->fo_bucket)

RB_PROTOTYPE_SC_PREV(__private_extern__, flow_owner_tree, flow_owner,
    fo_link, fo_cmp);
RB_PROTOTYPE_SC_PREV(__private_extern__, flow_entry_id_tree, flow_entry,
    fe_id_link, fe_id_cmp);

typedef enum {
	/*
	 * TCP states.
	 */
	FT_STATE_CLOSED = 0,            /* closed */
	FT_STATE_LISTEN,                /* listening for connection */
	FT_STATE_SYN_SENT,              /* active, have sent SYN */
	FT_STATE_SYN_RECEIVED,          /* have sent and rcvd SYN */
	FT_STATE_ESTABLISHED,           /* established */
	FT_STATE_CLOSE_WAIT,            /* rcvd FIN, waiting close */
	FT_STATE_FIN_WAIT_1,            /* have sent FIN */
	FT_STATE_CLOSING,               /* exchanged FINs, waiting FIN|ACK */
	FT_STATE_LAST_ACK,              /* rcvd FIN, closed, waiting FIN|ACK */
	FT_STATE_FIN_WAIT_2,            /* closed, FIN is ACK'd */
	FT_STATE_TIME_WAIT,             /* quiet wait after close */

	/*
	 * UDP states.
	 */
	FT_STATE_NO_TRAFFIC = 20,       /* no packet observed */
	FT_STATE_SINGLE,                /* single packet */
	FT_STATE_MULTIPLE,              /* multiple packets */

	FT_STATE_MAX = 255
} flow_track_state_t;

struct flow_track_rtt {
	uint64_t        frtt_timestamp; /* tracked segment timestamp */
	uint64_t        frtt_last;      /* previous net_uptime(rate limiting) */
	uint32_t        frtt_seg_begin; /* tracked segment begin SEQ */
	uint32_t        frtt_seg_end;   /* tracked segment end SEQ */
	uint32_t        frtt_usec;      /* avg RTT in usec */
};

#define FLOWTRACK_RTT_SAMPLE_INTERVAL   2       /* sample ACK RTT every 2 sec */
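
/*
 * Hedged sketch of how the sample interval gates RTT measurement
 * (illustrative only; the real logic lives in the flow tracker):
 *
 *	uint64_t now = net_uptime();
 *	if (now - frtt->frtt_last >= FLOWTRACK_RTT_SAMPLE_INTERVAL) {
 *	        // arm tracking of a new segment [frtt_seg_begin, frtt_seg_end]
 *	        frtt->frtt_last = now;
 *	}
 */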

struct flow_track {
	/*
	 * TCP specific tracking info.
	 */
	uint32_t fse_seqlo;     /* max sequence number sent */
	uint32_t fse_seqhi;     /* max the other end ACKd + win	*/
	uint32_t fse_seqlast;   /* last sequence number (FIN) */
	uint16_t fse_max_win;   /* largest window (pre scaling)	*/
	uint16_t fse_mss;       /* maximum segment size option */
	uint8_t fse_state;      /* active state level (FT_STATE_*) */
	uint8_t fse_wscale;     /* window scaling factor */
	uint16_t fse_flags;     /* FLOWSTATEF_* */
	uint32_t fse_syn_ts;    /* SYN timestamp */
	uint32_t fse_syn_cnt;   /* # of SYNs per second */

	struct flow_track_rtt   fse_rtt;        /* ACK RTT tracking */
#define fse_rtt_usec    fse_rtt.frtt_usec
} __sk_aligned(8);

/* valid values for fse_flags */
#define FLOWSTATEF_WSCALE       0x1     /* fse_wscale is valid */

struct flow_llhdr {
	uint32_t                flh_gencnt;     /* link-layer address gencnt */

	const uint8_t           flh_off;
	const uint8_t           flh_len;
	uint16_t                flh_pad;        /* for future */

	union _flh_u {
		uint64_t        _buf[2];
		struct {
			uint16_t _eth_pad;
			struct ether_header _eth;
		} _eth_padded;
	}  __sk_aligned(8)      _flh;
#define flh_eth_padded          _flh._eth_padded
#define flh_eth                 _flh._eth_padded._eth
};
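
/*
 * The 2-byte _eth_pad shifts the 14-byte Ethernet header within the
 * 16-byte, 8-byte-aligned _flh buffer so that the payload following
 * the header starts on a naturally aligned boundary.  Illustrative
 * sanity checks (added here for exposition; not part of the ABI):
 *
 *	_CASSERT(sizeof(union _flh_u) == 16);
 *	_CASSERT(offsetof(union _flh_u, _eth_padded._eth) == 2);
 */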

typedef enum {
	FE_QSET_SELECT_NONE,
	FE_QSET_SELECT_FIXED,
	FE_QSET_SELECT_DYNAMIC
} flow_qset_select_t;

extern kern_allocation_name_t skmem_tag_flow_demux;
typedef int (*flow_demux_memcmp_mask_t)(const uint8_t *src1, const uint8_t *src2,
    const uint8_t *byte_mask);

struct kern_flow_demux_pattern {
	struct flow_demux_pattern  fdp_demux_pattern;
	flow_demux_memcmp_mask_t   fdp_memcmp_mask;
};
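
/*
 * A plain-C sketch of what a flow_demux_memcmp_mask_t callback
 * computes, assuming a fixed FLOW_DEMUX_MAX_LEN compare span (the
 * installed implementation may be SIMD-accelerated):
 *
 *	static int
 *	demux_memcmp_mask_sketch(const uint8_t *src1, const uint8_t *src2,
 *	    const uint8_t *byte_mask)
 *	{
 *	        for (size_t i = 0; i < FLOW_DEMUX_MAX_LEN; i++) {
 *	                if ((src1[i] ^ src2[i]) & byte_mask[i]) {
 *	                        return 1;       // differ under the mask
 *	                }
 *	        }
 *	        return 0;                       // equal under the mask
 *	}
 */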

#define MAX_PKT_DEMUX_LIMIT        1000

TAILQ_HEAD(flow_entry_list, flow_entry);

#define FLOW_PROC_FLAG_GSO        0x0001
typedef void (*flow_tx_action_t)(struct nx_flowswitch *fsw, struct flow_entry *fe,
    uint32_t flags);

#define FLOW_PROC_FLAG_FRAGMENTS  0x0001
typedef void (*flow_rx_action_t)(struct nx_flowswitch *fsw, struct flow_entry *fe,
    struct pktq *pkts, uint32_t rx_bytes, uint32_t flags);

struct flow_entry {
	/**** Common Group ****/
	os_refcnt_t             fe_refcnt;
	struct flow_key         fe_key;
	uint32_t                fe_flags;
	uint32_t                fe_key_hash;
	struct cuckoo_node      fe_cnode;

	uuid_t                  fe_uuid __sk_aligned(8);
	nexus_port_t            fe_nx_port;
	uint32_t                fe_laddr_gencnt;
	uint32_t                fe_want_nonviable;
	uint32_t                fe_want_withdraw;
	uint8_t                 fe_transport_protocol;

	/**** Rx Group ****/
	/*
	 * If multiple threads end up working on the same flow entry, the
	 * one that reaches rx_flow_batch_packets first is responsible for
	 * sending up all the packets from the different RX completion
	 * queues; fe_rx_worker_tid records its thread ID.  Other threads
	 * only enqueue their packets into fe_rx_pktq and do not call
	 * fe_rx_process on the flow entry.
	 */
	uint16_t                fe_rx_frag_count;
	uint32_t                fe_rx_pktq_bytes;
	decl_lck_mtx_data(, fe_rx_pktq_lock);
	struct pktq             fe_rx_pktq;
	TAILQ_ENTRY(flow_entry) fe_rx_link;
	flow_rx_action_t        fe_rx_process;
	uint64_t                fe_rx_worker_tid;

	/*
	 * Largest allocated packet size.
	 * Used by:
	 *  - mbuf batch allocation logic during RX aggregation and netif copy.
	 *  - packet allocation logic during RX aggregation.
	 */
	uint32_t                fe_rx_largest_size;

	/**** Tx Group ****/
	bool                    fe_tx_is_cont_frag;
	uint32_t                fe_tx_frag_id;
	struct pktq             fe_tx_pktq;
	TAILQ_ENTRY(flow_entry) fe_tx_link;
	flow_tx_action_t        fe_tx_process;

	uuid_t                  fe_eproc_uuid __sk_aligned(8);
	flowadv_idx_t           fe_adv_idx;
	kern_packet_svc_class_t fe_svc_class;
	uint32_t                fe_policy_id;   /* policy id matched to flow */
	uint32_t                fe_skip_policy_id; /* skip policy id matched to flow */

	/**** Misc Group ****/
	struct nx_flowswitch *  const fe_fsw;
	struct ns_token         *fe_port_reservation;
	struct protons_token    *fe_proto_reservation;
	void                    *fe_ipsec_reservation;

	struct flow_track       fe_ltrack;      /* local endpoint state */
	struct flow_track       fe_rtrack;      /* remote endpoint state */

	/*
	 * Flow stats are kept in a stand-alone object, refcnt'ed by
	 * various users (e.g. flow_entry, necp_client_flow, etc.)
	 */
	struct flow_stats       *fe_stats;
	struct flow_route       *fe_route;

	RB_ENTRY(flow_entry)    fe_id_link;

	TAILQ_ENTRY(flow_entry) fe_linger_link;
	uint64_t                fe_linger_expire; /* expiration deadline */
	uint32_t                fe_linger_wait;   /* linger time (seconds) */

	pid_t                   fe_pid;
	pid_t                   fe_epid;
	char                    fe_proc_name[FLOW_PROCESS_NAME_LENGTH];
	char                    fe_eproc_name[FLOW_PROCESS_NAME_LENGTH];

	uint32_t                fe_flowid; /* globally unique flow ID */

	/* Logical link related information */
	struct netif_qset      *fe_qset;
	uint64_t                fe_qset_id;
	flow_qset_select_t      fe_qset_select;
	uint32_t                fe_tr_genid;

	/* Parent child information */
	decl_lck_rw_data(, fe_child_list_lock);
	struct flow_entry_list          fe_child_list;
	TAILQ_ENTRY(flow_entry)         fe_child_link;
#if DEVELOPMENT || DEBUG
	int16_t                         fe_child_count;
#endif // DEVELOPMENT || DEBUG
	uint8_t                         fe_demux_pattern_count;
	struct kern_flow_demux_pattern  *__counted_by(fe_demux_pattern_count)fe_demux_patterns;
	uint8_t                         *__sized_by_or_null(FLOW_DEMUX_MAX_LEN) fe_demux_pkt_data;
};

/* valid values for fe_flags */
#define FLOWENTF_INITED                 0x00000001 /* {src,dst} states initialized */
#define FLOWENTF_TRACK                  0x00000010 /* enable state tracking */
#define FLOWENTF_CONNECTED              0x00000020 /* connected mode */
#define FLOWENTF_LISTENER               0x00000040 /* listener mode */
#define FLOWENTF_QOS_MARKING            0x00000100 /* flow can have qos marking */
#define FLOWENTF_LOW_LATENCY            0x00000200 /* low latency flow */
#define FLOWENTF_WAIT_CLOSE             0x00001000 /* defer free after close */
#define FLOWENTF_CLOSE_NOTIFY           0x00002000 /* notify NECP upon tear down */
#define FLOWENTF_EXTRL_PORT             0x00004000 /* port reservation is held externally */
#define FLOWENTF_EXTRL_PROTO            0x00008000 /* proto reservation is held externally */
#define FLOWENTF_EXTRL_FLOWID           0x00010000 /* flowid reservation is held externally */
#define FLOWENTF_CHILD                  0x00020000 /* child flow */
#define FLOWENTF_PARENT                 0x00040000 /* parent flow */
#define FLOWENTF_NOWAKEFROMSLEEP        0x00080000 /* don't wake for this flow */
#define FLOWENTF_ABORTED                0x01000000 /* has sent RST to peer */
#define FLOWENTF_NONVIABLE              0x02000000 /* disabled; awaiting tear down */
#define FLOWENTF_WITHDRAWN              0x04000000 /* flow has been withdrawn */
#define FLOWENTF_TORN_DOWN              0x08000000 /* torn down and awaiting destroy */
#define FLOWENTF_HALF_CLOSED            0x10000000 /* flow is half closed */
#define FLOWENTF_DESTROYED              0x40000000 /* not in RB trees anymore */
#define FLOWENTF_LINGERING              0x80000000 /* destroyed and in linger list */

#define FLOWENTF_BITS                                            \
    "\020\01INITED\05TRACK\06CONNECTED\07LISTNER\011QOS_MARKING" \
    "\012LOW_LATENCY\015WAIT_CLOSE\016CLOSE_NOTIFY\017EXT_PORT"  \
    "\020EXT_PROTO\021EXT_FLOWID\031ABORTED\032NONVIABLE\033WITHDRAWN"  \
    "\034TORN_DOWN\035HALF_CLOSED\037DESTROYED\40LINGERING"

TAILQ_HEAD(flow_entry_linger_head, flow_entry);

struct flow_entry_dead {
	LIST_ENTRY(flow_entry_dead)     fed_link;

	boolean_t               fed_want_nonviable;
	boolean_t               fed_want_clonotify;

	/* rule (flow) UUID */
	union {
		uint64_t        fed_uuid_64[2];
		uint32_t        fed_uuid_32[4];
		uuid_t          fed_uuid;
	} __sk_aligned(8);
};

/*
 * Minimum refcnt for a flow route entry to be considered idle.
 */
#define FLOW_ROUTE_MINREF       2       /* for the 2 RB trees */
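
/*
 * Hedged sketch of the idle test this minimum implies (the actual
 * garbage collection logic belongs to the flowswitch instance; see
 * fr_usecnt and fr_expire in struct flow_route below):
 *
 *	if (fr->fr_usecnt <= FLOW_ROUTE_MINREF &&
 *	    fr->fr_expire != 0 && now >= fr->fr_expire) {
 *	        // no flow entry holds a reference; eligible for pruning
 *	}
 */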

struct flow_route {
	RB_ENTRY(flow_route)    fr_link;
	RB_ENTRY(flow_route)    fr_id_link;

	/*
	 * fr_laddr represents the local address that the system chooses
	 * for the foreign destination in fr_faddr.  The flow entry that
	 * is referring to this flow route object may choose a different
	 * local address if it wishes.
	 *
	 * fr_gaddr represents the gateway address to reach the final
	 * foreign destination fr_faddr, valid only if the destination is
	 * not directly attached (FLOWRTF_GATEWAY is set).
	 *
	 * The use of sockaddr for storage is for convenience; the port
	 * value is not applicable for this object, as this is shared
	 * among flow entries.
	 */
	union sockaddr_in_4_6   fr_laddr;       /* local IP address */
	union sockaddr_in_4_6   fr_faddr;       /* remote IP address */
#define fr_af                   fr_faddr.sa.sa_family
	union sockaddr_in_4_6   fr_gaddr;       /* gateway IP address */

	struct flow_llhdr       fr_llhdr;
#define fr_eth_padded           fr_llhdr.flh_eth_padded
#define fr_eth                  fr_llhdr.flh_eth

	/*
	 * In flow_route_tree, we use the destination address as key.
	 * To speed up searches, we initialize fr_addr_key to the address
	 * portion of fr_faddr depending on the address family.
	 */
	void                    *fr_addr_key;

	/* flow route UUID */
	uuid_t                  fr_uuid __sk_aligned(8);

	/*
	 * fr_usecnt is updated atomically; incremented when a flow entry
	 * takes a reference on this object, and decremented when that
	 * reference is released.  Periodically,
	 * the flowswitch instance garbage collects flow_route objects
	 * that aren't being referred to by any flow entries.
	 *
	 * fr_expire is set when fr_usecnt reaches its minimum count, and
	 * is cleared when it goes above the minimum count.
	 *
	 * The spin lock fr_reflock is used to serialize both.
	 */
	decl_lck_spin_data(, fr_reflock);
	uint64_t                fr_expire;
	volatile uint32_t       fr_usecnt;

	uint32_t                fr_flags;
	uint32_t                fr_laddr_gencnt; /* local IP gencnt */
	uint32_t                fr_addr_len;     /* sizeof {in,in6}_addr */

	volatile uint32_t       fr_want_configure;
	volatile uint32_t       fr_want_probe;

	/* lock to serialize resolver */
	decl_lck_mtx_data(, fr_lock);

	/*
	 * fr_rt_dst is the route to final destination, and along with
	 * fr_rt_evhdlr_tag, they are used in route event registration.
	 *
	 * fr_rt_gw is valid only if FLOWRTF_GATEWAY is set.
	 */
	eventhandler_tag        fr_rt_evhdlr_tag;
	struct rtentry          *fr_rt_dst;
	struct rtentry          *fr_rt_gw;

	/* nexus UUID */
	uuid_t                  fr_nx_uuid __sk_aligned(8);

	const struct flow_mgr   *fr_mgr;
	const struct flow_route_bucket  *fr_frb;
	const struct flow_route_id_bucket *fr_frib;
};

/* valid values for fr_flags */
#define FLOWRTF_ATTACHED        0x00000001 /* attached to RB trees */
#define FLOWRTF_ONLINK          0x00000010 /* dst directly on the link */
#define FLOWRTF_GATEWAY         0x00000020 /* gw IP address is valid */
#define FLOWRTF_RESOLVED        0x00000040 /* flow route is resolved */
#define FLOWRTF_HAS_LLINFO      0x00000080 /* has dst link-layer address */
#define FLOWRTF_DELETED         0x00000100 /* route has been deleted */
#define FLOWRTF_DST_LL_MCAST    0x00000200 /* dst is link layer multicast */
#define FLOWRTF_DST_LL_BCAST    0x00000400 /* dst is link layer broadcast */
#define FLOWRTF_STABLE_ADDR     0x00000800 /* local address prefers stable */

#define FR_LOCK(_fr)                    \
	lck_mtx_lock(&(_fr)->fr_lock)
#define FR_TRY_LOCK(_fr)                \
	lck_mtx_try_lock(&(_fr)->fr_lock)
#define FR_LOCK_ASSERT_HELD(_fr)        \
	LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_OWNED)
#define FR_LOCK_ASSERT_NOTHELD(_fr)     \
	LCK_MTX_ASSERT(&(_fr)->fr_lock, LCK_MTX_ASSERT_NOTOWNED)
#define FR_UNLOCK(_fr)                  \
	lck_mtx_unlock(&(_fr)->fr_lock)

#define FLOWRT_UPD_ETH_DST(_fr, _addr)  do {                            \
	bcopy((_addr), (_fr)->fr_eth.ether_dhost, ETHER_ADDR_LEN);      \
	(_fr)->fr_flags &= ~(FLOWRTF_DST_LL_MCAST|FLOWRTF_DST_LL_BCAST);\
	if (ETHER_IS_MULTICAST(_addr)) {                                \
	        if (_ether_cmp(etherbroadcastaddr, (_addr)) == 0)       \
	                (_fr)->fr_flags |= FLOWRTF_DST_LL_BCAST;        \
	        else                                                    \
	                (_fr)->fr_flags |= FLOWRTF_DST_LL_MCAST;        \
	}                                                               \
} while (0)
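
/*
 * Hypothetical resolver snippet using the macro above (the actual
 * locking and flag discipline may differ):
 *
 *	uint8_t lladdr[ETHER_ADDR_LEN];
 *	...
 *	FR_LOCK(fr);
 *	FLOWRT_UPD_ETH_DST(fr, lladdr);
 *	fr->fr_flags |= (FLOWRTF_HAS_LLINFO | FLOWRTF_RESOLVED);
 *	FR_UNLOCK(fr);
 */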

RB_HEAD(flow_route_tree, flow_route);
RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_tree, flow_route,
    fr_link, fr_cmp);

struct flow_route_bucket {
	decl_lck_rw_data(, frb_lock);
	struct flow_route_tree  frb_head;
	const uint32_t          frb_idx;
};

#define FRB_WLOCK(_frb)                 \
	lck_rw_lock_exclusive(&(_frb)->frb_lock)
#define FRB_WLOCKTORLOCK(_frb)          \
	lck_rw_lock_exclusive_to_shared(&(_frb)->frb_lock)
#define FRB_WTRYLOCK(_frb)              \
	lck_rw_try_lock_exclusive(&(_frb)->frb_lock)
#define FRB_WUNLOCK(_frb)               \
	lck_rw_unlock_exclusive(&(_frb)->frb_lock)
#define FRB_RLOCK(_frb)                 \
	lck_rw_lock_shared(&(_frb)->frb_lock)
#define FRB_RLOCKTOWLOCK(_frb)          \
	lck_rw_lock_shared_to_exclusive(&(_frb)->frb_lock)
#define FRB_RTRYLOCK(_frb)              \
	lck_rw_try_lock_shared(&(_frb)->frb_lock)
#define FRB_RUNLOCK(_frb)               \
	lck_rw_unlock_shared(&(_frb)->frb_lock)
#define FRB_UNLOCK(_frb)                \
	lck_rw_done(&(_frb)->frb_lock)
#define FRB_WLOCK_ASSERT_HELD(_frb)     \
	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_EXCLUSIVE)
#define FRB_RLOCK_ASSERT_HELD(_frb)     \
	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_SHARED)
#define FRB_LOCK_ASSERT_HELD(_frb)      \
	LCK_RW_ASSERT(&(_frb)->frb_lock, LCK_RW_ASSERT_HELD)

RB_HEAD(flow_route_id_tree, flow_route);
RB_PROTOTYPE_SC_PREV(__private_extern__, flow_route_id_tree, flow_route,
    fr_id_link, fr_id_cmp);

struct flow_route_id_bucket {
	decl_lck_rw_data(, frib_lock);
	struct flow_route_id_tree       frib_head;
	const uint32_t                  frib_idx;
};

#define FRIB_WLOCK(_frib)               \
	lck_rw_lock_exclusive(&(_frib)->frib_lock)
#define FRIB_WLOCKTORLOCK(_frib)        \
	lck_rw_lock_exclusive_to_shared(&(_frib)->frib_lock)
#define FRIB_WTRYLOCK(_frib)            \
	lck_rw_try_lock_exclusive(&(_frib)->frib_lock)
#define FRIB_WUNLOCK(_frib)             \
	lck_rw_unlock_exclusive(&(_frib)->frib_lock)
#define FRIB_RLOCK(_frib)               \
	lck_rw_lock_shared(&(_frib)->frib_lock)
#define FRIB_RLOCKTOWLOCK(_frib)        \
	lck_rw_lock_shared_to_exclusive(&(_frib)->frib_lock)
#define FRIB_RTRYLOCK(_frib)            \
	lck_rw_try_lock_shared(&(_frib)->frib_lock)
#define FRIB_RUNLOCK(_frib)             \
	lck_rw_unlock_shared(&(_frib)->frib_lock)
#define FRIB_UNLOCK(_frib)              \
	lck_rw_done(&(_frib)->frib_lock)
#define FRIB_WLOCK_ASSERT_HELD(_frib)   \
	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_EXCLUSIVE)
#define FRIB_RLOCK_ASSERT_HELD(_frib)   \
	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_SHARED)
#define FRIB_LOCK_ASSERT_HELD(_frib)    \
	LCK_RW_ASSERT(&(_frib)->frib_lock, LCK_RW_ASSERT_HELD)

struct flow_mgr {
	char            fm_name[IFNAMSIZ];
	uuid_t          fm_uuid;
	RB_ENTRY(flow_mgr) fm_link;

	struct cuckoo_hashtable *fm_flow_table;
	size_t   fm_flow_hash_count[FKMASK_IDX_MAX]; /* # of flows with mask */
	uint16_t fm_flow_hash_masks[FKMASK_IDX_MAX];

	void      *__sized_by(fm_owner_bucket_tot_sz) fm_owner_buckets;     /* cache-aligned fob */
	size_t    fm_owner_buckets_cnt;  /* total # of fobs */
	size_t    fm_owner_bucket_sz;    /* size of each fob */
	size_t    fm_owner_bucket_tot_sz; /* total allocated size of all fobs */

	void      *__sized_by(fm_route_bucket_tot_sz) fm_route_buckets;     /* cache-aligned frb */
	size_t    fm_route_buckets_cnt;  /* total # of frb */
	size_t    fm_route_bucket_sz;    /* size of each frb */
	size_t    fm_route_bucket_tot_sz; /* total allocated size of all frbs */

	void      *__sized_by(fm_route_id_bucket_tot_sz) fm_route_id_buckets;    /* cache-aligned frib */
	size_t    fm_route_id_buckets_cnt; /* total # of frib */
	size_t    fm_route_id_bucket_sz;   /* size of each frib */
	size_t    fm_route_id_bucket_tot_sz; /* total allocated size of all fribs */
};

/*
 * Compares @match (wildcard) against @key (exact).
 * Returns 0 as long as @key matches everything that @match wants to
 * match on; non-zero otherwise.
 */
static inline int
flow_key_cmp(const struct flow_key *match, const struct flow_key *key)
{
#define FK_CMP(field, mask)     \
	if ((match->fk_mask & mask) != 0) {     \
	        if ((key->fk_mask & mask) == 0) {       \
	                return 1;       \
	        }       \
	        int d = memcmp(&match->field, &key->field, sizeof(match->field));       \
	        if (d != 0) {   \
	                return d;       \
	        }       \
	}

	FK_CMP(fk_ipver, FKMASK_IPVER);
	FK_CMP(fk_proto, FKMASK_PROTO);
	FK_CMP(fk_src, FKMASK_SRC);
	FK_CMP(fk_dst, FKMASK_DST);
	FK_CMP(fk_sport, FKMASK_SPORT);
	FK_CMP(fk_dport, FKMASK_DPORT);

	return 0;
}
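
/*
 * Example: a wildcard (e.g. listener-style) @match key against a fully
 * classified @key (illustrative; the field composition of the FKMASK_*
 * tuples is defined elsewhere):
 *
 *	match.fk_mask = FKMASK_3TUPLE;
 *	key.fk_mask = FKMASK_5TUPLE;
 *
 *	if (flow_key_cmp(&match, &key) == 0) {
 *	        // every field selected by match.fk_mask compares equal
 *	}
 */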

/*
 * Similar to flow_key_cmp() except using memory compare with mask,
 * done with SIMD instructions, if available for the platform.
 */
static inline int
flow_key_cmp_mask(const struct flow_key *match,
    const struct flow_key *key, const struct flow_key *mask)
{
	_CASSERT(FLOW_KEY_LEN == 48);
	_CASSERT(FLOW_KEY_LEN == sizeof(struct flow_key));
	_CASSERT((sizeof(struct flow_entry) % 16) == 0);
	_CASSERT((offsetof(struct flow_entry, fe_key) % 16) == 0);

	/* local variables are __bidi_indexable with -fbounds-safety */
	const struct flow_key *match_idx = match;
	const struct flow_key *key_idx = key;
	const struct flow_key *mask_idx = mask;

	return sk_memcmp_mask_48B((const uint8_t *)match_idx,
	           (const uint8_t *)key_idx, (const uint8_t *)mask_idx);
}
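
/*
 * Scalar reference for what sk_memcmp_mask_48B() computes (a hedged
 * sketch; the platform routine may use SIMD):
 *
 *	static inline int
 *	memcmp_mask_48B_sketch(const uint8_t *m, const uint8_t *k,
 *	    const uint8_t *mask)
 *	{
 *	        int diff = 0;
 *	        for (int i = 0; i < FLOW_KEY_LEN; i++) {
 *	                diff |= (m[i] ^ k[i]) & mask[i];
 *	        }
 *	        return diff;    // 0 iff equal under the mask
 *	}
 */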

static inline uint32_t
flow_key_hash(const struct flow_key *key)
{
	uint32_t hash = FK_HASH_SEED;
#define FK_HASH(field, mask)    \
	if ((key->fk_mask & mask) != 0) {       \
	        hash = net_flowhash(&key->field, sizeof(key->field), hash);     \
	}

	FK_HASH(fk_ipver, FKMASK_IPVER);
	FK_HASH(fk_proto, FKMASK_PROTO);
	FK_HASH(fk_src, FKMASK_SRC);
	FK_HASH(fk_dst, FKMASK_DST);
	FK_HASH(fk_sport, FKMASK_SPORT);
	FK_HASH(fk_dport, FKMASK_DPORT);

	return hash;
}

__attribute__((always_inline))
static inline void
flow_key_unpack(const struct flow_key *key, union sockaddr_in_4_6 *laddr,
    union sockaddr_in_4_6 *faddr, uint8_t *protocol)
{
	*protocol = key->fk_proto;
	if (key->fk_ipver == IPVERSION) {
		laddr->sa.sa_family = AF_INET;
		laddr->sin.sin_addr = key->fk_src4;
		laddr->sin.sin_port = key->fk_sport;
		faddr->sa.sa_family = AF_INET;
		faddr->sin.sin_addr = key->fk_dst4;
		faddr->sin.sin_port = key->fk_dport;
	} else if (key->fk_ipver == IPV6_VERSION) {
		laddr->sa.sa_family = AF_INET6;
		laddr->sin6.sin6_addr = key->fk_src6;
		laddr->sin6.sin6_port = key->fk_sport;
		faddr->sa.sa_family = AF_INET6;
		faddr->sin6.sin6_addr = key->fk_dst6;
		faddr->sin6.sin6_port = key->fk_dport;
	}
}

__attribute__((always_inline))
static inline int
flow_req2key(struct nx_flow_req *req, struct flow_key *key)
{
	FLOW_KEY_CLEAR(key);

	if (req->nfr_saddr.sa.sa_family == AF_INET) {
		key->fk_ipver = IPVERSION;
		key->fk_proto = req->nfr_ip_protocol;
		key->fk_mask |= FKMASK_PROTO;
		if (sk_sa_has_addr(SA(&req->nfr_saddr))) {
			key->fk_src4 = req->nfr_saddr.sin.sin_addr;
			key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC);
		}
		if (sk_sa_has_addr(SA(&req->nfr_daddr))) {
			key->fk_dst4 = req->nfr_daddr.sin.sin_addr;
			key->fk_mask |= (FKMASK_IPVER | FKMASK_DST);
		}
		if (sk_sa_has_port(SA(&req->nfr_saddr))) {
			key->fk_sport = req->nfr_saddr.sin.sin_port;
			key->fk_mask |= FKMASK_SPORT;
		}
		if (sk_sa_has_port(SA(&req->nfr_daddr))) {
			key->fk_dport = req->nfr_daddr.sin.sin_port;
			key->fk_mask |= FKMASK_DPORT;
		}
	} else if (req->nfr_saddr.sa.sa_family == AF_INET6) {
		key->fk_ipver = IPV6_VERSION;
		key->fk_proto = req->nfr_ip_protocol;
		key->fk_mask |= FKMASK_PROTO;
		if (sk_sa_has_addr(SA(&req->nfr_saddr))) {
			key->fk_src6 = req->nfr_saddr.sin6.sin6_addr;
			key->fk_mask |= (FKMASK_IPVER | FKMASK_SRC);
		}
		if (sk_sa_has_addr(SA(&req->nfr_daddr))) {
			key->fk_dst6 = req->nfr_daddr.sin6.sin6_addr;
			key->fk_mask |= (FKMASK_IPVER | FKMASK_DST);
		}
		if (sk_sa_has_port(SA(&req->nfr_saddr))) {
			key->fk_sport = req->nfr_saddr.sin6.sin6_port;
			key->fk_mask |= FKMASK_SPORT;
		}
		if (sk_sa_has_port(SA(&req->nfr_daddr))) {
			key->fk_dport = req->nfr_daddr.sin6.sin6_port;
			key->fk_mask |= FKMASK_DPORT;
		}
	} else {
		SK_ERR("unknown AF %d", req->nfr_saddr.sa.sa_family);
		return ENOTSUP;
	}

	switch (key->fk_mask) {
	case FKMASK_5TUPLE:
	case FKMASK_4TUPLE:
	case FKMASK_3TUPLE:
	case FKMASK_2TUPLE:
	case FKMASK_IPFLOW3:
	case FKMASK_IPFLOW2:
	case FKMASK_IPFLOW1:
		break;
	default:
		SK_ERR("unknown flow key mask 0x%04x", key->fk_mask);
		return ENOTSUP;
	}

	return 0;
}

__attribute__((always_inline))
static inline void
flow_pkt2key(struct __kern_packet *pkt, boolean_t input,
    struct flow_key *key)
{
	struct __flow *flow = pkt->pkt_flow;

	FLOW_KEY_CLEAR(key);

	if (__improbable((pkt->pkt_qum_qflags & QUM_F_FLOW_CLASSIFIED) == 0)) {
		return;
	}

	ASSERT(flow->flow_l3._l3_ip_ver != 0);

	key->fk_ipver = flow->flow_l3._l3_ip_ver;
	key->fk_proto = flow->flow_ip_proto;
	if (input) {
		if (flow->flow_ip_ver == IPVERSION) {
			key->fk_src4 = flow->flow_ipv4_dst;
			key->fk_sport = flow->flow_tcp_dst;
			key->fk_dst4 = flow->flow_ipv4_src;
			key->fk_dport = flow->flow_tcp_src;
		} else {
			key->fk_src6 = flow->flow_ipv6_dst;
			key->fk_sport = flow->flow_tcp_dst;
			key->fk_dst6 = flow->flow_ipv6_src;
			key->fk_dport = flow->flow_tcp_src;
		}
	} else {
		if (flow->flow_ip_ver == IPVERSION) {
			key->fk_src4 = flow->flow_ipv4_src;
			key->fk_sport = flow->flow_tcp_src;
			key->fk_dst4 = flow->flow_ipv4_dst;
			key->fk_dport = flow->flow_tcp_dst;
		} else {
			key->fk_src6 = flow->flow_ipv6_src;
			key->fk_sport = flow->flow_tcp_src;
			key->fk_dst6 = flow->flow_ipv6_dst;
			key->fk_dport = flow->flow_tcp_dst;
		}
	}
}
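
/*
 * For input packets the tuple is swapped into the key, so that an Rx
 * packet destined to a local flow reproduces the key the flow was
 * registered with (local tuple in src, remote tuple in dst).  Worked
 * example with documentation addresses: an inbound TCP segment from
 * 203.0.113.5:80 to 192.0.2.1:54321 yields fk_src4 = 192.0.2.1,
 * fk_sport = 54321, fk_dst4 = 203.0.113.5, fk_dport = 80.
 */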

__attribute__((always_inline))
static inline int
flow_ip_cmp(const void *a0, const void *b0, size_t alen)
{
	struct flow_ip_addr *a = __DECONST(struct flow_ip_addr *, a0),
	    *b = __DECONST(struct flow_ip_addr *, b0);

	switch (alen) {
	case sizeof(struct in_addr):
		if (a->_addr32[0] > b->_addr32[0]) {
			return 1;
		}
		if (a->_addr32[0] < b->_addr32[0]) {
			return -1;
		}
		break;

	case sizeof(struct in6_addr):
		if (a->_addr64[1] > b->_addr64[1]) {
			return 1;
		}
		if (a->_addr64[1] < b->_addr64[1]) {
			return -1;
		}
		if (a->_addr64[0] > b->_addr64[0]) {
			return 1;
		}
		if (a->_addr64[0] < b->_addr64[0]) {
			return -1;
		}
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
		__builtin_unreachable();
	}
	return 0;
}

__attribute__((always_inline))
static inline struct flow_owner_bucket *
flow_mgr_get_fob_at_idx(struct flow_mgr *fm, uint32_t idx)
{
	char *buckets = fm->fm_owner_buckets;
	void *bucket = buckets + (idx * fm->fm_owner_bucket_sz);
	return bucket;
}

__attribute__((always_inline))
static inline struct flow_route_bucket *
flow_mgr_get_frb_at_idx(struct flow_mgr *fm, uint32_t idx)
{
	char *buckets = fm->fm_route_buckets;
	void *bucket = buckets + (idx * fm->fm_route_bucket_sz);
	return bucket;
}

__attribute__((always_inline))
static inline struct flow_route_id_bucket *
flow_mgr_get_frib_at_idx(struct flow_mgr *fm, uint32_t idx)
{
	char *buckets = fm->fm_route_id_buckets;
	void *bucket = buckets + (idx * fm->fm_route_id_bucket_sz);
	return bucket;
}

__attribute__((always_inline))
static inline uint32_t
flow_mgr_get_fob_idx(struct flow_mgr *fm,
    struct flow_owner_bucket *bkt)
{
	ASSERT(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) %
	    fm->fm_owner_bucket_sz == 0);
	return (uint32_t)(((intptr_t)bkt - (intptr_t)fm->fm_owner_buckets) /
	       fm->fm_owner_bucket_sz);
}
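
/*
 * flow_mgr_get_fob_idx() is the inverse of flow_mgr_get_fob_at_idx()
 * above, e.g.:
 *
 *	ASSERT(flow_mgr_get_fob_idx(fm,
 *	    flow_mgr_get_fob_at_idx(fm, i)) == i);
 */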

__attribute__((always_inline))
static inline size_t
flow_mgr_get_num_flows(struct flow_mgr *mgr)
{
	ASSERT(mgr->fm_flow_table != NULL);
	return cuckoo_hashtable_entries(mgr->fm_flow_table);
}

extern unsigned int sk_fo_size;
extern struct skmem_cache *sk_fo_cache;

extern unsigned int sk_fe_size;
extern struct skmem_cache *sk_fe_cache;

extern unsigned int sk_fab_size;
extern struct skmem_cache *sk_fab_cache;

extern uint32_t flow_seed;

extern struct skmem_cache *flow_route_cache;
extern struct skmem_cache *flow_stats_cache;

__BEGIN_DECLS

typedef void (*flow_route_ctor_fn_t)(void *arg, struct flow_route *);
typedef int (*flow_route_resolve_fn_t)(void *arg, struct flow_route *,
    struct __kern_packet *);

extern int flow_init(void);
extern void flow_fini(void);

extern void flow_mgr_init(void);
extern void flow_mgr_fini(void);
extern struct flow_mgr *flow_mgr_find_lock(uuid_t);
extern void flow_mgr_unlock(void);
extern struct flow_mgr * flow_mgr_create(size_t, size_t, size_t, size_t);
extern void flow_mgr_destroy(struct flow_mgr *);
extern void flow_mgr_terminate(struct flow_mgr *);
extern int flow_mgr_flow_add(struct kern_nexus *nx, struct flow_mgr *fm,
    struct flow_owner *fo, struct ifnet *ifp, struct nx_flow_req *req,
    flow_route_ctor_fn_t fr_ctor, flow_route_resolve_fn_t fr_resolve, void *fr_arg);
extern struct flow_owner_bucket *flow_mgr_get_fob_by_pid(
	struct flow_mgr *, pid_t);
extern struct flow_entry *flow_mgr_get_fe_by_uuid_rlock(
	struct flow_mgr *, uuid_t);
extern struct flow_route_bucket *flow_mgr_get_frb_by_addr(
	struct flow_mgr *, union sockaddr_in_4_6 *);
extern struct flow_route_id_bucket *flow_mgr_get_frib_by_uuid(
	struct flow_mgr *, uuid_t);
extern int flow_mgr_flow_hash_mask_add(struct flow_mgr *fm, uint32_t mask);
extern int flow_mgr_flow_hash_mask_del(struct flow_mgr *fm, uint32_t mask);

extern struct flow_entry * fe_alloc(boolean_t can_block);

extern int flow_namespace_create(union sockaddr_in_4_6 *, uint8_t protocol,
    netns_token *, uint16_t, struct ns_flow_info *);
extern void flow_namespace_half_close(netns_token *token);
extern void flow_namespace_withdraw(netns_token *);
extern void flow_namespace_destroy(netns_token *);

extern struct flow_owner_bucket *__sized_by(*tot_sz)
flow_owner_buckets_alloc(size_t, size_t *, size_t * tot_sz);
extern void flow_owner_buckets_free(struct flow_owner_bucket *, size_t);
extern void flow_owner_bucket_init(struct flow_owner_bucket *);
extern void flow_owner_bucket_destroy(struct flow_owner_bucket *);
extern void flow_owner_bucket_purge_all(struct flow_owner_bucket *);
extern void flow_owner_attach_nexus_port(struct flow_mgr *, boolean_t,
    pid_t, nexus_port_t);
extern uint32_t flow_owner_detach_nexus_port(struct flow_mgr *,
    boolean_t, pid_t, nexus_port_t, boolean_t);
extern struct flow_owner *flow_owner_alloc(struct flow_owner_bucket *,
    struct proc *, nexus_port_t, bool, bool, struct nx_flowswitch*,
    struct nexus_adapter *, void *, bool);
extern void flow_owner_free(struct flow_owner_bucket *, struct flow_owner *);
extern struct flow_entry *flow_owner_create_entry(struct flow_owner *,
    struct nx_flow_req *, boolean_t, uint32_t, boolean_t,
    struct flow_route *, int *);
extern int flow_owner_destroy_entry(struct flow_owner *, uuid_t, bool, void *);
extern struct flow_owner *flow_owner_find_by_pid(struct flow_owner_bucket *,
    pid_t, void *, bool);
extern int flow_owner_flowadv_index_alloc(struct flow_owner *, flowadv_idx_t *);
extern void flow_owner_flowadv_index_free(struct flow_owner *, flowadv_idx_t);
extern uint32_t flow_owner_activate_nexus_port(struct flow_mgr *,
    boolean_t, pid_t, nexus_port_t, struct nexus_adapter *,
    na_activate_mode_t);

extern struct flow_entry *flow_mgr_find_fe_by_key(struct flow_mgr *,
    struct flow_key *);
extern struct flow_entry * flow_mgr_find_conflicting_fe(struct flow_mgr *fm,
    struct flow_key *fe_key);
extern void flow_mgr_foreach_flow(struct flow_mgr *fm,
    void (^flow_handler)(struct flow_entry *fe));
extern struct flow_entry *flow_entry_find_by_uuid(struct flow_owner *,
    uuid_t);
extern struct flow_entry * flow_entry_alloc(struct flow_owner *fo,
    struct nx_flow_req *req, int *perr);
extern void flow_entry_teardown(struct flow_owner *, struct flow_entry *);
extern void flow_entry_destroy(struct flow_owner *, struct flow_entry *, bool,
    void *);
extern void flow_entry_retain(struct flow_entry *fe);
extern void flow_entry_release(struct flow_entry **pfe);
extern uint32_t flow_entry_refcnt(struct flow_entry *fe);
extern bool rx_flow_demux_match(struct nx_flowswitch *, struct flow_entry *, struct __kern_packet *);
extern struct flow_entry *rx_lookup_child_flow(struct nx_flowswitch *fsw,
    struct flow_entry *, struct __kern_packet *);
extern struct flow_entry *tx_lookup_child_flow(struct flow_entry *, uuid_t);

extern struct flow_entry_dead *flow_entry_dead_alloc(zalloc_flags_t);
extern void flow_entry_dead_free(struct flow_entry_dead *);

extern void flow_entry_stats_get(struct flow_entry *, struct sk_stats_flow *);

extern int flow_pkt_classify(struct __kern_packet *pkt, struct ifnet *ifp,
    sa_family_t af, bool input);

extern void flow_track_stats(struct flow_entry *, uint64_t, uint64_t,
    bool, bool);
extern int flow_pkt_track(struct flow_entry *, struct __kern_packet *, bool);
extern boolean_t flow_track_tcp_want_abort(struct flow_entry *);
extern void flow_track_abort_tcp(struct flow_entry *fe,
    struct __kern_packet *in_pkt, struct __kern_packet *rst_pkt);
extern void flow_track_abort_quic(struct flow_entry *fe,
    uint8_t *__counted_by(QUIC_STATELESS_RESET_TOKEN_SIZE)token);

extern void fsw_host_rx(struct nx_flowswitch *, struct pktq *);
extern void fsw_host_sendup(struct ifnet *, struct mbuf *, struct mbuf *,
    uint32_t, uint32_t);

extern void flow_rx_agg_tcp(struct nx_flowswitch *fsw, struct flow_entry *fe,
    struct pktq *rx_pkts, uint32_t rx_bytes, uint32_t flags);

extern void flow_route_init(void);
extern void flow_route_fini(void);
extern struct flow_route_bucket *__sized_by(*tot_sz)
flow_route_buckets_alloc(size_t, size_t *, size_t * tot_sz);
extern void flow_route_buckets_free(struct flow_route_bucket *, size_t);
extern void flow_route_bucket_init(struct flow_route_bucket *);
extern void flow_route_bucket_destroy(struct flow_route_bucket *);
extern void flow_route_bucket_purge_all(struct flow_route_bucket *);
extern struct flow_route_id_bucket *__sized_by(*tot_sz)
flow_route_id_buckets_alloc(size_t, size_t *, size_t * tot_sz);
extern void flow_route_id_buckets_free(struct flow_route_id_bucket *, size_t);
extern void flow_route_id_bucket_init(struct flow_route_id_bucket *);
extern void flow_route_id_bucket_destroy(struct flow_route_id_bucket *);

extern int flow_route_select_laddr(union sockaddr_in_4_6 *,
    union sockaddr_in_4_6 *, struct ifnet *, struct rtentry *, uint32_t *, int);
extern int flow_route_find(struct kern_nexus *, struct flow_mgr *,
    struct ifnet *, struct nx_flow_req *, flow_route_ctor_fn_t,
    flow_route_resolve_fn_t, void *, struct flow_route **);
extern int flow_route_configure(struct flow_route *, struct ifnet *, struct nx_flow_req *);
extern void flow_route_retain(struct flow_route *);
extern void flow_route_release(struct flow_route *);
extern uint32_t flow_route_prune(struct flow_mgr *, struct ifnet *,
    uint32_t *);
extern void flow_route_cleanup(struct flow_route *);
extern boolean_t flow_route_laddr_validate(union sockaddr_in_4_6 *,
    struct ifnet *, uint32_t *);
extern boolean_t flow_route_key_validate(struct flow_key *, struct ifnet *,
    uint32_t *);
extern void flow_qset_select_dynamic(struct nx_flowswitch *,
    struct flow_entry *, boolean_t);
extern void flow_stats_init(void);
extern void flow_stats_fini(void);
extern struct flow_stats *flow_stats_alloc(boolean_t cansleep);

#if SK_LOG
#define FLOWKEY_DBGBUF_SIZE   256
#define FLOWENTRY_DBGBUF_SIZE   512
extern char *fk_as_string(const struct flow_key *fk, char *__counted_by(dsz)dst, size_t dsz);
extern char *fe_as_string(const struct flow_entry *fe, char *__counted_by(dsz)dst, size_t dsz);
#endif /* SK_LOG */
__END_DECLS
#endif /* BSD_KERNEL_PRIVATE */
#endif /* !_SKYWALK_NEXUS_FLOWSWITCH_FLOW_FLOWVAR_H_ */