Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
SUSE:SLE-12:GA
libibverbs
libibverbs-Add-receive-flow-steering-support.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File libibverbs-Add-receive-flow-steering-support.patch of Package libibverbs
From db47da4a0b79fb3c20a744db2f70ab6b32c8a7eb Mon Sep 17 00:00:00 2001 From: Matan Barak <matanb@mellanox.com> Date: Thu, 6 Feb 2014 14:20:33 +0200 Subject: [PATCH 6/6] Add receive flow steering support The RDMA stack allows for applications to create IB_QPT_RAW_PACKET QPs, which receive plain Ethernet packets, specifically packets that don't carry any QPN to be matched by the receiving side. Applications using these QPs must be provided with a method to program some steering rule with the HW so packets arriving at the local port can be routed to them. In a similar manner, when the device supports flow steering, IB UD QPs created by IPoIB allow user-space applications to steer specific TCP/IP flows to their QPs. This patch adds ibv_create_flow(), which allows providing a flow specification for a QP. When there's a match between the specification and a received packet, the packet is forwarded to that QP, in the same way one uses ibv_attach_mcast() for IB UD multicast handling. Flow specifications are provided as instances of struct ibv_flow_spec_yyy, which describe L2, L3 and L4 headers. Currently specs for Ethernet, IPv4, TCP and UDP are defined. Flow specs are made of values and masks. The input to ibv_create_flow() is a struct ibv_flow_attr, which contains a few mandatory control elements and optional flow specs. struct ibv_flow_attr { uint32_t comp_mask; enum ibv_flow_attr_type type; uint16_t size; uint16_t priority; uint8_t num_of_specs; uint8_t port; uint32_t flags; /* Following are the optional layers according to user request * struct ibv_flow_spec_xxx [L2] * struct ibv_flow_spec_yyy [L3/L4] */ }; These flow specs are defined and used in a way which allows adding new spec types without kernel/user ABI change, just with a little API enhancement which defines the newly added spec. The flow spec structures are defined with TLV (Type-Length-Value) entries, which allows calling ibv_create_flow() with a variable-length list of optional specs. 
For the actual processing of ibv_flow_attr the kernel uses the number of specs and the size mandatory fields along with the TLV nature of the specs. The returned value from ibv_create_flow() is a struct ibv_flow, which contains a handle provided by the kernel to be used when calling ibv_destroy_flow(). The ib_flow_attr enum type supports usage of flow steering for promiscuous and sniffer purposes: IBV_FLOW_ATTR_NORMAL - "regular" rule, steering according to rule specification IBV_FLOW_ATTR_ALL_DEFAULT - default unicast and multicast rule, receive all Ethernet traffic which isn't steered to any QP IBV_FLOW_ATTR_MC_DEFAULT - same as IB_FLOW_ATTR_ALL_DEFAULT but only for multicast ALL_DEFAULT and MC_DEFAULT rules options are valid only for Ethernet link type. Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: Matan Barak <matanb@mellanox.com> --- include/infiniband/driver.h | 4 + include/infiniband/kern-abi.h | 99 ++++++++++++++++++++++++++++++++ include/infiniband/verbs.h | 128 +++++++++++++++++++++++++++++++++++++++++- src/cmd.c | 105 ++++++++++++++++++++++++++++++++++ src/device.c | 4 + src/libibverbs.map | 2 6 files changed, 340 insertions(+), 2 deletions(-) Index: libibverbs-1.1.7/include/infiniband/driver.h =================================================================== --- libibverbs-1.1.7.orig/include/infiniband/driver.h 2014-03-05 10:36:36.000000000 +0100 +++ libibverbs-1.1.7/include/infiniband/driver.h 2014-08-29 12:50:51.649871589 +0200 @@ -194,6 +194,10 @@ int ibv_cmd_destroy_ah(struct ibv_ah *ah int ibv_cmd_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); int ibv_cmd_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid); +struct ibv_flow *ibv_cmd_create_flow(struct ibv_qp *qp, + struct ibv_flow_attr *flow_attr); +int ibv_cmd_destroy_flow(struct ibv_flow *flow_id); + int ibv_dontfork_range(void *base, size_t size); int ibv_dofork_range(void 
*base, size_t size); Index: libibverbs-1.1.7/include/infiniband/kern-abi.h =================================================================== --- libibverbs-1.1.7.orig/include/infiniband/kern-abi.h 2014-03-05 10:36:36.000000000 +0100 +++ libibverbs-1.1.7/include/infiniband/kern-abi.h 2014-08-29 12:51:28.751407756 +0200 @@ -102,6 +102,13 @@ enum { #define IB_USER_VERBS_CMD_FLAG_EXTENDED 0x80ul +enum { + IB_USER_VERBS_CMD_CREATE_FLOW = (IB_USER_VERBS_CMD_FLAG_EXTENDED << + IB_USER_VERBS_CMD_FLAGS_SHIFT) + + IB_USER_VERBS_CMD_THRESHOLD, + IB_USER_VERBS_CMD_DESTROY_FLOW +}; + /* * Make sure that all structs defined in this file remain laid out so * that they pack the same way on 32-bit and 64-bit architectures (to @@ -676,6 +683,76 @@ struct ibv_kern_send_wr { } qp_type; }; +struct ibv_kern_eth_filter { + __u8 dst_mac[6]; + __u8 src_mac[6]; + __u16 ether_type; + __u16 vlan_tag; +}; + +struct ibv_kern_spec_eth { + __u32 type; + __u16 size; + __u16 reserved; + struct ibv_kern_eth_filter val; + struct ibv_kern_eth_filter mask; +}; + +struct ibv_kern_ipv4_filter { + __u32 src_ip; + __u32 dst_ip; +}; + +struct ibv_kern_spec_ipv4 { + __u32 type; + __u16 size; + __u16 reserved; + struct ibv_kern_ipv4_filter val; + struct ibv_kern_ipv4_filter mask; +}; + +struct ibv_kern_tcp_udp_filter { + __u16 dst_port; + __u16 src_port; +}; + +struct ibv_kern_spec_tcp_udp { + __u32 type; + __u16 size; + __u16 reserved; + struct ibv_kern_tcp_udp_filter val; + struct ibv_kern_tcp_udp_filter mask; +}; + + +struct ibv_kern_spec { + union { + struct { + __u32 type; + __u16 size; + __u16 reserved; + } hdr; + struct ibv_kern_spec_eth eth; + struct ibv_kern_spec_ipv4 ipv4; + struct ibv_kern_spec_tcp_udp tcp_udp; + }; + +}; + +struct ibv_kern_flow_attr { + __u32 type; + __u16 size; + __u16 priority; + __u8 num_of_specs; + __u8 reserved[2]; + __u8 port; + __u32 flags; + /* Following are the optional layers according to user request + * struct ibv_kern_flow_spec_xxx + * struct ibv_kern_flow_spec_yyy 
+ */ +}; + struct ibv_post_send { __u32 command; __u16 in_words; @@ -763,6 +840,24 @@ struct ibv_attach_mcast { __u64 driver_data[0]; }; +struct ibv_create_flow { + struct ex_hdr hdr; + __u32 comp_mask; + __u32 qp_handle; + struct ibv_kern_flow_attr flow_attr; +}; + +struct ibv_create_flow_resp { + __u32 comp_mask; + __u32 flow_handle; +}; + +struct ibv_destroy_flow { + struct ex_hdr hdr; + __u32 comp_mask; + __u32 flow_handle; +}; + struct ibv_detach_mcast { __u32 command; __u16 in_words; @@ -904,7 +999,9 @@ enum { IB_USER_VERBS_CMD_OPEN_XRCD_V2 = -1, IB_USER_VERBS_CMD_CLOSE_XRCD_V2 = -1, IB_USER_VERBS_CMD_CREATE_XSRQ_V2 = -1, - IB_USER_VERBS_CMD_OPEN_QP_V2 = -1 + IB_USER_VERBS_CMD_OPEN_QP_V2 = -1, + IB_USER_VERBS_CMD_CREATE_FLOW_V2 = -1, + IB_USER_VERBS_CMD_DESTROY_FLOW_V2 = -1 }; struct ibv_modify_srq_v3 { Index: libibverbs-1.1.7/include/infiniband/verbs.h =================================================================== --- libibverbs-1.1.7.orig/include/infiniband/verbs.h 2014-08-29 12:50:51.644871652 +0200 +++ libibverbs-1.1.7/include/infiniband/verbs.h 2014-08-29 12:50:51.667871364 +0200 @@ -115,7 +115,8 @@ enum ibv_device_cap_flags { IBV_DEVICE_RC_RNR_NAK_GEN = 1 << 12, IBV_DEVICE_SRQ_RESIZE = 1 << 13, IBV_DEVICE_N_NOTIFY_CQ = 1 << 14, - IBV_DEVICE_XRC = 1 << 20 + IBV_DEVICE_XRC = 1 << 20, + IBV_DEVICE_MANAGED_FLOW_STEERING = 1 << 29 }; enum ibv_atomic_cap { @@ -965,8 +966,113 @@ enum verbs_context_mask { VERBS_CONTEXT_RESERVED = 1 << 4 }; +enum ibv_flow_flags { + IBV_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK = 1, +}; + +enum ibv_flow_attr_type { + /* steering according to rule specifications */ + IBV_FLOW_ATTR_NORMAL = 0x0, + /* default unicast and multicast rule - + * receive all Eth traffic which isn't steered to any QP + */ + IBV_FLOW_ATTR_ALL_DEFAULT = 0x1, + /* default multicast rule - + * receive all Eth multicast traffic which isn't steered to any QP + */ + IBV_FLOW_ATTR_MC_DEFAULT = 0x2, +}; + +enum ibv_flow_spec_type { + IBV_FLOW_SPEC_ETH = 0x20, + 
IBV_FLOW_SPEC_IPV4 = 0x30, + IBV_FLOW_SPEC_TCP = 0x40, + IBV_FLOW_SPEC_UDP = 0x41, +}; + +struct ibv_flow_eth_filter { + uint8_t dst_mac[6]; + uint8_t src_mac[6]; + uint16_t ether_type; + /* + * same layout as 802.1q: prio 3, cfi 1, vlan id 12 + */ + uint16_t vlan_tag; +}; + +struct ibv_flow_spec_eth { + enum ibv_flow_spec_type type; + uint16_t size; + struct ibv_flow_eth_filter val; + struct ibv_flow_eth_filter mask; +}; + +struct ibv_flow_ipv4_filter { + uint32_t src_ip; + uint32_t dst_ip; +}; + +struct ibv_flow_spec_ipv4 { + enum ibv_flow_spec_type type; + uint16_t size; + struct ibv_flow_ipv4_filter val; + struct ibv_flow_ipv4_filter mask; +}; + +struct ibv_flow_tcp_udp_filter { + uint16_t dst_port; + uint16_t src_port; +}; + +struct ibv_flow_spec_tcp_udp { + enum ibv_flow_spec_type type; + uint16_t size; + struct ibv_flow_tcp_udp_filter val; + struct ibv_flow_tcp_udp_filter mask; +}; + +struct ibv_flow_spec { + union { + struct { + enum ibv_flow_spec_type type; + uint16_t size; + } hdr; + struct ibv_flow_spec_eth eth; + struct ibv_flow_spec_ipv4 ipv4; + struct ibv_flow_spec_tcp_udp tcp_udp; + }; +}; + +struct ibv_flow_attr { + uint32_t comp_mask; + enum ibv_flow_attr_type type; + uint16_t size; + uint16_t priority; + uint8_t num_of_specs; + uint8_t port; + uint32_t flags; + /* Following are the optional layers according to user request + * struct ibv_flow_spec_xxx [L2] + * struct ibv_flow_spec_yyy [L3/L4] + */ +}; + +struct ibv_flow { + uint32_t comp_mask; + struct ibv_context *context; + uint32_t handle; +}; + struct verbs_context { /* "grows up" - new fields go here */ + int (*drv_ibv_destroy_flow) (struct ibv_flow *flow); + int (*lib_ibv_destroy_flow) (struct ibv_flow *flow); + struct ibv_flow * (*drv_ibv_create_flow) (struct ibv_qp *qp, + struct ibv_flow_attr + *flow_attr); + struct ibv_flow * (*lib_ibv_create_flow) (struct ibv_qp *qp, + struct ibv_flow_attr + *flow_attr); int (*drv_query_port_ex)(struct ibv_context *context, uint8_t port_num, struct 
ibv_port_attr_ex *port_attr); int (*lib_query_port_ex)(struct ibv_context *context, uint8_t port_num, @@ -1156,6 +1262,26 @@ struct ibv_pd *ibv_alloc_pd(struct ibv_c */ int ibv_dealloc_pd(struct ibv_pd *pd); +static inline struct ibv_flow *ibv_create_flow(struct ibv_qp *qp, + struct ibv_flow_attr *flow) +{ + struct verbs_context *vctx = verbs_get_ctx_op(qp->context, + lib_ibv_create_flow); + if (!vctx || !vctx->lib_ibv_create_flow) + return NULL; + + return vctx->lib_ibv_create_flow(qp, flow); +} + +static inline int ibv_destroy_flow(struct ibv_flow *flow_id) +{ + struct verbs_context *vctx = verbs_get_ctx_op(flow_id->context, + lib_ibv_destroy_flow); + if (!vctx || !vctx->lib_ibv_destroy_flow) + return -ENOSYS; + return vctx->lib_ibv_destroy_flow(flow_id); +} + /** * ibv_open_xrcd - Open an extended connection domain */ Index: libibverbs-1.1.7/src/cmd.c =================================================================== --- libibverbs-1.1.7.orig/src/cmd.c 2014-03-05 10:36:36.000000000 +0100 +++ libibverbs-1.1.7/src/cmd.c 2014-08-29 12:50:51.667871364 +0200 @@ -1268,3 +1268,108 @@ int ibv_cmd_detach_mcast(struct ibv_qp * return 0; } + +static int ib_spec_to_kern_spec(struct ibv_flow_spec *ib_spec, + struct ibv_kern_spec *kern_spec) +{ + kern_spec->hdr.type = ib_spec->hdr.type; + + switch (ib_spec->hdr.type) { + case IBV_FLOW_SPEC_ETH: + kern_spec->eth.size = sizeof(struct ibv_kern_spec_eth); + memcpy(&kern_spec->eth.val, &ib_spec->eth.val, + sizeof(struct ibv_flow_eth_filter)); + memcpy(&kern_spec->eth.mask, &ib_spec->eth.mask, + sizeof(struct ibv_flow_eth_filter)); + break; + case IBV_FLOW_SPEC_IPV4: + kern_spec->ipv4.size = sizeof(struct ibv_kern_spec_ipv4); + memcpy(&kern_spec->ipv4.val, &ib_spec->ipv4.val, + sizeof(struct ibv_flow_ipv4_filter)); + memcpy(&kern_spec->ipv4.mask, &ib_spec->ipv4.mask, + sizeof(struct ibv_flow_ipv4_filter)); + break; + case IBV_FLOW_SPEC_TCP: + case IBV_FLOW_SPEC_UDP: + kern_spec->tcp_udp.size = sizeof(struct ibv_kern_spec_tcp_udp); 
+ memcpy(&kern_spec->tcp_udp.val, &ib_spec->tcp_udp.val, + sizeof(struct ibv_flow_tcp_udp_filter)); + memcpy(&kern_spec->tcp_udp.mask, &ib_spec->tcp_udp.mask, + sizeof(struct ibv_flow_tcp_udp_filter)); + break; + default: + return -EINVAL; + } + return 0; +} + +struct ibv_flow *ibv_cmd_create_flow(struct ibv_qp *qp, + struct ibv_flow_attr *flow_attr) +{ + struct ibv_create_flow *cmd; + struct ibv_create_flow_resp resp; + struct ibv_flow *flow_id; + size_t cmd_size; + size_t written_size; + int i, err; + void *kern_spec; + void *ib_spec; + + cmd_size = sizeof(*cmd) + (flow_attr->num_of_specs * + sizeof(struct ibv_kern_spec)); + cmd = alloca(cmd_size); + flow_id = malloc(sizeof(*flow_id)); + if (!flow_id) + return NULL; + memset(cmd, 0, cmd_size); + + cmd->qp_handle = qp->handle; + + cmd->flow_attr.type = flow_attr->type; + cmd->flow_attr.priority = flow_attr->priority; + cmd->flow_attr.num_of_specs = flow_attr->num_of_specs; + cmd->flow_attr.port = flow_attr->port; + cmd->flow_attr.flags = flow_attr->flags; + + kern_spec = cmd + 1; + ib_spec = flow_attr + 1; + for (i = 0; i < flow_attr->num_of_specs; i++) { + err = ib_spec_to_kern_spec(ib_spec, kern_spec); + if (err) + goto err; + cmd->flow_attr.size += + ((struct ibv_kern_spec *)kern_spec)->hdr.size; + kern_spec += ((struct ibv_kern_spec *)kern_spec)->hdr.size; + ib_spec += ((struct ibv_flow_spec *)ib_spec)->hdr.size; + } + + written_size = sizeof(*cmd) + cmd->flow_attr.size; + IBV_INIT_CMD_RESP_EX_VCMD(cmd, written_size, written_size, CREATE_FLOW, + &resp, sizeof(resp)); + if (write(qp->context->cmd_fd, cmd, written_size) != written_size) + goto err; + + VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof(resp)); + + flow_id->context = qp->context; + flow_id->handle = resp.flow_handle; + return flow_id; +err: + free(flow_id); + return NULL; +} + +int ibv_cmd_destroy_flow(struct ibv_flow *flow_id) +{ + struct ibv_destroy_flow cmd; + int ret = 0; + + memset(&cmd, 0, sizeof(cmd)); + IBV_INIT_CMD_EX(&cmd, sizeof(cmd), DESTROY_FLOW); 
+ cmd.flow_handle = flow_id->handle; + + if (write(flow_id->context->cmd_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) + ret = errno; + free(flow_id); + return ret; +} Index: libibverbs-1.1.7/src/device.c =================================================================== --- libibverbs-1.1.7.orig/src/device.c 2014-08-29 12:50:51.644871652 +0200 +++ libibverbs-1.1.7/src/device.c 2014-08-29 12:50:51.667871364 +0200 @@ -171,6 +171,10 @@ struct ibv_context *__ibv_open_device(st */ context_ex->lib_query_port_ex = context_ex->drv_query_port_ex; + context_ex->lib_ibv_create_flow = + context_ex->drv_ibv_create_flow; + context_ex->lib_ibv_destroy_flow = + context_ex->drv_ibv_destroy_flow; } context->device = device; Index: libibverbs-1.1.7/src/libibverbs.map =================================================================== --- libibverbs-1.1.7.orig/src/libibverbs.map 2014-03-05 10:36:36.000000000 +0100 +++ libibverbs-1.1.7/src/libibverbs.map 2014-08-29 12:50:51.667871364 +0200 @@ -64,6 +64,8 @@ IBVERBS_1.0 { ibv_cmd_destroy_ah; ibv_cmd_attach_mcast; ibv_cmd_detach_mcast; + ibv_cmd_create_flow; + ibv_cmd_destroy_flow; ibv_copy_qp_attr_from_kern; ibv_copy_path_rec_from_kern; ibv_copy_path_rec_to_kern;
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor