From f12f5ea57d8c95621fe8ed92e34164b71d430a70 Mon Sep 17 00:00:00 2001 From: Jessie Yang Date: Thu, 14 Nov 2024 12:10:22 -0800 Subject: [PATCH] prov/efa: Implement the rma interface Rename efa_dgram_rma.c to efa_rma.c and move it to prov/efa/src as a common RMA interface for both rdm and dgram ep type. Update that dgram does not support rma. Implement rdma write and inject. Support inline rdma write. Signed-off-by: Jessie Yang --- libfabric.vcxproj | 2 +- prov/efa/Makefile.include | 2 +- prov/efa/src/dgram/efa_dgram_rma.c | 148 ---------- prov/efa/src/efa_base_ep.c | 10 +- prov/efa/src/efa_rma.c | 409 ++++++++++++++++++++++++++++ prov/efa/src/rdm/efa_rdm_ep_fiops.c | 7 +- 6 files changed, 421 insertions(+), 157 deletions(-) delete mode 100644 prov/efa/src/dgram/efa_dgram_rma.c create mode 100644 prov/efa/src/efa_rma.c diff --git a/libfabric.vcxproj b/libfabric.vcxproj index e85229c9f1a..3eef3ef0521 100644 --- a/libfabric.vcxproj +++ b/libfabric.vcxproj @@ -885,9 +885,9 @@ + - diff --git a/prov/efa/Makefile.include b/prov/efa/Makefile.include index e5961cb13d5..81e0fab0aed 100644 --- a/prov/efa/Makefile.include +++ b/prov/efa/Makefile.include @@ -48,9 +48,9 @@ _efa_files = \ prov/efa/src/efa_env.c \ prov/efa/src/efa_cntr.c \ prov/efa/src/efa_msg.c \ + prov/efa/src/efa_rma.c \ prov/efa/src/dgram/efa_dgram_ep.c \ prov/efa/src/dgram/efa_dgram_cq.c \ - prov/efa/src/dgram/efa_dgram_rma.c \ prov/efa/src/rdm/efa_rdm_peer.c \ prov/efa/src/rdm/efa_rdm_cq.c \ prov/efa/src/rdm/efa_rdm_ep_utils.c \ diff --git a/prov/efa/src/dgram/efa_dgram_rma.c b/prov/efa/src/dgram/efa_dgram_rma.c deleted file mode 100644 index 99f4c1a2929..00000000000 --- a/prov/efa/src/dgram/efa_dgram_rma.c +++ /dev/null @@ -1,148 +0,0 @@ -/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only */ -/* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ - -#include -#include -#include -#include -#include "efa_dgram_ep.h" -#include "efa.h" -#include "efa_av.h" - - -/* - * efa_dgram_rma_post_read() will post a read request. - * - * Input: - * ep: endpoint - * msg: read operation information - * flags: currently no flags is taken - * self_comm: indicate whether the read is toward - * the end point itself. If self_comm is true, - * caller must set msg->addr to FI_ADDR_NOTAVAIL. - * - * On success return 0, - * If read iov and rma_iov count out of device limit, return -FI_EINVAL - * If read failed, return the error of read operation - */ -ssize_t efa_dgram_rma_post_read(struct efa_dgram_ep *ep, const struct fi_msg_rma *msg, - uint64_t flags, bool self_comm) -{ - struct efa_qp *qp; - struct efa_mr *efa_mr; - struct efa_conn *conn; -#ifndef _WIN32 - struct ibv_sge sge_list[msg->iov_count]; -#else - /* MSVC compiler does not support array declarations with runtime size, so hardcode - * the expected iov_limit/max_sq_sge from the lower-level efa provider. 
- */ - struct ibv_sge sge_list[EFA_DEV_ATTR_MAX_WR_SGE]; -#endif - int i; - - if (OFI_UNLIKELY(msg->iov_count > ep->base_ep.domain->device->ibv_attr.max_sge_rd)) { - EFA_WARN(FI_LOG_CQ, "invalid iov_count!\n"); - return -FI_EINVAL; - } - - if (OFI_UNLIKELY(msg->rma_iov_count > ep->base_ep.domain->info->tx_attr->rma_iov_limit)) { - EFA_WARN(FI_LOG_CQ, "invalid rma_iov_count!\n"); - return -FI_EINVAL; - } - - if (OFI_UNLIKELY(ofi_total_iov_len(msg->msg_iov, msg->iov_count) - > ep->base_ep.domain->device->max_rdma_size)) { - EFA_WARN(FI_LOG_CQ, "maximum rdma_size exceeded!\n"); - return -FI_EINVAL; - } - - /* caller must provide desc because EFA require FI_MR_LOCAL */ - assert(msg->desc); - - /* ep->domain->info->tx_attr->rma_iov_limit is set to 1 */ - qp = ep->base_ep.qp; - ibv_wr_start(qp->ibv_qp_ex); - qp->ibv_qp_ex->wr_id = (uintptr_t)msg->context; - ibv_wr_rdma_read(qp->ibv_qp_ex, msg->rma_iov[0].key, msg->rma_iov[0].addr); - - for (i = 0; i < msg->iov_count; ++i) { - sge_list[i].addr = (uint64_t)msg->msg_iov[i].iov_base; - sge_list[i].length = msg->msg_iov[i].iov_len; - assert(msg->desc[i]); - efa_mr = (struct efa_mr *)msg->desc[i]; - sge_list[i].lkey = efa_mr->ibv_mr->lkey; - } - - ibv_wr_set_sge_list(qp->ibv_qp_ex, msg->iov_count, sge_list); - if (self_comm) { - assert(msg->addr == FI_ADDR_NOTAVAIL); - ibv_wr_set_ud_addr(qp->ibv_qp_ex, ep->base_ep.self_ah, - qp->qp_num, qp->qkey); - } else { - conn = efa_av_addr_to_conn(ep->base_ep.av, msg->addr); - assert(conn && conn->ep_addr); - ibv_wr_set_ud_addr(qp->ibv_qp_ex, conn->ah->ibv_ah, - conn->ep_addr->qpn, conn->ep_addr->qkey); - } - - return ibv_wr_complete(qp->ibv_qp_ex); -} - -static -ssize_t efa_dgram_rma_readmsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg, uint64_t flags) -{ - struct efa_dgram_ep *ep = container_of(ep_fid, struct efa_dgram_ep, base_ep.util_ep.ep_fid); - - return efa_dgram_rma_post_read(ep, msg, flags, false); -} - -static -ssize_t efa_dgram_rma_readv(struct fid_ep *ep, const struct 
iovec *iov, void **desc, - size_t iov_count, fi_addr_t src_addr, uint64_t addr, - uint64_t key, void *context) -{ - struct fi_rma_iov rma_iov; - struct fi_msg_rma msg; - - rma_iov.addr = addr; - rma_iov.len = ofi_total_iov_len(iov, iov_count); - rma_iov.key = key; - - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = iov; - msg.desc = desc; - msg.iov_count = iov_count; - msg.addr = src_addr; - msg.context = context; - msg.rma_iov = &rma_iov; - msg.rma_iov_count = 1; - - return efa_dgram_rma_readmsg(ep, &msg, 0); -} - -static -ssize_t efa_dgram_rma_read(struct fid_ep *ep, void *buf, size_t len, void *desc, - fi_addr_t src_addr, uint64_t addr, uint64_t key, - void *context) -{ - struct iovec iov; - - iov.iov_base = (void *)buf; - iov.iov_len = len; - return efa_dgram_rma_readv(ep, &iov, &desc, 1, src_addr, addr, key, context); -} - -struct fi_ops_rma efa_dgram_ep_rma_ops = { - .size = sizeof(struct fi_ops_rma), - .read = efa_dgram_rma_read, - .readv = efa_dgram_rma_readv, - .readmsg = efa_dgram_rma_readmsg, - .write = fi_no_rma_write, - .writev = fi_no_rma_writev, - .writemsg = fi_no_rma_writemsg, - .inject = fi_no_rma_inject, - .writedata = fi_no_rma_writedata, - .injectdata = fi_no_rma_injectdata, -}; - diff --git a/prov/efa/src/efa_base_ep.c b/prov/efa/src/efa_base_ep.c index 55997a3cfe6..7e7b6b4a910 100644 --- a/prov/efa/src/efa_base_ep.c +++ b/prov/efa/src/efa_base_ep.c @@ -186,6 +186,12 @@ int efa_qp_create(struct efa_qp **qp, struct ibv_qp_init_attr_ex *init_attr_ex, init_attr_ex); } else { assert(init_attr_ex->qp_type == IBV_QPT_DRIVER); + if (efa_device_support_rdma_read()) + init_attr_ex->send_ops_flags |= IBV_QP_EX_WITH_RDMA_READ; + if (efa_device_support_rdma_write()) { + init_attr_ex->send_ops_flags |= IBV_QP_EX_WITH_RDMA_WRITE; + init_attr_ex->send_ops_flags |= IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM; + } #if HAVE_CAPS_UNSOLICITED_WRITE_RECV if (efa_rdm_use_unsolicited_write_recv()) efa_attr.flags |= EFADV_QP_FLAGS_UNSOLICITED_WRITE_RECV; @@ -362,7 +368,9 @@ int 
efa_base_ep_construct(struct efa_base_ep *base_ep, base_ep->max_msg_size = info->ep_attr->max_msg_size; base_ep->max_rma_size = info->ep_attr->max_msg_size; base_ep->inject_msg_size = info->tx_attr->inject_size; - base_ep->inject_rma_size = info->tx_attr->inject_size; + /* TODO: update inject_rma_size to inline size after firmware + * supports inline rdma write */ + base_ep->inject_rma_size = 0; return 0; } diff --git a/prov/efa/src/efa_rma.c b/prov/efa/src/efa_rma.c new file mode 100644 index 00000000000..468ea2e1f76 --- /dev/null +++ b/prov/efa/src/efa_rma.c @@ -0,0 +1,409 @@ +/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only */ +/* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. */ + +#include +#include +#include +#include +#include "efa.h" +#include "efa_av.h" + +#define EFA_SETUP_IOV(iov, buf, len) \ + do { \ + iov.iov_base = (void *) buf; \ + iov.iov_len = (size_t) len; \ + } while (0) + +#define EFA_SETUP_RMA_IOV(rma_iov, _addr, _len, _key) \ + do { \ + rma_iov.addr = (uint64_t) _addr; \ + rma_iov.len = (size_t) _len; \ + rma_iov.key = (uint64_t) _key; \ + } while (0) + +#define EFA_SETUP_MSG_RMA(msg, iov, _desc, count, _addr, _rma_iov, \ + _rma_iov_count, _context, _data) \ + do { \ + msg.msg_iov = (const struct iovec *) iov; \ + msg.desc = (void **) _desc; \ + msg.iov_count = (size_t) count; \ + msg.addr = (fi_addr_t) _addr; \ + msg.rma_iov = (const struct fi_rma_iov *) _rma_iov; \ + msg.rma_iov_count = (size_t) _rma_iov_count; \ + msg.context = (void *) _context; \ + msg.data = (uint32_t) _data; \ + } while (0) + +/** + * @brief check whether endpoint was configured with FI_RMA capability + * @return -FI_EOPNOTSUPP if FI_RMA wasn't requested, 0 if it was. 
 */ +static inline int efa_rma_check_cap(struct efa_base_ep *base_ep) { + if ((base_ep->info->caps & FI_RMA) == FI_RMA) + return 0; + EFA_WARN_ONCE(FI_LOG_EP_DATA, "Operation requires FI_RMA capability, which was not requested.\n"); + return -FI_EOPNOTSUPP; +} + +/* + * efa_rma_post_read() will post a read request. + * + * Input: + * base_ep: endpoint + * msg: read operation information + * flags: currently no flags are taken + * + * On success return 0, + * If read failed, return the error of read operation + */ +static inline ssize_t efa_rma_post_read(struct efa_base_ep *base_ep, + const struct fi_msg_rma *msg, + uint64_t flags) +{ + struct efa_qp *qp; + struct efa_mr *efa_mr; + struct efa_conn *conn; +#ifndef _WIN32 + struct ibv_sge sge_list[msg->iov_count]; +#else + /* MSVC compiler does not support array declarations with runtime size, so hardcode + * the expected iov_limit/max_sq_sge from the lower-level efa provider. + */ + struct ibv_sge sge_list[EFA_DEV_ATTR_MAX_WR_SGE]; +#endif + int i, err = 0; + + assert(msg->iov_count > 0 && + msg->iov_count <= base_ep->domain->info->tx_attr->iov_limit); + assert(msg->rma_iov_count > 0 && + msg->rma_iov_count <= base_ep->domain->info->tx_attr->rma_iov_limit); + assert(ofi_total_iov_len(msg->msg_iov, msg->iov_count) <= + base_ep->domain->device->max_rdma_size); + + qp = base_ep->qp; + if (!base_ep->is_wr_started) { + ibv_wr_start(qp->ibv_qp_ex); + base_ep->is_wr_started = true; + } + qp->ibv_qp_ex->wr_id = (uintptr_t)msg->context; + + /* ep->domain->info->tx_attr->rma_iov_limit is set to 1 */ + ibv_wr_rdma_read(qp->ibv_qp_ex, msg->rma_iov[0].key, msg->rma_iov[0].addr); + + for (i = 0; i < msg->iov_count; ++i) { + sge_list[i].addr = (uint64_t)msg->msg_iov[i].iov_base; + sge_list[i].length = msg->msg_iov[i].iov_len; + assert(msg->desc && msg->desc[i]); + efa_mr = (struct efa_mr *)msg->desc[i]; + sge_list[i].lkey = efa_mr->ibv_mr->lkey; + } + + ibv_wr_set_sge_list(qp->ibv_qp_ex, msg->iov_count, sge_list); + + conn =
efa_av_addr_to_conn(base_ep->av, msg->addr); + assert(conn && conn->ep_addr); + ibv_wr_set_ud_addr(qp->ibv_qp_ex, conn->ah->ibv_ah, conn->ep_addr->qpn, + conn->ep_addr->qkey); + + if (!(flags & FI_MORE)) { + err = ibv_wr_complete(qp->ibv_qp_ex); + base_ep->is_wr_started = false; + } + if (OFI_UNLIKELY(err)) + return err; + + return 0; +} + +static +ssize_t efa_rma_readmsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg, uint64_t flags) +{ + struct efa_base_ep *base_ep; + int err; + + base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + err = efa_rma_check_cap(base_ep); + if (err) + return err; + + return efa_rma_post_read(base_ep, msg, flags | base_ep->util_ep.tx_msg_flags); +} + +static +ssize_t efa_rma_readv(struct fid_ep *ep_fid, const struct iovec *iov, void **desc, + size_t iov_count, fi_addr_t src_addr, uint64_t addr, + uint64_t key, void *context) +{ + struct fi_rma_iov rma_iov; + struct fi_msg_rma msg; + struct efa_base_ep *base_ep; + size_t len; + int err; + + base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + err = efa_rma_check_cap(base_ep); + if (err) + return err; + + len = ofi_total_iov_len(iov, iov_count); + EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); + EFA_SETUP_MSG_RMA(msg, iov, desc, iov_count, src_addr, &rma_iov, 1, + context, 0); + + return efa_rma_post_read(base_ep, &msg, efa_tx_flags(base_ep)); +} + +static +ssize_t efa_rma_read(struct fid_ep *ep_fid, void *buf, size_t len, void *desc, + fi_addr_t src_addr, uint64_t addr, uint64_t key, + void *context) +{ + struct iovec iov; + struct fi_rma_iov rma_iov; + struct fi_msg_rma msg; + struct efa_base_ep *base_ep; + int err; + + base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + assert(len <= base_ep->max_rma_size); + err = efa_rma_check_cap(base_ep); + if (err) + return err; + + EFA_SETUP_IOV(iov, buf, len); + EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); + EFA_SETUP_MSG_RMA(msg, &iov, &desc, 1, src_addr, &rma_iov, 1, context, 0); + + return 
efa_rma_post_read(base_ep, &msg, efa_tx_flags(base_ep)); +} + +/** + * @brief Post a WRITE request + * + * Input: + * base_ep: endpoint + * msg: write operation information + * flags: flags passed + * @return On success return 0, otherwise return a negative libfabric error code. + */ +static inline ssize_t efa_rma_post_write(struct efa_base_ep *base_ep, + const struct fi_msg_rma *msg, + uint64_t flags) +{ + struct efa_qp *qp; + struct efa_conn *conn; +#ifndef _WIN32 + struct ibv_sge sge_list[msg->iov_count]; + struct ibv_data_buf inline_data_list[msg->iov_count]; +#else + /* MSVC compiler does not support array declarations with runtime size, so hardcode + * the expected iov_limit/max_sq_sge from the lower-level efa provider. + */ + struct ibv_sge sge_list[EFA_DEV_ATTR_MAX_WR_SGE]; + struct ibv_data_buf inline_data_list[EFA_DEV_ATTR_MAX_WR_SGE]; +#endif + size_t len; + int i, err = 0; + + qp = base_ep->qp; + if (!base_ep->is_wr_started) { + ibv_wr_start(qp->ibv_qp_ex); + base_ep->is_wr_started = true; + } + qp->ibv_qp_ex->wr_id = (uintptr_t)msg->context; + + if (flags & FI_REMOTE_CQ_DATA) { + ibv_wr_rdma_write_imm(qp->ibv_qp_ex, msg->rma_iov[0].key, + msg->rma_iov[0].addr, msg->data); + } else { + ibv_wr_rdma_write(qp->ibv_qp_ex, msg->rma_iov[0].key, msg->rma_iov[0].addr); + } + + len = ofi_total_iov_len(msg->msg_iov, msg->iov_count); + if (len <= base_ep->domain->device->efa_attr.inline_buf_size && + len <= base_ep->inject_rma_size && + (!msg->desc || !efa_mr_is_hmem(msg->desc[0]))) { + for (i = 0; i < msg->iov_count; i++) { + inline_data_list[i].addr = msg->msg_iov[i].iov_base; + inline_data_list[i].length = msg->msg_iov[i].iov_len; + } + ibv_wr_set_inline_data_list(qp->ibv_qp_ex, msg->iov_count, inline_data_list); + } else { + for (i = 0; i < msg->iov_count; ++i) { + sge_list[i].addr = (uint64_t)msg->msg_iov[i].iov_base; + sge_list[i].length = msg->msg_iov[i].iov_len; + assert(msg->desc && msg->desc[i]); + sge_list[i].lkey = ((struct efa_mr
*)msg->desc[i])->ibv_mr->lkey; + } + ibv_wr_set_sge_list(qp->ibv_qp_ex, msg->iov_count, sge_list); + } + + conn = efa_av_addr_to_conn(base_ep->av, msg->addr); + assert(conn && conn->ep_addr); + ibv_wr_set_ud_addr(qp->ibv_qp_ex, conn->ah->ibv_ah, conn->ep_addr->qpn, + conn->ep_addr->qkey); + + if (!(flags & FI_MORE)) { + err = ibv_wr_complete(qp->ibv_qp_ex); + base_ep->is_wr_started = false; + } + + if (OFI_UNLIKELY(err)) + return err; + + return 0; +} + +ssize_t efa_rma_writemsg(struct fid_ep *ep_fid, const struct fi_msg_rma *msg, + uint64_t flags) +{ + struct efa_base_ep *base_ep; + int err; + + base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + err = efa_rma_check_cap(base_ep); + if (err) + return err; + + return efa_rma_post_write(base_ep, msg, flags | base_ep->util_ep.tx_msg_flags); +} + +ssize_t efa_rma_writev(struct fid_ep *ep_fid, const struct iovec *iov, + void **desc, size_t iov_count, fi_addr_t dest_addr, + uint64_t addr, uint64_t key, void *context) +{ + struct fi_rma_iov rma_iov; + struct fi_msg_rma msg; + struct efa_base_ep *base_ep; + size_t len; + int err; + + base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + err = efa_rma_check_cap(base_ep); + if (err) + return err; + + len = ofi_total_iov_len(iov, iov_count); + EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); + EFA_SETUP_MSG_RMA(msg, iov, desc, iov_count, dest_addr, &rma_iov, 1, + context, 0); + + return efa_rma_post_write(base_ep, &msg, efa_tx_flags(base_ep)); +} + +ssize_t efa_rma_write(struct fid_ep *ep_fid, const void *buf, size_t len, + void *desc, fi_addr_t dest_addr, uint64_t addr, + uint64_t key, void *context) +{ + struct iovec iov; + struct fi_rma_iov rma_iov; + struct fi_msg_rma msg; + struct efa_base_ep *base_ep; + int err; + + base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + assert(len <= base_ep->max_rma_size); + err = efa_rma_check_cap(base_ep); + if (err) + return err; + + EFA_SETUP_IOV(iov, buf, len); + 
EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); + EFA_SETUP_MSG_RMA(msg, &iov, &desc, 1, dest_addr, &rma_iov, 1, context, 0); + + return efa_rma_post_write(base_ep, &msg, efa_tx_flags(base_ep)); +} + +ssize_t efa_rma_writedata(struct fid_ep *ep_fid, const void *buf, size_t len, + void *desc, uint64_t data, fi_addr_t dest_addr, + uint64_t addr, uint64_t key, void *context) +{ + struct iovec iov; + struct fi_rma_iov rma_iov; + struct fi_msg_rma msg; + struct efa_base_ep *base_ep; + int err; + + base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + assert(len <= base_ep->max_rma_size); + err = efa_rma_check_cap(base_ep); + if (err) + return err; + + EFA_SETUP_IOV(iov, buf, len); + EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); + EFA_SETUP_MSG_RMA(msg, &iov, &desc, 1, dest_addr, &rma_iov, 1, context, data); + + return efa_rma_post_write(base_ep, &msg, FI_REMOTE_CQ_DATA | efa_tx_flags(base_ep)); +} + +ssize_t efa_rma_inject_write(struct fid_ep *ep_fid, const void *buf, size_t len, + fi_addr_t dest_addr, uint64_t addr, uint64_t key) +{ + struct fi_msg_rma msg; + struct iovec iov; + struct fi_rma_iov rma_iov; + struct efa_base_ep *base_ep; + int err; + + base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + assert(len <= base_ep->inject_rma_size); + err = efa_rma_check_cap(base_ep); + if (err) + return err; + + EFA_SETUP_IOV(iov, buf, len); + EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); + EFA_SETUP_MSG_RMA(msg, &iov, NULL, 1, dest_addr, &rma_iov, 1, NULL, 0); + + return efa_rma_post_write(base_ep, &msg, FI_INJECT); +} + +ssize_t efa_rma_inject_writedata(struct fid_ep *ep_fid, const void *buf, + size_t len, uint64_t data, fi_addr_t dest_addr, + uint64_t addr, uint64_t key) +{ + struct fi_msg_rma msg; + struct iovec iov; + struct fi_rma_iov rma_iov; + struct efa_base_ep *base_ep; + int err; + + base_ep = container_of(ep_fid, struct efa_base_ep, util_ep.ep_fid); + assert(len <= base_ep->inject_rma_size); + err = efa_rma_check_cap(base_ep); + if (err) 
+ return err; + + EFA_SETUP_IOV(iov, buf, len); + EFA_SETUP_RMA_IOV(rma_iov, addr, len, key); + EFA_SETUP_MSG_RMA(msg, &iov, NULL, 1, dest_addr, &rma_iov, 1, NULL, data); + + return efa_rma_post_write(base_ep, &msg, FI_INJECT | FI_REMOTE_CQ_DATA); +} + +struct fi_ops_rma efa_dgram_ep_rma_ops = { + .size = sizeof(struct fi_ops_rma), + .read = fi_no_rma_read, + .readv = fi_no_rma_readv, + .readmsg = fi_no_rma_readmsg, + .write = fi_no_rma_write, + .writev = fi_no_rma_writev, + .writemsg = fi_no_rma_writemsg, + .inject = fi_no_rma_inject, + .writedata = fi_no_rma_writedata, + .injectdata = fi_no_rma_injectdata, +}; + +struct fi_ops_rma efa_rma_ops = { + .size = sizeof(struct fi_ops_rma), + .read = efa_rma_read, + .readv = efa_rma_readv, + .readmsg = efa_rma_readmsg, + .write = efa_rma_write, + .writev = efa_rma_writev, + .writemsg = efa_rma_writemsg, + .inject = efa_rma_inject_write, + .writedata = efa_rma_writedata, + .injectdata = efa_rma_inject_writedata, +}; diff --git a/prov/efa/src/rdm/efa_rdm_ep_fiops.c b/prov/efa/src/rdm/efa_rdm_ep_fiops.c index d8a1a3fc5e9..98e1d0b4375 100644 --- a/prov/efa/src/rdm/efa_rdm_ep_fiops.c +++ b/prov/efa/src/rdm/efa_rdm_ep_fiops.c @@ -26,12 +26,6 @@ void efa_rdm_ep_construct_ibv_qp_init_attr_ex(struct efa_rdm_ep *ep, attr_ex->cap.max_recv_sge = ep->base_ep.domain->device->rdm_info->rx_attr->iov_limit; attr_ex->cap.max_inline_data = ep->base_ep.domain->device->efa_attr.inline_buf_size; attr_ex->qp_type = IBV_QPT_DRIVER; - if (efa_device_support_rdma_read()) - attr_ex->send_ops_flags |= IBV_QP_EX_WITH_RDMA_READ; - if (efa_device_support_rdma_write()) { - attr_ex->send_ops_flags |= IBV_QP_EX_WITH_RDMA_WRITE; - attr_ex->send_ops_flags |= IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM; - } attr_ex->pd = efa_rdm_ep_domain(ep)->ibv_pd; attr_ex->qp_context = ep; attr_ex->sq_sig_all = 1; @@ -564,6 +558,7 @@ int efa_rdm_ep_open(struct fid_domain *domain, struct fi_info *info, efa_rdm_ep->max_atomic_size = info->ep_attr->max_msg_size; 
efa_rdm_ep->inject_tagged_size = info->tx_attr->inject_size; efa_rdm_ep->inject_atomic_size = info->tx_attr->inject_size; + efa_rdm_ep->base_ep.inject_rma_size = info->tx_attr->inject_size; efa_rdm_ep->efa_max_outstanding_tx_ops = efa_domain->device->rdm_info->tx_attr->size; efa_rdm_ep->efa_max_outstanding_rx_ops = efa_domain->device->rdm_info->rx_attr->size; efa_rdm_ep->use_device_rdma = efa_rdm_get_use_device_rdma(info->fabric_attr->api_version);