Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support FI_OPT_CUDA_API_PERMITTED #10561

Merged
merged 3 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions man/fi_cxi.7.md
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,10 @@ The CXI provider checks for the following environment variables:
: Max amount of time to poll when LE invalidate disabling an MR configured with MR
match events.

*FI_CXI_FORCE_DEV_REG_COPY*
: Force the CXI provider to use the HMEM device register copy routines. If not
supported, RDMA operations or memory registration will fail.

Note: Use the fi_info utility to query provider environment variables:
<code>fi_info -p cxi -e</code>

Expand Down
51 changes: 41 additions & 10 deletions prov/cxi/include/cxip.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ struct cxip_environment {
int hybrid_unexpected_msg_preemptive;
size_t mr_cache_events_disable_poll_nsecs;
size_t mr_cache_events_disable_le_poll_nsecs;
int force_dev_reg_copy;
};

extern struct cxip_environment cxip_env;
Expand Down Expand Up @@ -1963,13 +1964,16 @@ struct cxip_rxc_rnr {
};

static inline void cxip_copy_to_md(struct cxip_md *md, void *dest,
const void *src, size_t size)
const void *src, size_t size,
bool require_dev_reg_copy)
{
ssize_t ret __attribute__((unused));
struct iovec iov;
bool dev_reg_copy = require_dev_reg_copy ||
(md->handle_valid && size <= cxip_env.safe_devmem_copy_threshold);

/* Favor CPU store access instead of relying on HMEM copy functions. */
if (md->handle_valid && size <= cxip_env.safe_devmem_copy_threshold) {
/* Favor dev reg access instead of relying on HMEM copy functions. */
if (dev_reg_copy) {
ret = ofi_hmem_dev_reg_copy_to_hmem(md->info.iface, md->handle,
dest, src, size);
assert(ret == FI_SUCCESS);
Expand All @@ -1985,13 +1989,16 @@ static inline void cxip_copy_to_md(struct cxip_md *md, void *dest,
}

static inline void cxip_copy_from_md(struct cxip_md *md, void *dest,
const void *src, size_t size)
const void *src, size_t size,
bool require_dev_reg_copy)
{
ssize_t ret __attribute__((unused));
struct iovec iov;
bool dev_reg_copy = require_dev_reg_copy ||
(md->handle_valid && size <= cxip_env.safe_devmem_copy_threshold);

/* Favor CPU store access instead of relying on HMEM copy functions. */
if (md->handle_valid && size <= cxip_env.safe_devmem_copy_threshold) {
/* Favor dev reg access instead of relying on HMEM copy functions. */
if (dev_reg_copy) {
ret = ofi_hmem_dev_reg_copy_from_hmem(md->info.iface,
md->handle,
dest, src, size);
Expand Down Expand Up @@ -2438,6 +2445,9 @@ struct cxip_ep_obj {
struct fi_tx_attr tx_attr;
struct fi_rx_attr rx_attr;

/* Require memcpy's via the dev reg APIs. */
bool require_dev_reg_copy[OFI_HMEM_MAX];

/* Collectives support */
struct cxip_ep_coll_obj coll;
struct cxip_ep_zbcoll_obj zbcoll;
Expand All @@ -2448,6 +2458,25 @@ struct cxip_ep_obj {
struct cxip_portals_table *ptable;
};

int cxip_ep_obj_map(struct cxip_ep_obj *ep, const void *buf, unsigned long len,
uint64_t flags, struct cxip_md **md);

static inline void
cxip_ep_obj_copy_to_md(struct cxip_ep_obj *ep, struct cxip_md *md, void *dest,
const void *src, size_t size)
{
cxip_copy_to_md(md, dest, src, size,
ep->require_dev_reg_copy[md->info.iface]);
}

static inline void
cxip_ep_obj_copy_from_md(struct cxip_ep_obj *ep, struct cxip_md *md, void *dest,
const void *src, size_t size)
{
cxip_copy_from_md(md, dest, src, size,
ep->require_dev_reg_copy[md->info.iface]);
}

static inline void cxip_txc_otx_reqs_inc(struct cxip_txc *txc)
{
assert(ofi_genlock_held(&txc->ep_obj->lock) == 1);
Expand Down Expand Up @@ -3641,17 +3670,19 @@ cxip_txc_copy_from_hmem(struct cxip_txc *txc, struct cxip_md *hmem_md,
*/
if (!cxip_env.fork_safe_requested) {
if (!hmem_md) {
ret = cxip_map(domain, hmem_src, size, 0, &hmem_md);
ret = cxip_ep_obj_map(txc->ep_obj, hmem_src, size, 0,
&hmem_md);
if (ret) {
TXC_WARN(txc, "cxip_map failed: %d:%s\n", ret,
fi_strerror(-ret));
TXC_WARN(txc, "cxip_ep_obj_map failed: %d:%s\n",
ret, fi_strerror(-ret));
return ret;
}

unmap_hmem_md = true;
}

cxip_copy_from_md(hmem_md, dest, hmem_src, size);
cxip_ep_obj_copy_from_md(txc->ep_obj, hmem_md, dest, hmem_src,
size);
if (unmap_hmem_md)
cxip_unmap(hmem_md);

Expand Down
15 changes: 9 additions & 6 deletions prov/cxi/src/cxip_atomic.c
Original file line number Diff line number Diff line change
Expand Up @@ -612,8 +612,9 @@ static int cxip_amo_emit_idc(struct cxip_txc *txc,
if (result_mr) {
result_md = result_mr->md;
} else {
ret = cxip_map(dom, result, atomic_type_len, 0,
&req->amo.result_md);
ret = cxip_ep_obj_map(txc->ep_obj, result,
atomic_type_len, 0,
&req->amo.result_md);
if (ret) {
TXC_WARN_RET(txc, ret,
"Failed to map result buffer\n");
Expand Down Expand Up @@ -930,8 +931,9 @@ static int cxip_amo_emit_dma(struct cxip_txc *txc,
/* Optionally register result MR. */
if (result) {
if (!result_mr) {
ret = cxip_map(dom, result, atomic_type_len, 0,
&req->amo.result_md);
ret = cxip_ep_obj_map(txc->ep_obj, result,
atomic_type_len, 0,
&req->amo.result_md);
if (ret) {
TXC_WARN(txc,
"Failed to map result buffer: %d:%s\n",
Expand Down Expand Up @@ -1017,8 +1019,9 @@ static int cxip_amo_emit_dma(struct cxip_txc *txc,
buf_md = buf_mr->md;
} else {
/* Map user operand buffer for DMA command. */
ret = cxip_map(dom, buf, atomic_type_len, 0,
&req->amo.oper1_md);
ret = cxip_ep_obj_map(txc->ep_obj, buf,
atomic_type_len, 0,
&req->amo.oper1_md);
if (ret) {
TXC_WARN(txc,
"Failed to map operand buffer: %d:%s\n",
Expand Down
4 changes: 2 additions & 2 deletions prov/cxi/src/cxip_coll.c
Original file line number Diff line number Diff line change
Expand Up @@ -1246,8 +1246,8 @@ static int _coll_add_buffers(struct cxip_coll_pte *coll_pte, size_t size,
ret = -FI_ENOMEM;
goto out;
}
ret = cxip_map(coll_pte->ep_obj->domain, (void *)buf->buffer,
size, 0, &buf->cxi_md);
ret = cxip_ep_obj_map(coll_pte->ep_obj, (void *)buf->buffer,
size, 0, &buf->cxi_md);
if (ret)
goto del_msg;
buf->bufsiz = size;
Expand Down
60 changes: 60 additions & 0 deletions prov/cxi/src/cxip_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -1118,6 +1118,15 @@ int cxip_ep_getopt_priv(struct cxip_ep *ep, int level, int optname,
*optlen = sizeof(size_t);
break;

case FI_OPT_CUDA_API_PERMITTED:
if (!optval || !optlen)
return -FI_EINVAL;
if (*optlen < sizeof(bool))
return -FI_ETOOSMALL;

*(bool *)optval =
!ep->ep_obj->require_dev_reg_copy[FI_HMEM_CUDA];
break;
default:
return -FI_ENOPROTOOPT;
}
Expand All @@ -1140,6 +1149,7 @@ int cxip_ep_setopt_priv(struct cxip_ep *ep, int level, int optname,
const void *optval, size_t optlen)
{
size_t min_multi_recv;
bool cuda_api_permitted;

if (level != FI_OPT_ENDPOINT)
return -FI_ENOPROTOOPT;
Expand All @@ -1158,6 +1168,30 @@ int cxip_ep_setopt_priv(struct cxip_ep *ep, int level, int optname,
}
ep->ep_obj->rxc->min_multi_recv = min_multi_recv;
break;
/*
* If GDRCopy is required by the application (ie. it has set
* FI_OPT_CUDA_API_PERMITTED), and is not available, return not
* supported.
*/
case FI_OPT_CUDA_API_PERMITTED:
if (optlen != sizeof(bool))
return -FI_EINVAL;

if (!hmem_ops[FI_HMEM_CUDA].initialized) {
CXIP_WARN("FI_OPT_CUDA_API_PERMITTED cannot be set when CUDA library or CUDA device is not available\n");
return -FI_EOPNOTSUPP;
}

cuda_api_permitted = *(bool *)optval;

if (!cuda_api_permitted && !cuda_is_gdrcopy_enabled())
return -FI_EOPNOTSUPP;

if (!cxip_env.force_dev_reg_copy) {
ep->ep_obj->require_dev_reg_copy[FI_HMEM_CUDA] =
!cuda_api_permitted;
}
break;

default:
return -FI_ENOPROTOOPT;
Expand Down Expand Up @@ -1260,6 +1294,12 @@ int cxip_alloc_endpoint(struct cxip_domain *cxip_dom, struct fi_info *hints,
ep_obj->src_addr.pid = pid;
ep_obj->fi_addr = FI_ADDR_NOTAVAIL;

/* Default to allowing non-dev reg copy APIs unless the caller
* disables it.
*/
for (i = 0; i < OFI_HMEM_MAX; i++)
ep_obj->require_dev_reg_copy[i] = cxip_env.force_dev_reg_copy;

ofi_atomic_initialize32(&ep_obj->txq_ref, 0);
ofi_atomic_initialize32(&ep_obj->tgq_ref, 0);

Expand Down Expand Up @@ -1332,6 +1372,26 @@ int cxip_alloc_endpoint(struct cxip_domain *cxip_dom, struct fi_info *hints,
return ret;
}

int cxip_ep_obj_map(struct cxip_ep_obj *ep, const void *buf, unsigned long len,
uint64_t flags, struct cxip_md **md)
{
struct cxip_domain *dom = ep->domain;
int ret;

ret = cxip_map(dom, buf, len, flags, md);
if (ret != FI_SUCCESS)
return ret;

if (ep->require_dev_reg_copy[(*md)->info.iface] &&
!((*md)->handle_valid)) {
CXIP_WARN("Required dev registration copy failed\n");
cxip_unmap(*md);
return -FI_EOPNOTSUPP;
}

return FI_SUCCESS;
}

/*
* cxip_endpoint() - Provider fi_endpoint() implementation.
*/
Expand Down
7 changes: 7 additions & 0 deletions prov/cxi/src/cxip_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,7 @@ struct cxip_environment cxip_env = {
CXIP_MR_CACHE_EVENTS_DISABLE_POLL_NSECS,
.mr_cache_events_disable_le_poll_nsecs =
CXIP_MR_CACHE_EVENTS_DISABLE_LE_POLL_NSECS,
.force_dev_reg_copy = false,
};

static void cxip_env_init(void)
Expand Down Expand Up @@ -1288,6 +1289,12 @@ static void cxip_env_init(void)
fi_param_get_size_t(&cxip_prov, "mr_cache_events_disable_le_poll_nsecs",
&cxip_env.mr_cache_events_disable_le_poll_nsecs);

fi_param_define(&cxip_prov, "force_dev_reg_copy", FI_PARAM_BOOL,
"Force device register copy operations. Default: %d",
cxip_env.force_dev_reg_copy);
fi_param_get_bool(&cxip_prov, "force_dev_reg_copy",
&cxip_env.force_dev_reg_copy);

set_system_page_size();
}

Expand Down
13 changes: 13 additions & 0 deletions prov/cxi/src/cxip_mr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,15 @@ static int cxip_mr_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
break;
}

/* Zero length MRs do not have MD. */
if (mr->md &&
ep->ep_obj->require_dev_reg_copy[mr->md->info.iface] &&
!mr->md->handle_valid) {
CXIP_WARN("Cannot bind to endpoint without required dev reg support\n");
ret = -FI_EOPNOTSUPP;
break;
}

mr->ep = ep;
ofi_atomic_inc32(&ep->ep_obj->ref);
break;
Expand Down Expand Up @@ -1439,6 +1448,10 @@ static int cxip_regattr(struct fid *fid, const struct fi_mr_attr *attr,
_mr->mr_fid.key = _mr->key;

if (_mr->len) {
/* Do not check whether cuda_api_permitted is set at this point,
* because the mr is not bound to an endpoint. Check instead in
* cxip_mr_bind().
*/
ret = cxip_map(_mr->domain, (void *)_mr->buf, _mr->len, 0,
&_mr->md);
if (ret) {
Expand Down
12 changes: 6 additions & 6 deletions prov/cxi/src/cxip_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ int cxip_recv_req_alloc(struct cxip_rxc *rxc, void *buf, size_t len,
int (*recv_cb)(struct cxip_req *req,
const union c_event *event))
{
struct cxip_domain *dom = rxc->domain;
struct cxip_req *req;
struct cxip_md *recv_md = NULL;
int ret;
Expand All @@ -79,7 +78,8 @@ int cxip_recv_req_alloc(struct cxip_rxc *rxc, void *buf, size_t len,
if (len) {
/* If hybrid descriptor not passed, map for dma */
if (!md) {
ret = cxip_map(dom, (void *)buf, len, 0, &recv_md);
ret = cxip_ep_obj_map(rxc->ep_obj, (void *)buf, len, 0,
&recv_md);
if (ret) {
RXC_WARN(rxc,
"Map of recv buffer failed: %d, %s\n",
Expand Down Expand Up @@ -718,8 +718,8 @@ int cxip_send_buf_init(struct cxip_req *req)

/* Triggered operation always requires memory registration. */
if (req->triggered)
return cxip_map(txc->domain, req->send.buf, req->send.len, 0,
&req->send.send_md);
return cxip_ep_obj_map(txc->ep_obj, req->send.buf,
req->send.len, 0, &req->send.send_md);

/* FI_INJECT operations always require an internal bounce buffer. This
* is needed to replay FI_INJECT operations which may experience flow
Expand Down Expand Up @@ -777,8 +777,8 @@ int cxip_send_buf_init(struct cxip_req *req)
}

/* Everything else requires memory registeration. */
return cxip_map(txc->domain, req->send.buf, req->send.len, 0,
&req->send.send_md);
return cxip_ep_obj_map(txc->ep_obj, req->send.buf, req->send.len, 0,
&req->send.send_md);

err_buf_fini:
cxip_send_buf_fini(req);
Expand Down
5 changes: 3 additions & 2 deletions prov/cxi/src/cxip_msg_hpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -629,8 +629,9 @@ static int cxip_ux_send(struct cxip_req *match_req, struct cxip_req *oflow_req,

/* Copy data out of overflow buffer. */
oflow_bytes = MIN(put_event->tgt_long.mlength, match_req->data_len);
cxip_copy_to_md(match_req->recv.recv_md, match_req->recv.recv_buf,
oflow_va, oflow_bytes);
cxip_ep_obj_copy_to_md(match_req->recv.rxc->ep_obj,
match_req->recv.recv_md,
match_req->recv.recv_buf, oflow_va, oflow_bytes);

if (oflow_req->type == CXIP_REQ_OFLOW)
oflow_req_put_bytes(oflow_req, put_event->tgt_long.mlength);
Expand Down
6 changes: 3 additions & 3 deletions prov/cxi/src/cxip_msg_rnr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1174,9 +1174,9 @@ cxip_send_common(struct cxip_txc *txc, uint32_t tclass, const void *buf,

if (send_req->send.len && !idc) {
if (!mr) {
ret = cxip_map(txc->domain, send_req->send.buf,
send_req->send.len, 0,
&send_req->send.send_md);
ret = cxip_ep_obj_map(txc->ep_obj, send_req->send.buf,
send_req->send.len, 0,
&send_req->send.send_md);
if (ret) {
TXC_WARN(txc,
"Local buffer map failed: %d %s\n",
Expand Down
4 changes: 2 additions & 2 deletions prov/cxi/src/cxip_ptelist_buf.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ cxip_ptelist_buf_alloc(struct cxip_ptelist_bufpool *pool)
}
}

ret = cxip_map(rxc->base.domain, buf->data, pool->attr.buf_size,
OFI_MR_NOCACHE, &buf->md);
ret = cxip_ep_obj_map(rxc->base.ep_obj, buf->data, pool->attr.buf_size,
OFI_MR_NOCACHE, &buf->md);
if (ret)
goto err_unreg_buf;

Expand Down
3 changes: 2 additions & 1 deletion prov/cxi/src/cxip_rma.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,8 @@ static int cxip_rma_emit_dma(struct cxip_txc *txc, const void *buf, size_t len,
} else {
assert(req != NULL);

ret = cxip_map(dom, buf, len, 0, &req->rma.local_md);
ret = cxip_ep_obj_map(txc->ep_obj, buf, len, 0,
&req->rma.local_md);
if (ret) {
TXC_WARN(txc, "Failed to map buffer: %d:%s\n",
ret, fi_strerror(-ret));
Expand Down
Loading