From 5e4ebe18a1f3d940b03209465e8f0121e5c8232b Mon Sep 17 00:00:00 2001 From: Ryan Hankins Date: Thu, 14 Nov 2024 19:48:00 +0000 Subject: [PATCH 1/3] prov/cxi: Support FI_OPT_CUDA_API_PERMITTED If GDRCopy is required by the application (ie. it has set FI_OPT_CUDA_API_PERMITTED), and is not available, return not supported, eliminating deadlocks due to calls to cudaMemcpy interacting with CUDA applications. Signed-off-by: Ian Ziemba --- prov/cxi/include/cxip.h | 50 ++++++++++++++++++++++------ prov/cxi/src/cxip_atomic.c | 15 +++++---- prov/cxi/src/cxip_coll.c | 4 +-- prov/cxi/src/cxip_ep.c | 58 +++++++++++++++++++++++++++++++++ prov/cxi/src/cxip_mr.c | 13 ++++++++ prov/cxi/src/cxip_msg.c | 12 +++---- prov/cxi/src/cxip_msg_hpc.c | 5 +-- prov/cxi/src/cxip_msg_rnr.c | 6 ++-- prov/cxi/src/cxip_ptelist_buf.c | 4 +-- prov/cxi/src/cxip_rma.c | 3 +- prov/cxi/src/cxip_txc.c | 4 +-- 11 files changed, 140 insertions(+), 34 deletions(-) diff --git a/prov/cxi/include/cxip.h b/prov/cxi/include/cxip.h index 61fd43be6b2..4494b9b703a 100644 --- a/prov/cxi/include/cxip.h +++ b/prov/cxi/include/cxip.h @@ -1963,13 +1963,16 @@ struct cxip_rxc_rnr { }; static inline void cxip_copy_to_md(struct cxip_md *md, void *dest, - const void *src, size_t size) + const void *src, size_t size, + bool require_dev_reg_copy) { ssize_t ret __attribute__((unused)); struct iovec iov; + bool dev_reg_copy = require_dev_reg_copy || + (md->handle_valid && size <= cxip_env.safe_devmem_copy_threshold); - /* Favor CPU store access instead of relying on HMEM copy functions. */ - if (md->handle_valid && size <= cxip_env.safe_devmem_copy_threshold) { + /* Favor dev reg access instead of relying on HMEM copy functions. */ + if (dev_reg_copy) { ret = ofi_hmem_dev_reg_copy_to_hmem(md->info.iface, md->handle, dest, src, size); assert(ret == FI_SUCCESS); @@ -1985,13 +1988,16 @@ static inline void cxip_copy_to_md(struct cxip_md *md, void *dest, } static inline void cxip_copy_from_md(struct cxip_md *md, void *dest, - const void *src, size_t size) + const void *src, size_t size, + bool require_dev_reg_copy) { ssize_t ret __attribute__((unused)); struct iovec iov; + bool dev_reg_copy = require_dev_reg_copy || + (md->handle_valid && size <= cxip_env.safe_devmem_copy_threshold); - /* Favor CPU store access instead of relying on HMEM copy functions. */ - if (md->handle_valid && size <= cxip_env.safe_devmem_copy_threshold) { + /* Favor dev reg access instead of relying on HMEM copy functions. */ + if (dev_reg_copy) { ret = ofi_hmem_dev_reg_copy_from_hmem(md->info.iface, md->handle, dest, src, size); @@ -2438,6 +2444,9 @@ struct cxip_ep_obj { struct fi_tx_attr tx_attr; struct fi_rx_attr rx_attr; + /* Require memcpy's via the dev reg APIs. 
*/ + bool require_dev_reg_copy[OFI_HMEM_MAX]; + /* Collectives support */ struct cxip_ep_coll_obj coll; struct cxip_ep_zbcoll_obj zbcoll; @@ -2448,6 +2457,25 @@ struct cxip_ep_obj { struct cxip_portals_table *ptable; }; +int cxip_ep_obj_map(struct cxip_ep_obj *ep, const void *buf, unsigned long len, + uint64_t flags, struct cxip_md **md); + +static inline void +cxip_ep_obj_copy_to_md(struct cxip_ep_obj *ep, struct cxip_md *md, void *dest, + const void *src, size_t size) +{ + cxip_copy_to_md(md, dest, src, size, + ep->require_dev_reg_copy[md->info.iface]); +} + +static inline void +cxip_ep_obj_copy_from_md(struct cxip_ep_obj *ep, struct cxip_md *md, void *dest, + const void *src, size_t size) +{ + cxip_copy_from_md(md, dest, src, size, + ep->require_dev_reg_copy[md->info.iface]); +} + static inline void cxip_txc_otx_reqs_inc(struct cxip_txc *txc) { assert(ofi_genlock_held(&txc->ep_obj->lock) == 1); @@ -3641,17 +3669,19 @@ cxip_txc_copy_from_hmem(struct cxip_txc *txc, struct cxip_md *hmem_md, */ if (!cxip_env.fork_safe_requested) { if (!hmem_md) { - ret = cxip_map(domain, hmem_src, size, 0, &hmem_md); + ret = cxip_ep_obj_map(txc->ep_obj, hmem_src, size, 0, + &hmem_md); if (ret) { - TXC_WARN(txc, "cxip_map failed: %d:%s\n", ret, - fi_strerror(-ret)); + TXC_WARN(txc, "cxip_ep_obj_map failed: %d:%s\n", + ret, fi_strerror(-ret)); return ret; } unmap_hmem_md = true; } - cxip_copy_from_md(hmem_md, dest, hmem_src, size); + cxip_ep_obj_copy_from_md(txc->ep_obj, hmem_md, dest, hmem_src, + size); if (unmap_hmem_md) cxip_unmap(hmem_md); diff --git a/prov/cxi/src/cxip_atomic.c b/prov/cxi/src/cxip_atomic.c index 49218a324a3..0b8f0d4867b 100644 --- a/prov/cxi/src/cxip_atomic.c +++ b/prov/cxi/src/cxip_atomic.c @@ -612,8 +612,9 @@ static int cxip_amo_emit_idc(struct cxip_txc *txc, if (result_mr) { result_md = result_mr->md; } else { - ret = cxip_map(dom, result, atomic_type_len, 0, - &req->amo.result_md); + ret = cxip_ep_obj_map(txc->ep_obj, result, + atomic_type_len, 0, + &req->amo.result_md); if (ret) { TXC_WARN_RET(txc, ret, "Failed to map result buffer\n"); @@ -930,8 +931,9 @@ static int cxip_amo_emit_dma(struct cxip_txc *txc, /* Optionally register result MR. */ if (result) { if (!result_mr) { - ret = cxip_map(dom, result, atomic_type_len, 0, - &req->amo.result_md); + ret = cxip_ep_obj_map(txc->ep_obj, result, + atomic_type_len, 0, + &req->amo.result_md); if (ret) { TXC_WARN(txc, "Failed to map result buffer: %d:%s\n", @@ -1017,8 +1019,9 @@ static int cxip_amo_emit_dma(struct cxip_txc *txc, buf_md = buf_mr->md; } else { /* Map user operand buffer for DMA command. 
*/ - ret = cxip_map(dom, buf, atomic_type_len, 0, - &req->amo.oper1_md); + ret = cxip_ep_obj_map(txc->ep_obj, buf, + atomic_type_len, 0, + &req->amo.oper1_md); if (ret) { TXC_WARN(txc, "Failed to map operand buffer: %d:%s\n", diff --git a/prov/cxi/src/cxip_coll.c b/prov/cxi/src/cxip_coll.c index 9d9c6d73316..8d503c1c7b0 100644 --- a/prov/cxi/src/cxip_coll.c +++ b/prov/cxi/src/cxip_coll.c @@ -1246,8 +1246,8 @@ static int _coll_add_buffers(struct cxip_coll_pte *coll_pte, size_t size, ret = -FI_ENOMEM; goto out; } - ret = cxip_map(coll_pte->ep_obj->domain, (void *)buf->buffer, - size, 0, &buf->cxi_md); + ret = cxip_ep_obj_map(coll_pte->ep_obj, (void *)buf->buffer, + size, 0, &buf->cxi_md); if (ret) goto del_msg; buf->bufsiz = size; diff --git a/prov/cxi/src/cxip_ep.c b/prov/cxi/src/cxip_ep.c index 7be36c0d56d..bc5cc9ead2e 100644 --- a/prov/cxi/src/cxip_ep.c +++ b/prov/cxi/src/cxip_ep.c @@ -1118,6 +1118,15 @@ int cxip_ep_getopt_priv(struct cxip_ep *ep, int level, int optname, *optlen = sizeof(size_t); break; + case FI_OPT_CUDA_API_PERMITTED: + if (!optval || !optlen) + return -FI_EINVAL; + if (*optlen < sizeof(bool)) + return -FI_ETOOSMALL; + + *(bool *)optval = + !ep->ep_obj->require_dev_reg_copy[FI_HMEM_CUDA]; + break; default: return -FI_ENOPROTOOPT; } @@ -1140,6 +1149,7 @@ int cxip_ep_setopt_priv(struct cxip_ep *ep, int level, int optname, const void *optval, size_t optlen) { size_t min_multi_recv; + bool cuda_api_permitted; if (level != FI_OPT_ENDPOINT) return -FI_ENOPROTOOPT; @@ -1158,6 +1168,28 @@ int cxip_ep_setopt_priv(struct cxip_ep *ep, int level, int optname, } ep->ep_obj->rxc->min_multi_recv = min_multi_recv; break; + /* + * If GDRCopy is required by the application (ie. it has set + * FI_OPT_CUDA_API_PERMITTED), and is not available, return not + * supported. + */ + case FI_OPT_CUDA_API_PERMITTED: + if (optlen != sizeof(bool)) + return -FI_EINVAL; + + if (!hmem_ops[FI_HMEM_CUDA].initialized) { + CXIP_WARN("FI_OPT_CUDA_API_PERMITTED cannot be set when CUDA library or CUDA device is not available\n"); + return -FI_EOPNOTSUPP; + } + + cuda_api_permitted = *(bool *)optval; + + if (!cuda_api_permitted && !cuda_is_gdrcopy_enabled()) + return -FI_EOPNOTSUPP; + + ep->ep_obj->require_dev_reg_copy[FI_HMEM_CUDA] = + !cuda_api_permitted; + break; default: return -FI_ENOPROTOOPT; @@ -1260,6 +1292,12 @@ int cxip_alloc_endpoint(struct cxip_domain *cxip_dom, struct fi_info *hints, ep_obj->src_addr.pid = pid; ep_obj->fi_addr = FI_ADDR_NOTAVAIL; + /* Default to allowing non-dev reg copy APIs unless the caller + * disables it. + */ + for (i = 0; i < OFI_HMEM_MAX; i++) + ep_obj->require_dev_reg_copy[i] = false; + ofi_atomic_initialize32(&ep_obj->txq_ref, 0); ofi_atomic_initialize32(&ep_obj->tgq_ref, 0); @@ -1332,6 +1370,26 @@ int cxip_alloc_endpoint(struct cxip_domain *cxip_dom, struct fi_info *hints, return ret; } +int cxip_ep_obj_map(struct cxip_ep_obj *ep, const void *buf, unsigned long len, + uint64_t flags, struct cxip_md **md) +{ + struct cxip_domain *dom = ep->domain; + int ret; + + ret = cxip_map(dom, buf, len, flags, md); + if (ret != FI_SUCCESS) + return ret; + + if (ep->require_dev_reg_copy[(*md)->info.iface] && + !((*md)->handle_valid)) { + CXIP_WARN("Required dev registration copy failed\n"); + cxip_unmap(*md); + return -FI_EOPNOTSUPP; + } + + return FI_SUCCESS; +} + /* * cxip_endpoint() - Provider fi_endpoint() implementation. 
*/ diff --git a/prov/cxi/src/cxip_mr.c b/prov/cxi/src/cxip_mr.c index b52e6d22d1a..34d8ead3576 100644 --- a/prov/cxi/src/cxip_mr.c +++ b/prov/cxi/src/cxip_mr.c @@ -1283,6 +1283,15 @@ static int cxip_mr_bind(struct fid *fid, struct fid *bfid, uint64_t flags) break; } + /* Zero length MRs do not have MD. */ + if (mr->md && + ep->ep_obj->require_dev_reg_copy[mr->md->info.iface] && + !mr->md->handle_valid) { + CXIP_WARN("Cannot bind to endpoint without required dev reg support\n"); + ret = -FI_EOPNOTSUPP; + break; + } + mr->ep = ep; ofi_atomic_inc32(&ep->ep_obj->ref); break; @@ -1439,6 +1448,10 @@ static int cxip_regattr(struct fid *fid, const struct fi_mr_attr *attr, _mr->mr_fid.key = _mr->key; if (_mr->len) { + /* Do not check whether cuda_api_permitted is set at this point, + * because the mr is not bound to an endpoint. Check instead in + * cxip_mr_bind(). + */ ret = cxip_map(_mr->domain, (void *)_mr->buf, _mr->len, 0, &_mr->md); if (ret) { diff --git a/prov/cxi/src/cxip_msg.c b/prov/cxi/src/cxip_msg.c index ef8356943c2..a8309847802 100644 --- a/prov/cxi/src/cxip_msg.c +++ b/prov/cxi/src/cxip_msg.c @@ -60,7 +60,6 @@ int cxip_recv_req_alloc(struct cxip_rxc *rxc, void *buf, size_t len, int (*recv_cb)(struct cxip_req *req, const union c_event *event)) { - struct cxip_domain *dom = rxc->domain; struct cxip_req *req; struct cxip_md *recv_md = NULL; int ret; @@ -79,7 +78,8 @@ int cxip_recv_req_alloc(struct cxip_rxc *rxc, void *buf, size_t len, if (len) { /* If hybrid descriptor not passed, map for dma */ if (!md) { - ret = cxip_map(dom, (void *)buf, len, 0, &recv_md); + ret = cxip_ep_obj_map(rxc->ep_obj, (void *)buf, len, 0, + &recv_md); if (ret) { RXC_WARN(rxc, "Map of recv buffer failed: %d, %s\n", @@ -718,8 +718,8 @@ int cxip_send_buf_init(struct cxip_req *req) /* Triggered operation always requires memory registration. */ if (req->triggered) - return cxip_map(txc->domain, req->send.buf, req->send.len, 0, - &req->send.send_md); + return cxip_ep_obj_map(txc->ep_obj, req->send.buf, + req->send.len, 0, &req->send.send_md); /* FI_INJECT operations always require an internal bounce buffer. This * is needed to replay FI_INJECT operations which may experience flow @@ -777,8 +777,8 @@ int cxip_send_buf_init(struct cxip_req *req) } /* Everything else requires memory registeration. */ - return cxip_map(txc->domain, req->send.buf, req->send.len, 0, - &req->send.send_md); + return cxip_ep_obj_map(txc->ep_obj, req->send.buf, req->send.len, 0, + &req->send.send_md); err_buf_fini: cxip_send_buf_fini(req); diff --git a/prov/cxi/src/cxip_msg_hpc.c b/prov/cxi/src/cxip_msg_hpc.c index c6e0bcc35fd..4980a3fd3b0 100644 --- a/prov/cxi/src/cxip_msg_hpc.c +++ b/prov/cxi/src/cxip_msg_hpc.c @@ -629,8 +629,9 @@ static int cxip_ux_send(struct cxip_req *match_req, struct cxip_req *oflow_req, /* Copy data out of overflow buffer. 
*/ oflow_bytes = MIN(put_event->tgt_long.mlength, match_req->data_len); - cxip_copy_to_md(match_req->recv.recv_md, match_req->recv.recv_buf, - oflow_va, oflow_bytes); + cxip_ep_obj_copy_to_md(match_req->recv.rxc->ep_obj, + match_req->recv.recv_md, + match_req->recv.recv_buf, oflow_va, oflow_bytes); if (oflow_req->type == CXIP_REQ_OFLOW) oflow_req_put_bytes(oflow_req, put_event->tgt_long.mlength); diff --git a/prov/cxi/src/cxip_msg_rnr.c b/prov/cxi/src/cxip_msg_rnr.c index ec5064a4fe5..7b4415ea1e8 100644 --- a/prov/cxi/src/cxip_msg_rnr.c +++ b/prov/cxi/src/cxip_msg_rnr.c @@ -1174,9 +1174,9 @@ cxip_send_common(struct cxip_txc *txc, uint32_t tclass, const void *buf, if (send_req->send.len && !idc) { if (!mr) { - ret = cxip_map(txc->domain, send_req->send.buf, - send_req->send.len, 0, - &send_req->send.send_md); + ret = cxip_ep_obj_map(txc->ep_obj, send_req->send.buf, + send_req->send.len, 0, + &send_req->send.send_md); if (ret) { TXC_WARN(txc, "Local buffer map failed: %d %s\n", diff --git a/prov/cxi/src/cxip_ptelist_buf.c b/prov/cxi/src/cxip_ptelist_buf.c index b8ee08a3733..a313ccf0be4 100644 --- a/prov/cxi/src/cxip_ptelist_buf.c +++ b/prov/cxi/src/cxip_ptelist_buf.c @@ -132,8 +132,8 @@ cxip_ptelist_buf_alloc(struct cxip_ptelist_bufpool *pool) } } - ret = cxip_map(rxc->base.domain, buf->data, pool->attr.buf_size, - OFI_MR_NOCACHE, &buf->md); + ret = cxip_ep_obj_map(rxc->base.ep_obj, buf->data, pool->attr.buf_size, + OFI_MR_NOCACHE, &buf->md); if (ret) goto err_unreg_buf; diff --git a/prov/cxi/src/cxip_rma.c b/prov/cxi/src/cxip_rma.c index 9aa1ace679f..660c29862de 100644 --- a/prov/cxi/src/cxip_rma.c +++ b/prov/cxi/src/cxip_rma.c @@ -269,7 +269,8 @@ static int cxip_rma_emit_dma(struct cxip_txc *txc, const void *buf, size_t len, } else { assert(req != NULL); - ret = cxip_map(dom, buf, len, 0, &req->rma.local_md); + ret = cxip_ep_obj_map(txc->ep_obj, buf, len, 0, + &req->rma.local_md); if (ret) { TXC_WARN(txc, "Failed to map buffer: %d:%s\n", ret, fi_strerror(-ret)); diff --git a/prov/cxi/src/cxip_txc.c b/prov/cxi/src/cxip_txc.c index 94bc470ba68..fdbd64af604 100644 --- a/prov/cxi/src/cxip_txc.c +++ b/prov/cxi/src/cxip_txc.c @@ -63,8 +63,8 @@ int cxip_ibuf_chunk_init(struct ofi_bufpool_region *region) struct cxip_md *md; int ret; - ret = cxip_map(txc->domain, region->mem_region, - region->pool->region_size, OFI_MR_NOCACHE, &md); + ret = cxip_ep_obj_map(txc->ep_obj, region->mem_region, + region->pool->region_size, OFI_MR_NOCACHE, &md); if (ret != FI_SUCCESS) { CXIP_WARN("Failed to map inject buffer chunk\n"); return ret; From edefae5e6a9339b44bb39de4b19793e0a8be5ecf Mon Sep 17 00:00:00 2001 From: Ian Ziemba Date: Tue, 19 Nov 2024 12:04:26 -0600 Subject: [PATCH 2/3] prov/cxi: Define FI_CXI_FORCE_DEV_REG_COPY FI_CXI_FORCE_DEV_REG_COPY will force the CXI provider to use the HMEM device register copy routines. If not supported, RDMA operations or memory registration will fail. Signed-off-by: Ian Ziemba --- man/fi_cxi.7.md | 4 ++++ prov/cxi/include/cxip.h | 1 + prov/cxi/src/cxip_ep.c | 8 +++++--- prov/cxi/src/cxip_info.c | 7 +++++++ 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/man/fi_cxi.7.md b/man/fi_cxi.7.md index 384026f0192..4c6e46d14cc 100644 --- a/man/fi_cxi.7.md +++ b/man/fi_cxi.7.md @@ -1301,6 +1301,10 @@ The CXI provider checks for the following environment variables: : Max amount of time to poll when LE invalidate disabling an MR configured with MR match events. +*FI_CXI_FORCE_DEV_REG_COPY* +: Force the CXI provider to use the HMEM device register copy routines. 
If not + supported, RDMA operations or memory registration will fail. + Note: Use the fi_info utility to query provider environment variables: fi_info -p cxi -e diff --git a/prov/cxi/include/cxip.h b/prov/cxi/include/cxip.h index 4494b9b703a..68ea4c0ce7e 100644 --- a/prov/cxi/include/cxip.h +++ b/prov/cxi/include/cxip.h @@ -324,6 +324,7 @@ struct cxip_environment { int hybrid_unexpected_msg_preemptive; size_t mr_cache_events_disable_poll_nsecs; size_t mr_cache_events_disable_le_poll_nsecs; + int force_dev_reg_copy; }; extern struct cxip_environment cxip_env; diff --git a/prov/cxi/src/cxip_ep.c b/prov/cxi/src/cxip_ep.c index bc5cc9ead2e..aebec245ef7 100644 --- a/prov/cxi/src/cxip_ep.c +++ b/prov/cxi/src/cxip_ep.c @@ -1187,8 +1187,10 @@ int cxip_ep_setopt_priv(struct cxip_ep *ep, int level, int optname, if (!cuda_api_permitted && !cuda_is_gdrcopy_enabled()) return -FI_EOPNOTSUPP; - ep->ep_obj->require_dev_reg_copy[FI_HMEM_CUDA] = - !cuda_api_permitted; + if (!cxip_env.force_dev_reg_copy) { + ep->ep_obj->require_dev_reg_copy[FI_HMEM_CUDA] = + !cuda_api_permitted; + } break; default: @@ -1296,7 +1298,7 @@ int cxip_alloc_endpoint(struct cxip_domain *cxip_dom, struct fi_info *hints, * disables it. */ for (i = 0; i < OFI_HMEM_MAX; i++) - ep_obj->require_dev_reg_copy[i] = false; + ep_obj->require_dev_reg_copy[i] = cxip_env.force_dev_reg_copy; ofi_atomic_initialize32(&ep_obj->txq_ref, 0); ofi_atomic_initialize32(&ep_obj->tgq_ref, 0); diff --git a/prov/cxi/src/cxip_info.c b/prov/cxi/src/cxip_info.c index af94964ab5f..f0da25e315e 100644 --- a/prov/cxi/src/cxip_info.c +++ b/prov/cxi/src/cxip_info.c @@ -670,6 +670,7 @@ struct cxip_environment cxip_env = { CXIP_MR_CACHE_EVENTS_DISABLE_POLL_NSECS, .mr_cache_events_disable_le_poll_nsecs = CXIP_MR_CACHE_EVENTS_DISABLE_LE_POLL_NSECS, + .force_dev_reg_copy = false, }; static void cxip_env_init(void) @@ -1288,6 +1289,12 @@ static void cxip_env_init(void) fi_param_get_size_t(&cxip_prov, "mr_cache_events_disable_le_poll_nsecs", &cxip_env.mr_cache_events_disable_le_poll_nsecs); + fi_param_define(&cxip_prov, "force_dev_reg_copy", FI_PARAM_BOOL, + "Force device register copy operations. 
Default: %d", + cxip_env.force_dev_reg_copy); + fi_param_get_bool(&cxip_prov, "force_dev_reg_copy", + &cxip_env.force_dev_reg_copy); + set_system_page_size(); } From 75fbfd2f63970220002a020d271ca8511a40c16c Mon Sep 17 00:00:00 2001 From: Ian Ziemba Date: Tue, 19 Nov 2024 17:30:00 -0600 Subject: [PATCH 3/3] prov/cxi: Add FI_OPT_CUDA_API_PERMITTED tests Signed-off-by: Ian Ziemba --- prov/cxi/test/cuda.c | 149 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/prov/cxi/test/cuda.c b/prov/cxi/test/cuda.c index 53338f60fd6..4776556635a 100644 --- a/prov/cxi/test/cuda.c +++ b/prov/cxi/test/cuda.c @@ -431,3 +431,152 @@ Test(cuda, large_transfer) { cuda_dev_memory_test(LARGE_XFER, 2, false, true); } + +static void verify_dev_reg_eopnotsupp_local_op(void) +{ + void *buf; + cudaError_t cuda_ret; + size_t buf_size = 1024; + int ret; + + cuda_ret = cudaMalloc(&buf, buf_size); + cr_assert_eq(cuda_ret, cudaSuccess, "cudaMalloc failed: %d", cuda_ret); + + ret = fi_recv(cxit_ep, buf, buf_size, NULL, cxit_ep_fi_addr, NULL); + cr_assert_eq(ret, -FI_EOPNOTSUPP, "fi_recv failed: %d", ret); + + cuda_ret = cudaFree(buf); + cr_assert_eq(cuda_ret, cudaSuccess, "cudaFree failed: %d", cuda_ret); +} + +static void verify_dev_reg_eopnotsupp_remote_mr(void) +{ + int ret; + void *buf; + cudaError_t cuda_ret; + size_t buf_size = 1024; + struct fid_mr *fid_mr; + + cuda_ret = cudaMalloc(&buf, buf_size); + cr_assert_eq(cuda_ret, cudaSuccess, "cudaMalloc failed: %d", cuda_ret); + + ret = fi_mr_reg(cxit_domain, buf, buf_size, FI_READ, 0, 0x123, 0, + &fid_mr, NULL); + cr_assert_eq(ret, FI_SUCCESS, "fi_mr_reg failed: %d", ret); + + ret = fi_mr_bind(fid_mr, &(cxit_ep->fid), 0); + cr_assert_eq(ret, -FI_EOPNOTSUPP, "fi_mr_bind failed: %d", ret); + + ret = fi_close(&fid_mr->fid); + cr_assert_eq(ret, FI_SUCCESS, "fi_close MR failed: %d", ret); + + cuda_ret = cudaFree(buf); + cr_assert_eq(cuda_ret, cudaSuccess, "cudaFree failed: %d", cuda_ret); +} + +Test(cuda, verify_fi_opt_cuda_api_permitted_local_operation) +{ + int ret; + bool optval = false; + + ret = setenv("FI_CXI_DISABLE_HMEM_DEV_REGISTER", "1", 1); + cr_assert_eq(ret, 0, "setenv failed: %d", -errno); + + cxit_setup_msg(); + + ret = fi_setopt(&(cxit_ep->fid), FI_OPT_ENDPOINT, + FI_OPT_CUDA_API_PERMITTED, &optval, sizeof(optval)); + assert(ret == FI_SUCCESS); + + verify_dev_reg_eopnotsupp_local_op(); + + cxit_teardown_msg(); +} + +Test(cuda, verify_fi_opt_cuda_api_permitted_remote_mr) +{ + int ret; + bool optval = false; + + ret = setenv("FI_CXI_DISABLE_HMEM_DEV_REGISTER", "1", 1); + cr_assert_eq(ret, 0, "setenv failed: %d", -errno); + + cxit_setup_msg(); + + ret = fi_setopt(&(cxit_ep->fid), FI_OPT_ENDPOINT, + FI_OPT_CUDA_API_PERMITTED, &optval, sizeof(optval)); + assert(ret == FI_SUCCESS); + + verify_dev_reg_eopnotsupp_remote_mr(); + + cxit_teardown_msg(); +} + +Test(cuda, verify_get_fi_opt_cuda_api_permitted) +{ + int ret; + bool optval = false; + size_t size = sizeof(optval); + + ret = setenv("FI_CXI_DISABLE_HMEM_DEV_REGISTER", "1", 1); + cr_assert_eq(ret, 0, "setenv failed: %d", -errno); + + cxit_setup_msg(); + + ret = fi_setopt(&(cxit_ep->fid), FI_OPT_ENDPOINT, + FI_OPT_CUDA_API_PERMITTED, &optval, sizeof(optval)); + assert(ret == FI_SUCCESS); + + optval = true; + + ret = fi_getopt(&(cxit_ep->fid), FI_OPT_ENDPOINT, + FI_OPT_CUDA_API_PERMITTED, &optval, &size); + assert(ret == FI_SUCCESS); + + assert(optval == false); + + cxit_teardown_msg(); +} + +Test(cuda, verify_force_dev_reg_local) +{ + int ret; + + ret = 
setenv("FI_CXI_DISABLE_HMEM_DEV_REGISTER", "1", 1); + cr_assert_eq(ret, 0, "setenv failed: %d", -errno); + + ret = setenv("FI_CXI_FORCE_DEV_REG_COPY", "1", 1); + cr_assert_eq(ret, 0, "setenv failed: %d", -errno); + + cxit_setup_getinfo(); + + cxit_tx_cq_attr.format = FI_CQ_FORMAT_TAGGED; + cxit_av_attr.type = FI_AV_TABLE; + + cxit_fi_hints->domain_attr->data_progress = FI_PROGRESS_MANUAL; + cxit_fi_hints->domain_attr->data_progress = FI_PROGRESS_MANUAL; + + cxit_fi_hints->tx_attr->size = 512; + + cxit_setup_ep(); + + /* Set up RMA objects */ + cxit_create_ep(); + cxit_create_cqs(); + cxit_bind_cqs(); + cxit_create_cntrs(); + cxit_bind_cntrs(); + cxit_create_av(); + cxit_bind_av(); + + ret = fi_enable(cxit_ep); + cr_assert(ret != FI_SUCCESS, "ret is: %d\n", ret); + + /* Tear down RMA objects */ + cxit_destroy_ep(); /* EP must be destroyed before bound objects */ + + cxit_destroy_av(); + cxit_destroy_cntrs(); + cxit_destroy_cqs(); + cxit_teardown_ep(); +}