diff --git a/src/nexus_impl/nexus.cc b/src/nexus_impl/nexus.cc
index 5e46ef86..192cd1cc 100644
--- a/src/nexus_impl/nexus.cc
+++ b/src/nexus_impl/nexus.cc
@@ -24,7 +24,7 @@ Nexus::Nexus(std::string local_uri, size_t numa_node, size_t num_bg_threads)
                 sm_udp_port < (kBaseSmUdpPort + kMaxNumERpcProcesses),
             "Invalid management UDP port");
   rt_assert(num_bg_threads <= kMaxBgThreads, "Too many background threads");
-  rt_assert(numa_node < kMaxNumaNodes, "Invalid NUMA node");
+  rt_assert(numa_node < kMaxNumaNodes || numa_node == kNoNumaNode, "Invalid NUMA node");
 
   kill_switch = false;
 
diff --git a/src/rpc_constants.h b/src/rpc_constants.h
index c7e4edfc..21415567 100644
--- a/src/rpc_constants.h
+++ b/src/rpc_constants.h
@@ -37,6 +37,13 @@ static_assert(kBaseSmUdpPort + kMaxNumERpcProcesses +
  * @brief Maximum number of NUMA nodes per machine
  */
 static constexpr size_t kMaxNumaNodes = 8;
+
+/**
+ * @relates Rpc
+ * @brief Sentinel NUMA node ID: allocate from the regular heap (malloc)
+ * instead of NUMA-bound hugepage memory.
+ */
+static constexpr size_t kNoNumaNode = UINT8_MAX - 1;
 
 /**
  * @relates Rpc
diff --git a/src/rpc_impl/rpc.cc b/src/rpc_impl/rpc.cc
index e782f321..89b04247 100644
--- a/src/rpc_impl/rpc.cc
+++ b/src/rpc_impl/rpc.cc
@@ -29,7 +29,7 @@ Rpc::Rpc(Nexus *nexus, void *context, uint8_t rpc_id,
   rt_assert(rpc_id != kInvalidRpcId, "Invalid Rpc ID");
   rt_assert(!nexus->rpc_id_exists(rpc_id), "Rpc ID already exists");
   rt_assert(phy_port < kMaxPhyPorts, "Invalid physical port");
-  rt_assert(numa_node < kMaxNumaNodes, "Invalid NUMA node");
+  rt_assert(numa_node < kMaxNumaNodes || numa_node == kNoNumaNode, "Invalid NUMA node");
 
   tls_registry = &nexus->tls_registry;
   tls_registry->init();  // Initialize thread-local variables for this thread
diff --git a/src/util/huge_alloc.cc b/src/util/huge_alloc.cc
index f5d4a5d2..1b943727 100644
--- a/src/util/huge_alloc.cc
+++ b/src/util/huge_alloc.cc
@@ -10,7 +10,7 @@ HugeAlloc::HugeAlloc(size_t initial_size, size_t numa_node,
     : numa_node(numa_node),
       reg_mr_func(reg_mr_func),
       dereg_mr_func(dereg_mr_func) {
-  assert(numa_node <= kMaxNumaNodes);
+  assert(numa_node <= kMaxNumaNodes || numa_node == kNoNumaNode);
 
   if (initial_size < kMaxClassSize) initial_size = kMaxClassSize;
   prev_allocation_size = initial_size;
@@ -20,6 +20,11 @@ HugeAlloc::~HugeAlloc() {
   // Deregister and detach the created SHM regions
   for (shm_region_t &shm_region : shm_list) {
     if (shm_region.registered) dereg_mr_func(shm_region.mem_reg_info);
+    if (numa_node == kNoNumaNode) {
+      // Heap-backed region (malloc'd in alloc_raw()): free it, don't shmdt it
+      free(static_cast<void *>(const_cast<uint8_t *>(shm_region.buf)));
+      continue;
+    }
     int ret = shmdt(static_cast<void *>(const_cast<uint8_t *>(shm_region.buf)));
     if (ret != 0) {
       fprintf(stderr, "HugeAlloc: Error freeing SHM buf for key %d.\n",
@@ -62,63 +67,79 @@ void HugeAlloc::print_stats() {
 Buffer HugeAlloc::alloc_raw(size_t size, DoRegister do_register) {
   std::ostringstream xmsg;  // The exception message
   size = round_up<kHugepageSize>(size);
   int shm_key, shm_id;
+  uint8_t *shm_buf;
 
-  while (true) {
-    // Choose a positive SHM key. Negative is fine but it looks scary in the
-    // error message.
-    shm_key = static_cast<int>(slow_rand.next_u64());
-    shm_key = std::abs(shm_key);
-
-    // Try to get an SHM region
-    shm_id = shmget(shm_key, size, IPC_CREAT | IPC_EXCL | 0666 | SHM_HUGETLB);
-
-    if (shm_id == -1) {
-      switch (errno) {
-        case EEXIST:
-          continue;  // shm_key already exists. Try again.
-
-        case EACCES:
-          xmsg << "eRPC HugeAlloc: SHM allocation error. "
-               << "Insufficient permissions.";
-          throw std::runtime_error(xmsg.str());
-
-        case EINVAL:
-          xmsg << "eRPC HugeAlloc: SHM allocation error: SHMMAX/SHMIN "
-               << "mismatch. size = " << std::to_string(size) << " ("
-               << std::to_string(size / MB(1)) << " MB).";
-          throw std::runtime_error(xmsg.str());
-
-        case ENOMEM:
-          // Out of memory - this is OK
-          LOG_WARN(
-              "eRPC HugeAlloc: Insufficient hugepages. Can't reserve %lu MB.\n",
-              size / MB(1));
-          return Buffer(nullptr, 0, 0);
-
-        default:
-          xmsg << "eRPC HugeAlloc: Unexpected SHM malloc error "
-               << strerror(errno);
-          throw std::runtime_error(xmsg.str());
+  if (numa_node == kNoNumaNode) {
+    // Heap-backed region: no SHM segment, hugepages, or NUMA binding. The
+    // check must match ~HugeAlloc(), which free()s regions under kNoNumaNode.
+    shm_key = 0;
+    shm_id = 0;
+    shm_buf = static_cast<uint8_t *>(malloc(size));
+    if (shm_buf == nullptr) {
+      // Out of memory - this is OK, as in the ENOMEM case below
+      LOG_WARN("eRPC HugeAlloc: malloc() failed. Can't reserve %lu MB.\n",
+               size / MB(1));
+      return Buffer(nullptr, 0, 0);
+    }
+  } else {
+    while (true) {
+      // Choose a positive SHM key. Negative is fine but it looks scary in the
+      // error message.
+      shm_key = static_cast<int>(slow_rand.next_u64());
+      shm_key = std::abs(shm_key);
+
+      // Try to get an SHM region
+      shm_id = shmget(shm_key, size, IPC_CREAT | IPC_EXCL | 0666 | SHM_HUGETLB);
+
+      if (shm_id == -1) {
+        switch (errno) {
+          case EEXIST:
+            continue;  // shm_key already exists. Try again.
+
+          case EACCES:
+            xmsg << "eRPC HugeAlloc: SHM allocation error. "
+                 << "Insufficient permissions.";
+            throw std::runtime_error(xmsg.str());
+
+          case EINVAL:
+            xmsg << "eRPC HugeAlloc: SHM allocation error: SHMMAX/SHMIN "
+                 << "mismatch. size = " << std::to_string(size) << " ("
+                 << std::to_string(size / MB(1)) << " MB).";
+            throw std::runtime_error(xmsg.str());
+
+          case ENOMEM:
+            // Out of memory - this is OK
+            LOG_WARN(
+                "eRPC HugeAlloc: Insufficient hugepages. Can't reserve %lu MB.\n",
+                size / MB(1));
+            return Buffer(nullptr, 0, 0);
+
+          default:
+            xmsg << "eRPC HugeAlloc: Unexpected SHM malloc error "
+                 << strerror(errno);
+            throw std::runtime_error(xmsg.str());
+        }
+      } else {
+        // shm_key worked. Break out of the while loop.
+        break;
       }
-    } else {
-      // shm_key worked. Break out of the while loop.
-      break;
     }
-  }
 
-  uint8_t *shm_buf = static_cast<uint8_t *>(shmat(shm_id, nullptr, 0));
-  rt_assert(shm_buf != nullptr,
-            "eRPC HugeAlloc: shmat() failed. Key = " + std::to_string(shm_key));
+    // shmat() reports failure as (void *)-1, not nullptr
+    shm_buf = static_cast<uint8_t *>(shmat(shm_id, nullptr, 0));
+    rt_assert(shm_buf != reinterpret_cast<uint8_t *>(-1),
+              "eRPC HugeAlloc: shmat() failed. Key = " + std::to_string(shm_key));
 
-  // Mark the SHM region for deletion when this process exits
-  shmctl(shm_id, IPC_RMID, nullptr);
+    // Mark the SHM region for deletion when this process exits
+    shmctl(shm_id, IPC_RMID, nullptr);
 
-  // Bind the buffer to the NUMA node
-  const unsigned long nodemask = (1ul << static_cast<unsigned long>(numa_node));
-  long ret = mbind(shm_buf, size, MPOL_BIND, &nodemask, 32, 0);
-  rt_assert(ret == 0,
-            "eRPC HugeAlloc: mbind() failed. Key " + std::to_string(shm_key));
+    // Bind the buffer to the NUMA node
+    const unsigned long nodemask = (1ul << static_cast<unsigned long>(numa_node));
+    long ret = mbind(shm_buf, size, MPOL_BIND, &nodemask, 32, 0);
+    rt_assert(ret == 0,
+              "eRPC HugeAlloc: mbind() failed. Key " + std::to_string(shm_key));
+  }
 
   // If we are here, the allocation succeeded. Register if needed.
   bool do_register_bool = (do_register == DoRegister::kTrue);
diff --git a/src/util/numautils.h b/src/util/numautils.h
index 1912ea87..699148af 100644
--- a/src/util/numautils.h
+++ b/src/util/numautils.h
@@ -15,13 +15,14 @@ static size_t num_lcores_per_numa_node() {
 
-/// Return a list of logical cores in \p numa_node
+/// Return a list of logical cores in \p numa_node. If \p numa_node is
+/// kNoNumaNode, all configured logical cores are returned.
 static std::vector<size_t> get_lcores_for_numa_node(size_t numa_node) {
-  rt_assert(numa_node <= static_cast<size_t>(numa_max_node()));
+  rt_assert(numa_node == kNoNumaNode || numa_node <= static_cast<size_t>(numa_max_node()));
 
   std::vector<size_t> ret;
   size_t num_lcores = static_cast<size_t>(numa_num_configured_cpus());
 
   for (size_t i = 0; i < num_lcores; i++) {
-    if (numa_node == static_cast<size_t>(numa_node_of_cpu(i))) {
+    if (numa_node == kNoNumaNode || numa_node == static_cast<size_t>(numa_node_of_cpu(i))) {
       ret.push_back(i);
     }
   }
@@ -34,7 +35,7 @@ static void bind_to_core(std::thread &thread, size_t numa_node,
                          size_t numa_local_index) {
   cpu_set_t cpuset;
   CPU_ZERO(&cpuset);
-  rt_assert(numa_node <= kMaxNumaNodes, "Invalid NUMA node");
+  rt_assert(numa_node <= kMaxNumaNodes || numa_node == kNoNumaNode, "Invalid NUMA node");
 
   auto lcore_vec = get_lcores_for_numa_node(numa_node);
   size_t global_index = lcore_vec.at(numa_local_index);