Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enabling eRPC use without hugepages #23

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/nexus_impl/nexus.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Nexus::Nexus(std::string local_uri, size_t numa_node, size_t num_bg_threads)
sm_udp_port < (kBaseSmUdpPort + kMaxNumERpcProcesses),
"Invalid management UDP port");
rt_assert(num_bg_threads <= kMaxBgThreads, "Too many background threads");
rt_assert(numa_node < kMaxNumaNodes, "Invalid NUMA node");
rt_assert(numa_node < kMaxNumaNodes || numa_node == kNoNumaNode, "Invalid NUMA node");

kill_switch = false;

Expand Down
7 changes: 7 additions & 0 deletions src/rpc_constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ static_assert(kBaseSmUdpPort + kMaxNumERpcProcesses +
* @brief Maximum number of NUMA nodes per machine
*/
static constexpr size_t kMaxNumaNodes = 8;
/**
 * @relates Rpc
 * @brief Sentinel NUMA node identifier that selects heap memory instead of
 * NUMA memory.
 *
 * The value is UINT8_MAX - 1 (i.e., 254), so it lies outside the range of
 * real node indices, which are required to be smaller than kMaxNumaNodes.
 */
static constexpr size_t kNoNumaNode = static_cast<size_t>(UINT8_MAX) - 1;



/**
* @relates Rpc
Expand Down
2 changes: 1 addition & 1 deletion src/rpc_impl/rpc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Rpc<TTr>::Rpc(Nexus *nexus, void *context, uint8_t rpc_id,
rt_assert(rpc_id != kInvalidRpcId, "Invalid Rpc ID");
rt_assert(!nexus->rpc_id_exists(rpc_id), "Rpc ID already exists");
rt_assert(phy_port < kMaxPhyPorts, "Invalid physical port");
rt_assert(numa_node < kMaxNumaNodes, "Invalid NUMA node");
rt_assert(numa_node < kMaxNumaNodes || numa_node == kNoNumaNode, "Invalid NUMA node");

tls_registry = &nexus->tls_registry;
tls_registry->init(); // Initialize thread-local variables for this thread
Expand Down
117 changes: 66 additions & 51 deletions src/util/huge_alloc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ HugeAlloc::HugeAlloc(size_t initial_size, size_t numa_node,
: numa_node(numa_node),
reg_mr_func(reg_mr_func),
dereg_mr_func(dereg_mr_func) {
assert(numa_node <= kMaxNumaNodes);
assert(numa_node <= kMaxNumaNodes || numa_node == kNoNumaNode);

if (initial_size < kMaxClassSize) initial_size = kMaxClassSize;
prev_allocation_size = initial_size;
Expand All @@ -20,6 +20,10 @@ HugeAlloc::~HugeAlloc() {
// Deregister and detach the created SHM regions
for (shm_region_t &shm_region : shm_list) {
if (shm_region.registered) dereg_mr_func(shm_region.mem_reg_info);
if(numa_node == kNoNumaNode) {
free(static_cast<void *>(const_cast<uint8_t *>(shm_region.buf)));
continue;
}
int ret = shmdt(static_cast<void *>(const_cast<uint8_t *>(shm_region.buf)));
if (ret != 0) {
fprintf(stderr, "HugeAlloc: Error freeing SHM buf for key %d.\n",
Expand Down Expand Up @@ -62,63 +66,74 @@ void HugeAlloc::print_stats() {
Buffer HugeAlloc::alloc_raw(size_t size, DoRegister do_register) {
std::ostringstream xmsg; // The exception message
size = round_up<kHugepageSize>(size);

int shm_key, shm_id;
uint8_t *shm_buf;

while (true) {
// Choose a positive SHM key. Negative is fine but it looks scary in the
// error message.
shm_key = static_cast<int>(slow_rand.next_u64());
shm_key = std::abs(shm_key);

// Try to get an SHM region
shm_id = shmget(shm_key, size, IPC_CREAT | IPC_EXCL | 0666 | SHM_HUGETLB);

if (shm_id == -1) {
switch (errno) {
case EEXIST:
continue; // shm_key already exists. Try again.

case EACCES:
xmsg << "eRPC HugeAlloc: SHM allocation error. "
<< "Insufficient permissions.";
throw std::runtime_error(xmsg.str());

case EINVAL:
xmsg << "eRPC HugeAlloc: SHM allocation error: SHMMAX/SHMIN "
<< "mismatch. size = " << std::to_string(size) << " ("
<< std::to_string(size / MB(1)) << " MB).";
throw std::runtime_error(xmsg.str());

case ENOMEM:
// Out of memory - this is OK
LOG_WARN(
"eRPC HugeAlloc: Insufficient hugepages. Can't reserve %lu MB.\n",
size / MB(1));
return Buffer(nullptr, 0, 0);

default:
xmsg << "eRPC HugeAlloc: Unexpected SHM malloc error "
<< strerror(errno);
throw std::runtime_error(xmsg.str());
if (numa_node == kMaxNumaNodes) { // special case
shm_key = 0;
shm_id = 0;
shm_buf = static_cast<uint8_t *>(malloc(size));
}
else {

while (true) {
// Choose a positive SHM key. Negative is fine but it looks scary in the
// error message.
shm_key = static_cast<int>(slow_rand.next_u64());
shm_key = std::abs(shm_key);

// Try to get an SHM region
shm_id = shmget(shm_key, size, IPC_CREAT | IPC_EXCL | 0666 | SHM_HUGETLB);

if (shm_id == -1) {
switch (errno) {
case EEXIST:
continue; // shm_key already exists. Try again.

case EACCES:
xmsg << "eRPC HugeAlloc: SHM allocation error. "
<< "Insufficient permissions.";
throw std::runtime_error(xmsg.str());

case EINVAL:
xmsg << "eRPC HugeAlloc: SHM allocation error: SHMMAX/SHMIN "
<< "mismatch. size = " << std::to_string(size) << " ("
<< std::to_string(size / MB(1)) << " MB).";
throw std::runtime_error(xmsg.str());

case ENOMEM:
// Out of memory - this is OK
LOG_WARN(
"eRPC HugeAlloc: Insufficient hugepages. Can't reserve %lu MB.\n",
size / MB(1));
return Buffer(nullptr, 0, 0);

default:
xmsg << "eRPC HugeAlloc: Unexpected SHM malloc error "
<< strerror(errno);
throw std::runtime_error(xmsg.str());
}
} else {
// shm_key worked. Break out of the while loop.
break;
}
} else {
// shm_key worked. Break out of the while loop.
break;
}
}

uint8_t *shm_buf = static_cast<uint8_t *>(shmat(shm_id, nullptr, 0));
rt_assert(shm_buf != nullptr,
"eRPC HugeAlloc: shmat() failed. Key = " + std::to_string(shm_key));
shm_buf = static_cast<uint8_t *>(shmat(shm_id, nullptr, 0));
rt_assert(shm_buf != nullptr,
"eRPC HugeAlloc: shmat() failed. Key = " + std::to_string(shm_key));

// Mark the SHM region for deletion when this process exits
shmctl(shm_id, IPC_RMID, nullptr);
// Mark the SHM region for deletion when this process exits
shmctl(shm_id, IPC_RMID, nullptr);

// Bind the buffer to the NUMA node
const unsigned long nodemask = (1ul << static_cast<unsigned long>(numa_node));
long ret = mbind(shm_buf, size, MPOL_BIND, &nodemask, 32, 0);
rt_assert(ret == 0,
"eRPC HugeAlloc: mbind() failed. Key " + std::to_string(shm_key));
// Bind the buffer to the NUMA node
const unsigned long nodemask = (1ul << static_cast<unsigned long>(numa_node));
long ret = mbind(shm_buf, size, MPOL_BIND, &nodemask, 32, 0);
rt_assert(ret == 0,
"eRPC HugeAlloc: mbind() failed. Key " + std::to_string(shm_key));

}

// If we are here, the allocation succeeded. Register if needed.
bool do_register_bool = (do_register == DoRegister::kTrue);
Expand Down
6 changes: 3 additions & 3 deletions src/util/numautils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ static size_t num_lcores_per_numa_node() {

/// Return a list of logical cores in \p numa_node
static std::vector<size_t> get_lcores_for_numa_node(size_t numa_node) {
rt_assert(numa_node <= static_cast<size_t>(numa_max_node()));
rt_assert(numa_node == kNoNumaNode || numa_node <= static_cast<size_t>(numa_max_node()));

std::vector<size_t> ret;
size_t num_lcores = static_cast<size_t>(numa_num_configured_cpus());

for (size_t i = 0; i < num_lcores; i++) {
if (numa_node == static_cast<size_t>(numa_node_of_cpu(i))) {
if (numa_node == kNoNumaNode || numa_node == static_cast<size_t>(numa_node_of_cpu(i))) {
ret.push_back(i);
}
}
Expand All @@ -34,7 +34,7 @@ static void bind_to_core(std::thread &thread, size_t numa_node,
size_t numa_local_index) {
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
rt_assert(numa_node <= kMaxNumaNodes, "Invalid NUMA node");
rt_assert(numa_node <= kMaxNumaNodes || numa_node == kNoNumaNode, "Invalid NUMA node");

auto lcore_vec = get_lcores_for_numa_node(numa_node);
size_t global_index = lcore_vec.at(numa_local_index);
Expand Down