Skip to content

Commit

Permalink
prov/util: Integrate kdreg2 into libfabric
Browse files Browse the repository at this point in the history
kdreg2 is a Linux kernel module used to enable the libfabric MR cache
for FI_HMEM_SYSTEM.

Signed-off-by: Mike Uttormark <[email protected]>
Signed-off-by: Ian Ziemba <[email protected]>
  • Loading branch information
muttormark authored and j-xiong committed Oct 22, 2024
1 parent c677d1e commit 877221e
Show file tree
Hide file tree
Showing 7 changed files with 472 additions and 8 deletions.
1 change: 1 addition & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ common_srcs = \
prov/util/src/rocr_ipc_monitor.c \
prov/util/src/ze_ipc_monitor.c \
prov/util/src/xpmem_monitor.c \
prov/util/src/kdreg2_mem_monitor.c \
prov/util/src/util_profile.c \
prov/coll/src/coll_attr.c \
prov/coll/src/coll_av.c \
Expand Down
51 changes: 50 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,53 @@ AC_ARG_ENABLE([restricted_dl],
AC_DEFINE_UNQUOTED([HAVE_RESTRICTED_DL], [$restricted_dl],
[Define to 1 to only look for dl providers under default location if FI_PROVIDER_PATH is not set])

dnl Check kdreg2 support
dnl State shared by the kdreg2 checks that follow:
dnl   kdreg2_enabled            1 unless the user switches the monitor off
dnl   have_kdreg2               becomes 1 once a kdreg2.h header is found
dnl   have_kdreg2_include_path  becomes 1 when the header is found through a
dnl                             user-supplied directory, not <linux/kdreg2.h>
kdreg2_enabled=1
have_kdreg2=0
have_kdreg2_include_path=0

dnl --disable-kdreg2 clears kdreg2_enabled.  AS_HELP_STRING replaces the
dnl obsolete AC_HELP_STRING spelling and matches the --with-kdreg2 option.
AC_ARG_ENABLE([kdreg2],
	      [AS_HELP_STRING([--disable-kdreg2],
			      [Determine whether kdreg2 memory monitor is disabled.])],
	      [AS_IF([test "$enable_kdreg2" = "no"], [kdreg2_enabled=0])],
	      [])

dnl Locate the kdreg2 header unless --disable-kdreg2 was given.
dnl   1. Probe the system location <linux/kdreg2.h>.
dnl   2. Honour --with-kdreg2:
dnl        no        turn the monitor off and forget any header found in step 1
dnl        yes/empty rely on the result of step 1
dnl        DIR       add DIR to CPPFLAGS and probe for a bare kdreg2.h there
dnl   3. When the option was given and the monitor is still wanted but no
dnl      header was found then stop with an error.  The kdreg2_enabled guard
dnl      keeps --with-kdreg2=no from failing on hosts without the header.
AS_IF([test $kdreg2_enabled -ne 0 ],
      [AC_CHECK_HEADER([linux/kdreg2.h], [have_kdreg2=1], [], [])
       AC_ARG_WITH([kdreg2],
		   [AS_HELP_STRING([--with-kdreg2=DIR],
				   [Enable KDREG2 memory monitor.
				    Optional=<Path to kdreg2.h header file>.])],
		   [AS_CASE(["$with_kdreg2"],
			    ["no"], [kdreg2_enabled=0
				     have_kdreg2=0],
			    ["yes"], [],
			    [""], [],
			    [CPPFLAGS="$CPPFLAGS -I$with_kdreg2"
			     AC_CHECK_HEADER([kdreg2.h],
					     [have_kdreg2=1
					      have_kdreg2_include_path=1],
					     [have_kdreg2=0],
					     [])])
		    AS_IF([test $kdreg2_enabled -ne 0 && test $have_kdreg2 -eq 0],
			  [AC_MSG_ERROR([KDREG2 header not found in $with_kdreg2. Cannot enable KDREG2 memory monitor.])])
		   ])
      ])

dnl Summarise the kdreg2 outcome: one notice when the monitor was switched
dnl off, another when it is enabled and a header was found, and nothing when
dnl it is enabled but no header is available.
AS_IF([test $kdreg2_enabled -eq 0],
      [AC_MSG_NOTICE([kdreg2 monitor disabled])],
      [test $have_kdreg2 -ne 0],
      [AC_MSG_NOTICE([kdreg2 present and enabled])])

dnl Publish the kdreg2 detection results in config.h.
AC_DEFINE_UNQUOTED([HAVE_KDREG2],
		   [$have_kdreg2],
		   [Define to 1 if kdreg2.h is available.])

AC_DEFINE_UNQUOTED([HAVE_KDREG2_INCLUDE_PATH],
		   [$have_kdreg2_include_path],
		   [Define to 1 if kdreg2.h path is not <linux/kdreg2.h>.])

dnl HAVE_KDREG2_MONITOR is driven by the same shell variable as HAVE_KDREG2.
AC_DEFINE_UNQUOTED([HAVE_KDREG2_MONITOR],
		   [$have_kdreg2],
		   [Define to 1 to enable kdreg2 memory monitor])

dnl Check support to intercept syscalls
AC_CHECK_HEADERS_ONCE(elf.h sys/auxv.h)

Expand Down Expand Up @@ -888,16 +935,18 @@ AC_DEFINE_UNQUOTED(ENABLE_UFFD_MONITOR, [$enable_uffd],
default_monitor=""
bad_default="0"
AC_ARG_WITH([default-monitor],
[AS_HELP_STRING([--with-default-monitor=<memhooks|uffd|disabled>],
[AS_HELP_STRING([--with-default-monitor=<memhooks|uffd|kdreg2|disabled>],
[Select the default memory monitor.])],
dnl Map the requested monitor name onto default_monitor and reject unknown names.
[AS_CASE([$with_default_monitor],
	 [memhooks], [default_monitor=memhooks],
	 [uffd], [default_monitor=uffd],
	 [kdreg2], [default_monitor=kdreg2],
	 [disabled], [default_monitor=disabled],
	 [AC_MSG_ERROR([Unknown monitor specified: $with_default_monitor. Choices are memhooks, uffd, kdreg2, or disabled.])])
AS_CASE([$default_monitor],
[memhooks], [AS_IF([test "$enable_memhooks" != "1"], [bad_default=1])],
[uffd], [AS_IF([test "$enable_uffd" != "1"], [bad_default=1])],
[kdreg2], [AS_IF([test "$kdreg2_enabled" != "1"], [bad_default=1])],
[])
AS_IF([test "$bad_default" != "0"],
[AC_MSG_ERROR(["Default memory monitor is not available: $default_monitor."])])
Expand Down
38 changes: 36 additions & 2 deletions include/ofi_mr.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2017-2019 Intel Corporation, Inc. All rights reserved.
* Copyright (c) 2019-2021 Amazon.com, Inc. or its affiliates.
* All rights reserved.
* (C) Copyright 2020 Hewlett Packard Enterprise Development LP
* (C) Copyright 2020-2023 Hewlett Packard Enterprise Development LP
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
Expand Down Expand Up @@ -40,6 +40,8 @@
# include <config.h>
#endif /* HAVE_CONFIG_H */

struct ofi_mr;

#include <inttypes.h>
#include <stdbool.h>

Expand All @@ -48,6 +50,15 @@
#include <ofi_lock.h>
#include <ofi_list.h>
#include <ofi_tree.h>
#include <ofi_hmem.h>

#if HAVE_KDREG2_MONITOR
#if HAVE_KDREG2_INCLUDE_PATH
#include "kdreg2.h"
#else
#include <linux/kdreg2.h>
#endif
#endif

int ofi_open_mr_cache(uint32_t version, void *attr, size_t attr_len,
uint64_t flags, struct fid **fid, void *context);
Expand Down Expand Up @@ -128,6 +139,12 @@ struct ofi_mr_cache;
/*
 * Interface-specific data attached to a memory registration. As a union,
 * the members share storage, so only one of them is meaningful at a time.
 * NOTE(review): which member is valid is presumably determined by the HMEM
 * interface of the owning entry - confirm against the users of this type.
 */
union ofi_mr_hmem_info {
/* NOTE(review): presumably an identifier for a CUDA allocation - confirm. */
uint64_t cuda_id;
/* NOTE(review): presumably an identifier for a Level Zero allocation - confirm. */
uint64_t ze_id;
#if HAVE_KDREG2_MONITOR
/*
 * Only compiled in when configure defined HAVE_KDREG2_MONITOR, because both
 * member types come from the kdreg2 header.
 */
struct {
kdreg2_cookie_t cookie;
struct kdreg2_monitoring_params monitoring_params;
} kdreg2;
#endif
};

struct ofi_mr_entry {
Expand Down Expand Up @@ -228,6 +245,23 @@ struct ofi_memhooks {

extern struct ofi_mem_monitor *memhooks_monitor;

/*
* Kdreg2 monitor
*/

/*
 * Forward declaration only. No HAVE_KDREG2_MONITOR guard is visible around
 * struct ofi_kdreg2 below, so this keeps its status_data pointer member valid
 * even when the kdreg2 header has not been included.
 */
struct kdreg2_status_data;

/*
 * State of the kdreg2 memory monitor.
 */
struct ofi_kdreg2 {
/* Generic monitor object embedded as the first member. */
struct ofi_mem_monitor monitor;
/* NOTE(review): presumably the thread servicing kdreg2 events - confirm. */
pthread_t thread;
/* NOTE(review): presumably the descriptor of the opened kdreg2 device - confirm. */
int fd;
/* NOTE(review): presumably a pipe used to tell the thread to exit - confirm. */
int exit_pipe[2];
/* Read-only view of status data; the type is defined by the kdreg2 header. */
const struct kdreg2_status_data *status_data;
/* NOTE(review): presumably the source of new kdreg2 cookie values - confirm. */
ofi_atomic64_t next_cookie;
};

/* Pointer to the kdreg2 monitor, exported alongside the other monitors. */
extern struct ofi_mem_monitor *kdreg2_monitor;

extern struct ofi_mem_monitor *cuda_monitor;
extern struct ofi_mem_monitor *cuda_ipc_monitor;
extern struct ofi_mem_monitor *rocr_monitor;
Expand Down Expand Up @@ -368,7 +402,7 @@ struct ofi_mr_cache {
struct ofi_rbmap tree;
struct dlist_entry lru_list;
struct dlist_entry dead_region_list;
pthread_mutex_t lock;
pthread_mutex_t lock;

size_t cached_cnt;
size_t cached_size;
Expand Down
1 change: 1 addition & 0 deletions libfabric.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,7 @@
<ClCompile Include="prov\util\src\ze_ipc_monitor.c" />
<ClCompile Include="prov\util\src\rocr_ipc_monitor.c" />
<ClCompile Include="prov\util\src\xpmem_monitor.c" />
<ClCompile Include="prov\util\src\kdreg2_mem_monitor.c" />
<ClCompile Include="prov\coll\src\coll_attr.c" />
<ClCompile Include="prov\coll\src\coll_av.c" />
<ClCompile Include="prov\coll\src\coll_av_set.c" />
Expand Down
5 changes: 3 additions & 2 deletions man/fi_mr.3.md
Original file line number Diff line number Diff line change
Expand Up @@ -1054,12 +1054,13 @@ configure registration caches.
: The cache monitor is responsible for detecting system memory (FI_HMEM_SYSTEM)
changes made between the virtual addresses used by an application and the
underlying physical pages. Valid monitor options are: userfaultfd, memhooks,
and disabled. Selecting disabled will turn off the registration cache.
kdreg2, and disabled. Selecting disabled will turn off the registration cache.
Userfaultfd is a Linux kernel feature used to report virtual to physical
address mapping changes to user space. Memhooks operates by intercepting
relevant memory allocation and deallocation calls which may result in the
mappings changing, such as malloc, mmap, free, etc. Note that memhooks
operates at the elf linker layer, and does not use glibc memory hooks.
operates at the elf linker layer, and does not use glibc memory hooks. Kdreg2
is supplied as a loadable Linux kernel module.

*FI_MR_CUDA_CACHE_MONITOR_ENABLED*
: The CUDA cache monitor is responsible for detecting CUDA device memory
Expand Down
Loading

0 comments on commit 877221e

Please sign in to comment.