diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index 1ec1186484..3fadbc8eb5 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -8,8 +8,8 @@ inputs: description: 'Enable GPU support.' required: false default: OFF - raft: - description: 'Enable RAFT support.' + cuvs: + description: 'Enable cuVS support.' required: false default: OFF rocm: @@ -50,11 +50,11 @@ runs: if [ "${{ inputs.rocm }}" = "ON" ]; then : # regular CUDA for GPU builds - elif [ "${{ inputs.gpu }}" = "ON" ] && [ "${{ inputs.raft }}" = "OFF" ]; then + elif [ "${{ inputs.gpu }}" = "ON" ] && [ "${{ inputs.cuvs }}" = "OFF" ]; then conda install -y -q cuda-toolkit -c "nvidia/label/cuda-12.4.0" - # and CUDA from RAFT channel for RAFT builds - elif [ "${{ inputs.raft }}" = "ON" ]; then - conda install -y -q libraft=24.06 cuda-version=12.4 cuda-toolkit -c rapidsai -c "nvidia/label/cuda-12.4.0" -c conda-forge + # and CUDA from cuVS channel for cuVS builds + elif [ "${{ inputs.cuvs }}" = "ON" ]; then + conda install -y -q libcuvs=24.08 cuda-version=12.4 cuda-toolkit -c rapidsai -c conda-forge -c "nvidia/label/cuda-12.4.0" fi # install test packages @@ -102,7 +102,7 @@ runs: sudo apt-get -qq clean >/dev/null sudo rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - name: Symblink system dependencies - if: inputs.raft == 'ON' || inputs.rocm == 'ON' + if: inputs.rocm == 'ON' shell: bash run: | # symblink system libraries for HIP compiler @@ -119,7 +119,7 @@ runs: -DBUILD_TESTING=ON \ -DBUILD_SHARED_LIBS=ON \ -DFAISS_ENABLE_GPU=${{ inputs.gpu }} \ - -DFAISS_ENABLE_RAFT=${{ inputs.raft }} \ + -DFAISS_ENABLE_CUVS=${{ inputs.cuvs }} \ -DFAISS_ENABLE_ROCM=${{ inputs.rocm }} \ -DFAISS_OPT_LEVEL=${{ inputs.opt_level }} \ -DFAISS_ENABLE_C_API=ON \ @@ -174,5 +174,5 @@ runs: if: always() uses: actions/upload-artifact@v4 with: - name: test-results-arch=${{ runner.arch }}-opt=${{ inputs.opt_level }}-gpu=${{ inputs.gpu }}-raft=${{ inputs.raft }}-rocm=${{ inputs.rocm }} + name: test-results-arch=${{ runner.arch }}-opt=${{ inputs.opt_level }}-gpu=${{ inputs.gpu }}-cuvs=${{ inputs.cuvs }}-rocm=${{ inputs.rocm }} path: test-results diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml index d644e3e120..71153231b9 100644 --- a/.github/actions/build_conda/action.yml +++ b/.github/actions/build_conda/action.yml @@ -9,8 +9,8 @@ inputs: description: "CUDA toolkit version to use." default: "" required: false - raft: - description: "Enable RAFT support." + cuvs: + description: "Enable cuVS support." 
default: "" required: false runs: @@ -59,14 +59,14 @@ runs: run: | conda build faiss --user pytorch --label ${{ inputs.label }} -c pytorch - name: Conda build (GPU) - if: inputs.label == '' && inputs.cuda != '' && inputs.raft == '' + if: inputs.label == '' && inputs.cuda != '' && inputs.cuvs == '' shell: ${{ steps.choose_shell.outputs.shell }} working-directory: conda run: | conda build faiss-gpu --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia - name: Conda build (GPU) w/ anaconda upload - if: inputs.label != '' && inputs.cuda != '' && inputs.raft == '' + if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs == '' shell: ${{ steps.choose_shell.outputs.shell }} working-directory: conda env: @@ -74,19 +74,19 @@ runs: run: | conda build faiss-gpu --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ --user pytorch --label ${{ inputs.label }} -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia - - name: Conda build (GPU w/ RAFT) - if: inputs.label == '' && inputs.cuda != '' && inputs.raft != '' + - name: Conda build (GPU w/ cuVS) + if: inputs.label == '' && inputs.cuda != '' && inputs.cuvs != '' shell: ${{ steps.choose_shell.outputs.shell }} working-directory: conda run: | - conda build faiss-gpu-raft --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ + conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge - - name: Conda build (GPU w/ RAFT) w/ anaconda upload - if: inputs.label != '' && inputs.cuda != '' && inputs.raft != '' + - name: Conda build (GPU w/ cuVS) w/ anaconda upload + if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != '' shell: ${{ steps.choose_shell.outputs.shell }} working-directory: conda env: PACKAGE_TYPE: ${{ inputs.label }} run: | - conda build faiss-gpu-raft --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ + conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ --user pytorch --label ${{ inputs.label }} -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index edba3b1c83..ac1387c438 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -78,8 +78,8 @@ jobs: uses: ./.github/actions/build_cmake with: gpu: ON - linux-x86_64-GPU-w-RAFT-cmake: - name: Linux x86_64 GPU w/ RAFT (cmake) + linux-x86_64-GPU-w-CUVS-cmake: + name: Linux x86_64 GPU w/ cuVS (cmake) needs: linux-x86_64-cmake runs-on: 4-core-ubuntu-gpu-t4 steps: @@ -89,7 +89,7 @@ jobs: uses: ./.github/actions/build_cmake with: gpu: ON - raft: ON + cuvs: ON linux-x86_64-GPU-w-ROCm-cmake: name: Linux x86_64 GPU w/ ROCm (cmake) needs: linux-x86_64-cmake @@ -199,8 +199,8 @@ jobs: with: label: main cuda: "11.4.4" - linux-x86_64-GPU-RAFT-packages-CUDA11-8-0: - name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.8.0) + linux-x86_64-GPU-CUVS-packages-CUDA11-8-0: + name: Linux x86_64 GPU w/ cuVS packages (CUDA 11.8.0) if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') runs-on: 4-core-ubuntu-gpu-t4 env: @@ -217,7 +217,7 @@ jobs: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} with: label: main - raft: "ON" + cuvs: "ON" cuda: "11.8.0" linux-x86_64-GPU-packages-CUDA-12-1-1: name: Linux x86_64 GPU packages (CUDA 12.1.1) @@ -238,8 +238,8 @@ jobs: with: label: main cuda: "12.1.1" - 
linux-x86_64-GPU-RAFT-packages-CUDA12-1-1: - name: Linux x86_64 GPU w/ RAFT packages (CUDA 12.1.1) + linux-x86_64-GPU-CUVS-packages-CUDA12-1-1: + name: Linux x86_64 GPU w/ cuVS packages (CUDA 12.1.1) if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') runs-on: 4-core-ubuntu-gpu-t4 env: @@ -256,7 +256,7 @@ jobs: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} with: label: main - raft: "ON" + cuvs: "ON" cuda: "12.1.1" windows-x86_64-packages: name: Windows x86_64 packages diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index cdca7d873a..c3929683b8 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -38,8 +38,8 @@ jobs: with: label: nightly cuda: "11.4.4" - linux-x86_64-GPU-RAFT-CUDA11-8-0-nightly: - name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 11.8.0) + linux-x86_64-GPU-CUVS-CUDA11-8-0-nightly: + name: Linux x86_64 GPU w/ cuVS nightlies (CUDA 11.8.0) runs-on: 4-core-ubuntu-gpu-t4 env: CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" @@ -54,7 +54,7 @@ jobs: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} with: label: nightly - raft: "ON" + cuvs: "ON" cuda: "11.8.0" linux-x86_64-GPU-CUDA-12-1-1-nightly: name: Linux x86_64 GPU nightlies (CUDA 12.1.1) @@ -73,8 +73,8 @@ jobs: with: label: nightly cuda: "12.1.1" - linux-x86_64-GPU-RAFT-CUDA12-1-1-nightly: - name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 12.1.1) + linux-x86_64-GPU-CUVS-CUDA12-1-1-nightly: + name: Linux x86_64 GPU w/ cuVS nightlies (CUDA 12.1.1) runs-on: 4-core-ubuntu-gpu-t4 env: CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" @@ -89,7 +89,7 @@ jobs: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} with: label: nightly - raft: "ON" + cuvs: "ON" cuda: "12.1.1" windows-x86_64-nightly: name: Windows x86_64 nightlies diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dab5900aa..ec990a8734 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,7 +33,7 @@ if(FAISS_ENABLE_GPU) endif() endif() -if(FAISS_ENABLE_RAFT) +if(FAISS_ENABLE_CUVS) include(cmake/thirdparty/fetch_rapids.cmake) include(rapids-cmake) include(rapids-cpm) @@ -60,7 +60,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") # Valid values are "generic", "avx2", "avx512", "sve". option(FAISS_OPT_LEVEL "" "generic") option(FAISS_ENABLE_GPU "Enable support for GPU indexes." ON) -option(FAISS_ENABLE_RAFT "Enable RAFT for GPU indexes." OFF) +option(FAISS_ENABLE_CUVS "Enable cuVS for GPU indexes." OFF) option(FAISS_ENABLE_ROCM "Enable ROCm for GPU indexes." OFF) option(FAISS_ENABLE_PYTHON "Build Python extension." ON) option(FAISS_ENABLE_C_API "Build C API." 
OFF) @@ -81,9 +81,9 @@ if(FAISS_ENABLE_GPU) endif() endif() -if(FAISS_ENABLE_RAFT AND NOT TARGET raft::raft) - find_package(raft COMPONENTS compiled distributed) -endif() +if(FAISS_ENABLE_CUVS AND NOT TARGET cuvs::cuvs) + find_package(cuvs) + endif() add_subdirectory(faiss) diff --git a/INSTALL.md b/INSTALL.md index e6d3f33fb8..e16de484fe 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -118,7 +118,7 @@ Several options can be passed to CMake, among which: values are `ON` and `OFF`), - `-DFAISS_ENABLE_PYTHON=OFF` in order to disable building python bindings (possible values are `ON` and `OFF`), - - `-DFAISS_ENABLE_RAFT=ON` in order to enable building the RAFT implementations + - `-DFAISS_ENABLE_CUVS=ON` in order to enable building the cuVS implementations of the IVF-Flat and IVF-PQ GPU-accelerated indices (default is `OFF`, possible values are `ON` and `OFF`) - `-DBUILD_TESTING=OFF` in order to disable building C++ tests, diff --git a/benchs/bench_ivfflat_raft.py b/benchs/bench_ivfflat_cuvs.py similarity index 81% rename from benchs/bench_ivfflat_raft.py rename to benchs/bench_ivfflat_cuvs.py index d8e299165c..3628ec7422 100644 --- a/benchs/bench_ivfflat_raft.py +++ b/benchs/bench_ivfflat_cuvs.py @@ -4,7 +4,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -44,8 +44,8 @@ def aa(*args, **kwargs): help='whether to benchmark add operation on GPU index') aa('--bm_search', default=True, help='whether to benchmark search operation on GPU index') -aa('--raft_only', default=False, action='store_true', - help='whether to only produce RAFT enabled benchmarks') +aa('--cuvs_only', default=False, action='store_true', + help='whether to only produce cuVS enabled benchmarks') group = parser.add_argument_group('IVF options') @@ -70,9 +70,9 @@ def aa(*args, **kwargs): mr = rmm.mr.PoolMemoryResource(rmm.mr.CudaMemoryResource()) rmm.mr.set_current_device_resource(mr) -def bench_train_milliseconds(index, trainVecs, use_raft): +def bench_train_milliseconds(index, trainVecs, use_cuvs): co = faiss.GpuMultipleClonerOptions() - co.use_raft = use_raft + co.use_cuvs = use_cuvs index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) t0 = time.time() index_gpu.train(trainVecs) @@ -89,21 +89,21 @@ def bench_train_milliseconds(index, trainVecs, use_raft): for n_cols in dataset_dims: index = faiss.index_factory(n_cols, "IVF{},Flat".format(args.n_centroids)) trainVecs = rs.rand(n_rows, n_cols).astype('float32') - raft_gpu_train_time = bench_train_milliseconds( + cuvs_gpu_train_time = bench_train_milliseconds( index, trainVecs, True) - if args.raft_only: - print("Method: IVFFlat, Operation: TRAIN, dim: %d, n_centroids %d, numTrain: %d, RAFT enabled GPU train time: %.3f milliseconds" % ( - n_cols, args.n_centroids, n_rows, raft_gpu_train_time)) + if args.cuvs_only: + print("Method: IVFFlat, Operation: TRAIN, dim: %d, n_centroids %d, numTrain: %d, cuVS enabled GPU train time: %.3f milliseconds" % ( + n_cols, args.n_centroids, n_rows, cuvs_gpu_train_time)) else: classical_gpu_train_time = bench_train_milliseconds( index, trainVecs, False) - print("Method: IVFFlat, Operation: TRAIN, dim: %d, n_centroids %d, numTrain: %d, classical GPU train time: %.3f milliseconds, RAFT enabled GPU train time: %.3f milliseconds" % ( - n_cols, 
args.n_centroids, n_rows, classical_gpu_train_time, raft_gpu_train_time)) + print("Method: IVFFlat, Operation: TRAIN, dim: %d, n_centroids %d, numTrain: %d, classical GPU train time: %.3f milliseconds, cuVS enabled GPU train time: %.3f milliseconds" % ( + n_cols, args.n_centroids, n_rows, classical_gpu_train_time, cuvs_gpu_train_time)) -def bench_add_milliseconds(index, addVecs, use_raft): +def bench_add_milliseconds(index, addVecs, use_cuvs): co = faiss.GpuMultipleClonerOptions() - co.use_raft = use_raft + co.use_cuvs = use_cuvs index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) index_gpu.copyFrom(index) t0 = time.time() @@ -125,20 +125,20 @@ def bench_add_milliseconds(index, addVecs, use_raft): for n_rows in addset_sizes: for n_cols in dataset_dims: addVecs = rs.rand(n_rows, n_cols).astype('float32') - raft_gpu_add_time = bench_add_milliseconds(index, addVecs, True) - if args.raft_only: - print("Method: IVFFlat, Operation: ADD, dim: %d, n_centroids %d, numAdd: %d, RAFT enabled GPU add time: %.3f milliseconds" % ( - n_train, n_rows, n_cols, args.n_centroids, raft_gpu_add_time)) + cuvs_gpu_add_time = bench_add_milliseconds(index, addVecs, True) + if args.cuvs_only: + print("Method: IVFFlat, Operation: ADD, dim: %d, n_centroids %d, numAdd: %d, cuVS enabled GPU add time: %.3f milliseconds" % ( + n_train, n_rows, n_cols, args.n_centroids, cuvs_gpu_add_time)) else: classical_gpu_add_time = bench_add_milliseconds( index, addVecs, False) - print("Method: IVFFlat, Operation: ADD, dim: %d, n_centroids %d, numAdd: %d, classical GPU add time: %.3f milliseconds, RAFT enabled GPU add time: %.3f milliseconds" % ( - n_train, n_rows, n_cols, args.n_centroids, classical_gpu_add_time, raft_gpu_add_time)) + print("Method: IVFFlat, Operation: ADD, dim: %d, n_centroids %d, numAdd: %d, classical GPU add time: %.3f milliseconds, cuVS enabled GPU add time: %.3f milliseconds" % ( + n_train, n_rows, n_cols, args.n_centroids, classical_gpu_add_time, cuvs_gpu_add_time)) -def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_raft): +def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_cuvs): co = faiss.GpuMultipleClonerOptions() - co.use_raft = use_raft + co.use_cuvs = use_cuvs index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) index_gpu.copyFrom(index) index_gpu.add(addVecs) @@ -163,19 +163,19 @@ def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_raft): addVecs = rs.rand(n_add, n_cols).astype('float32') for n_rows in queryset_sizes: queryVecs = rs.rand(n_rows, n_cols).astype('float32') - raft_gpu_search_time = bench_search_milliseconds( + cuvs_gpu_search_time = bench_search_milliseconds( index, addVecs, queryVecs, args.nprobe, args.k, True) - if args.raft_only: - print("Method: IVFFlat, Operation: SEARCH, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, RAFT enabled GPU search time: %.3f milliseconds" % ( - n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, raft_gpu_search_time)) + if args.cuvs_only: + print("Method: IVFFlat, Operation: SEARCH, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, cuVS enabled GPU search time: %.3f milliseconds" % ( + n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, cuvs_gpu_search_time)) else: classical_gpu_search_time = bench_search_milliseconds( index, addVecs, queryVecs, args.nprobe, args.k, False) - print("Method: IVFFlat, Operation: SEARCH, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f 
milliseconds, RAFT enabled GPU search time: %.3f milliseconds" % ( - n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, classical_gpu_search_time, raft_gpu_search_time)) + print("Method: IVFFlat, Operation: SEARCH, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f milliseconds, cuVS enabled GPU search time: %.3f milliseconds" % ( + n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, classical_gpu_search_time, cuvs_gpu_search_time)) print("=" * 40) - print("Large RAFT Enabled Benchmarks") + print("Large cuVS Enabled Benchmarks") print("=" * 40) # Avoid classical GPU Benchmarks for large datasets because of OOM for more than 500000 queries and/or large dims as well as for large k queryset_sizes = [100000, 500000, 1000000] @@ -188,7 +188,7 @@ def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_raft): addVecs = rs.rand(n_add, n_cols).astype('float32') for n_rows in queryset_sizes: queryVecs = rs.rand(n_rows, n_cols).astype('float32') - raft_gpu_search_time = bench_search_milliseconds( + cuvs_gpu_search_time = bench_search_milliseconds( index, addVecs, queryVecs, args.nprobe, args.k, True) - print("Method: IVFFlat, Operation: SEARCH, numTrain: %d, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, RAFT enabled GPU search time: %.3f milliseconds" % ( - n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, raft_gpu_search_time)) + print("Method: IVFFlat, Operation: SEARCH, numTrain: %d, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, cuVS enabled GPU search time: %.3f milliseconds" % ( + n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, cuvs_gpu_search_time)) diff --git a/benchs/bench_ivfpq_raft.py b/benchs/bench_ivfpq_cuvs.py similarity index 78% rename from benchs/bench_ivfpq_raft.py rename to benchs/bench_ivfpq_cuvs.py index 96a9ab1512..7668afffea 100644 --- a/benchs/bench_ivfpq_raft.py +++ b/benchs/bench_ivfpq_cuvs.py @@ -1,10 +1,9 @@ -# @lint-ignore-every LICENSELINT -# Copyright (c) Meta Platforms, Inc. and its affiliates. +# Copyright (c) Facebook, Inc. and its affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -42,13 +41,13 @@ def aa(*args, **kwargs): group = parser.add_argument_group('benchmarking options') -aa('--raft_only', default=False, action='store_true', - help='whether to only produce RAFT enabled benchmarks') +aa('--cuvs_only', default=False, action='store_true', + help='whether to only produce cuVS enabled benchmarks') group = parser.add_argument_group('IVF options') -aa('--bits_per_code', default=8, type=int, help='bits per code. Note that < 8 is only supported when RAFT is enabled') +aa('--bits_per_code', default=8, type=int, help='bits per code. 
Note that < 8 is only supported when cuVS is enabled') aa('--pq_len', default=2, type=int, help='number of vector elements represented by one PQ code') -aa('--use_precomputed', default=True, type=bool, help='use precomputed codes (not with RAFT enabled)') +aa('--use_precomputed', default=True, type=bool, help='use precomputed codes (not with cuVS enabled)') group = parser.add_argument_group('searching') aa('--k', default=10, type=int, help='nb of nearest neighbors') @@ -74,11 +73,11 @@ def compute_nlist(numVecs): return int(nlist) -def bench_train_milliseconds(index, trainVecs, use_raft): +def bench_train_milliseconds(index, trainVecs, use_cuvs): co = faiss.GpuMultipleClonerOptions() # use float 16 lookup tables to save space co.useFloat16LookupTables = True - co.use_raft = use_raft + co.use_cuvs = use_cuvs index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) t0 = time.time() index_gpu.train(trainVecs) @@ -93,22 +92,22 @@ def bench_train_milliseconds(index, trainVecs, use_raft): print("=" * 40) print("GPU Train Benchmarks") print("=" * 40) -raft_gpu_train_time = bench_train_milliseconds(index, xt, True) -if args.raft_only: - print("Method: IVFPQ, Operation: TRAIN, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numTrain: %d, RAFT enabled GPU train time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_train, raft_gpu_train_time)) +cuvs_gpu_train_time = bench_train_milliseconds(index, xt, True) +if args.cuvs_only: + print("Method: IVFPQ, Operation: TRAIN, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numTrain: %d, cuVS enabled GPU train time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_train, cuvs_gpu_train_time)) else: classical_gpu_train_time = bench_train_milliseconds( index, xt, False) - print("Method: IVFPQ, Operation: TRAIN, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numTrain: %d, classical GPU train time: %.3f milliseconds, RAFT enabled GPU train time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_train, classical_gpu_train_time, raft_gpu_train_time)) + print("Method: IVFPQ, Operation: TRAIN, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numTrain: %d, classical GPU train time: %.3f milliseconds, cuVS enabled GPU train time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_train, classical_gpu_train_time, cuvs_gpu_train_time)) -def bench_add_milliseconds(index, addVecs, use_raft): +def bench_add_milliseconds(index, addVecs, use_cuvs): co = faiss.GpuMultipleClonerOptions() # use float 16 lookup tables to save space co.useFloat16LookupTables = True - co.use_raft = use_raft + co.use_cuvs = use_cuvs index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) index_gpu.copyFrom(index) t0 = time.time() @@ -119,20 +118,20 @@ def bench_add_milliseconds(index, addVecs, use_raft): print("GPU Add Benchmarks") print("=" * 40) index.train(xt) -raft_gpu_add_time = bench_add_milliseconds(index, xb, True) -if args.raft_only: - print("Method: IVFPQ, Operation: ADD, dim: %d, n_centroids %d numSubQuantizers %d, bitsPerCode %d, numAdd %d, RAFT enabled GPU add time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_rows, raft_gpu_add_time)) +cuvs_gpu_add_time = bench_add_milliseconds(index, xb, True) +if args.cuvs_only: + print("Method: IVFPQ, Operation: ADD, dim: %d, n_centroids %d numSubQuantizers %d, bitsPerCode %d, numAdd %d, cuVS enabled GPU add time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_rows, cuvs_gpu_add_time)) else: 
classical_gpu_add_time = bench_add_milliseconds( index, xb, False) - print("Method: IVFFPQ, Operation: ADD, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numAdd %d, classical GPU add time: %.3f milliseconds, RAFT enabled GPU add time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_rows, classical_gpu_add_time, raft_gpu_add_time)) + print("Method: IVFFPQ, Operation: ADD, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numAdd %d, classical GPU add time: %.3f milliseconds, cuVS enabled GPU add time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_rows, classical_gpu_add_time, cuvs_gpu_add_time)) -def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_raft): +def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_cuvs): co = faiss.GpuMultipleClonerOptions() - co.use_raft = use_raft + co.use_cuvs = use_cuvs co.useFloat16LookupTables = True index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) index_gpu.copyFrom(index) @@ -157,13 +156,13 @@ def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_raft): index.train(xt) for n_rows in queryset_sizes: queryVecs = xq[np.random.choice(xq.shape[0], n_rows, replace=False)] - raft_gpu_search_time = bench_search_milliseconds( + cuvs_gpu_search_time = bench_search_milliseconds( index, xb, queryVecs, args.nprobe, args.k, True) - if args.raft_only: - print("Method: IVFPQ, Operation: SEARCH, dim: %d, n_centroids: %d, numSubQuantizers %d, bitsPerCode %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, RAFT enabled GPU search time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_add, n_rows, args.nprobe, args.k, raft_gpu_search_time)) + if args.cuvs_only: + print("Method: IVFPQ, Operation: SEARCH, dim: %d, n_centroids: %d, numSubQuantizers %d, bitsPerCode %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, cuVS enabled GPU search time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_add, n_rows, args.nprobe, args.k, cuvs_gpu_search_time)) else: classical_gpu_search_time = bench_search_milliseconds( index, xb, queryVecs, args.nprobe, args.k, False) - print("Method: IVFPQ, Operation: SEARCH, dim: %d, n_centroids: %d, numSubQuantizers %d, bitsPerCode %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f milliseconds, RAFT enabled GPU search time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_add, n_rows, args.nprobe, args.k, classical_gpu_search_time, raft_gpu_search_time)) + print("Method: IVFPQ, Operation: SEARCH, dim: %d, n_centroids: %d, numSubQuantizers %d, bitsPerCode %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f milliseconds, cuVS enabled GPU search time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_add, n_rows, args.nprobe, args.k, classical_gpu_search_time, cuvs_gpu_search_time)) \ No newline at end of file diff --git a/c_api/gpu/CMakeLists.txt b/c_api/gpu/CMakeLists.txt index 88550e6750..5fdfc34dfd 100644 --- a/c_api/gpu/CMakeLists.txt +++ b/c_api/gpu/CMakeLists.txt @@ -19,9 +19,7 @@ if (FAISS_ENABLE_ROCM) target_link_libraries(faiss_c PUBLIC hip::host roc::hipblas) else() find_package(CUDAToolkit REQUIRED) - target_link_libraries(faiss_c PUBLIC CUDA::cudart CUDA::cublas - $<$:raft::raft> - $<$:nvidia::cutlass::cutlass>) + target_link_libraries(faiss_c PUBLIC CUDA::cudart CUDA::cublas $<$:cuvs::cuvs>) endif() add_executable(example_gpu_c EXCLUDE_FROM_ALL example_gpu_c.c) diff --git 
a/cmake/thirdparty/fetch_rapids.cmake b/cmake/thirdparty/fetch_rapids.cmake index 5e40865586..8d99161071 100644 --- a/cmake/thirdparty/fetch_rapids.cmake +++ b/cmake/thirdparty/fetch_rapids.cmake @@ -3,7 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -15,7 +15,7 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -set(RAPIDS_VERSION "24.06") +set(RAPIDS_VERSION "24.08") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/conda/faiss-gpu-raft/build-lib.sh b/conda/faiss-gpu-cuvs/build-lib.sh similarity index 95% rename from conda/faiss-gpu-raft/build-lib.sh rename to conda/faiss-gpu-cuvs/build-lib.sh index 78a7f87eae..37f0381809 100644 --- a/conda/faiss-gpu-raft/build-lib.sh +++ b/conda/faiss-gpu-cuvs/build-lib.sh @@ -13,7 +13,7 @@ cmake -B _build \ -DBUILD_TESTING=OFF \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=ON \ - -DFAISS_ENABLE_RAFT=ON \ + -DFAISS_ENABLE_CUVS=ON \ -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}" \ -DFAISS_ENABLE_PYTHON=OFF \ -DBLA_VENDOR=Intel10_64lp \ diff --git a/conda/faiss-gpu-raft/build-pkg.sh b/conda/faiss-gpu-cuvs/build-pkg.sh similarity index 95% rename from conda/faiss-gpu-raft/build-pkg.sh rename to conda/faiss-gpu-cuvs/build-pkg.sh index 66a91bd006..09d6e6b7af 100644 --- a/conda/faiss-gpu-raft/build-pkg.sh +++ b/conda/faiss-gpu-cuvs/build-pkg.sh @@ -12,7 +12,7 @@ cmake -B _build_python_${PY_VER} \ -Dfaiss_ROOT=_libfaiss_stage/ \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=ON \ - -DFAISS_ENABLE_RAFT=ON \ + -DFAISS_ENABLE_CUVS=ON \ -DCMAKE_BUILD_TYPE=Release \ -DPython_EXECUTABLE=$PYTHON \ faiss/python diff --git a/conda/faiss-gpu-raft/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml similarity index 97% rename from conda/faiss-gpu-raft/meta.yaml rename to conda/faiss-gpu-cuvs/meta.yaml index 1dde8e9868..6c43d19a7c 100644 --- a/conda/faiss-gpu-raft/meta.yaml +++ b/conda/faiss-gpu-cuvs/meta.yaml @@ -38,7 +38,7 @@ outputs: script: build-lib-arm64.sh # [not x86_64] script: build-lib.bat # [win] build: - string: "h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}_raft{{ suffix }}" + string: "h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}_cuvs{{ suffix }}" run_exports: - {{ pin_compatible('libfaiss', exact=True) }} script_env: @@ -58,7 +58,7 @@ outputs: - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - mkl =2023 # [x86_64] - openblas # [not x86_64] - - libraft =24.06 + - libcuvs =24.08 - cuda-version {{ cuda_constraints }} run: - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] @@ -66,7 +66,7 @@ outputs: - openblas # [not x86_64] - cuda-cudart {{ cuda_constraints }} - libcublas {{ libcublas_constraints }} - - libraft =24.06 + - libcuvs =24.08 - cuda-version {{ cuda_constraints }} test: requires: @@ -77,7 +77,7 @@ outputs: - conda inspect linkages -p $PREFIX $PKG_NAME # [not win] - conda inspect objects -p $PREFIX $PKG_NAME # [osx] - - name: faiss-gpu-raft + - name: faiss-gpu-cuvs 
script: build-pkg.sh # [x86_64 and not win and not osx] script: build-pkg-osx.sh # [x86_64 and osx] script: build-pkg-arm64.sh # [not x86_64] diff --git a/conda/faiss-gpu-raft/test_cpu_dispatch.sh b/conda/faiss-gpu-cuvs/test_cpu_dispatch.sh similarity index 100% rename from conda/faiss-gpu-raft/test_cpu_dispatch.sh rename to conda/faiss-gpu-cuvs/test_cpu_dispatch.sh diff --git a/conda/faiss-gpu/build-lib.sh b/conda/faiss-gpu/build-lib.sh index 9cb3ad468b..befad80547 100755 --- a/conda/faiss-gpu/build-lib.sh +++ b/conda/faiss-gpu/build-lib.sh @@ -19,7 +19,7 @@ cmake -B _build \ -DBUILD_TESTING=OFF \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=ON \ - -DFAISS_ENABLE_RAFT=OFF \ + -DFAISS_ENABLE_CUVS=OFF \ -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}" \ -DFAISS_ENABLE_PYTHON=OFF \ -DBLA_VENDOR=Intel10_64lp \ diff --git a/conda/faiss-gpu/build-pkg.sh b/conda/faiss-gpu/build-pkg.sh index f90ff7d38f..64afe6c13f 100755 --- a/conda/faiss-gpu/build-pkg.sh +++ b/conda/faiss-gpu/build-pkg.sh @@ -12,7 +12,7 @@ cmake -B _build_python_${PY_VER} \ -Dfaiss_ROOT=_libfaiss_stage/ \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=ON \ - -DFAISS_ENABLE_RAFT=OFF \ + -DFAISS_ENABLE_CUVS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DPython_EXECUTABLE=$PYTHON \ faiss/python diff --git a/contrib/torch_utils.py b/contrib/torch_utils.py index 6db271ec48..5568901d84 100644 --- a/contrib/torch_utils.py +++ b/contrib/torch_utils.py @@ -586,7 +586,7 @@ def torch_replacement_knn(xq, xb, k, metric=faiss.METRIC_L2, metric_arg=0): # allows torch tensor usage with bfKnn -def torch_replacement_knn_gpu(res, xq, xb, k, D=None, I=None, metric=faiss.METRIC_L2, device=-1, use_raft=False): +def torch_replacement_knn_gpu(res, xq, xb, k, D=None, I=None, metric=faiss.METRIC_L2, device=-1, use_cuvs=False): if type(xb) is np.ndarray: # Forward to faiss __init__.py base method return faiss.knn_gpu_numpy(res, xq, xb, k, D, I, metric, device) @@ -667,7 +667,7 @@ def torch_replacement_knn_gpu(res, xq, xb, k, D=None, I=None, metric=faiss.METRI args.outIndices = I_ptr args.outIndicesType = I_type args.device = device - args.use_raft = use_raft + args.use_cuvs = use_cuvs with using_stream(res): faiss.bfKnn(res, args) diff --git a/faiss/gpu/CMakeLists.txt b/faiss/gpu/CMakeLists.txt index 3517827750..84cb222145 100644 --- a/faiss/gpu/CMakeLists.txt +++ b/faiss/gpu/CMakeLists.txt @@ -5,7 +5,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -241,21 +241,21 @@ endfunction() generate_ivf_interleaved_code() -if(FAISS_ENABLE_RAFT) +if(FAISS_ENABLE_CUVS) list(APPEND FAISS_GPU_HEADERS GpuIndexCagra.h - impl/RaftCagra.cuh - impl/RaftFlatIndex.cuh - impl/RaftIVFFlat.cuh - impl/RaftIVFPQ.cuh - utils/RaftUtils.h) + impl/CuvsCagra.cuh + impl/CuvsFlatIndex.cuh + impl/CuvsIVFFlat.cuh + impl/CuvsIVFPQ.cuh + utils/CuvsUtils.h) list(APPEND FAISS_GPU_SRC GpuIndexCagra.cu - impl/RaftCagra.cu - impl/RaftFlatIndex.cu - impl/RaftIVFFlat.cu - impl/RaftIVFPQ.cu - utils/RaftUtils.cu) + impl/CuvsCagra.cu + impl/CuvsFlatIndex.cu + impl/CuvsIVFFlat.cu + impl/CuvsIVFPQ.cu + utils/CuvsUtils.cu) endif() add_library(faiss_gpu STATIC ${FAISS_GPU_SRC}) @@ -266,10 +266,10 @@ set_target_properties(faiss_gpu PROPERTIES target_include_directories(faiss_gpu PUBLIC $) -if(FAISS_ENABLE_RAFT) - target_compile_definitions(faiss PUBLIC USE_NVIDIA_RAFT=1) - target_compile_definitions(faiss_avx2 PUBLIC USE_NVIDIA_RAFT=1) - target_compile_definitions(faiss_avx512 PUBLIC USE_NVIDIA_RAFT=1) +if(FAISS_ENABLE_CUVS) + target_compile_definitions(faiss PUBLIC USE_NVIDIA_CUVS=1) + target_compile_definitions(faiss_avx2 PUBLIC USE_NVIDIA_CUVS=1) + target_compile_definitions(faiss_avx512 PUBLIC USE_NVIDIA_CUVS=1) # Mark all functions as hidden so that we don't generate # global 'public' functions that also exist in libraft.so @@ -286,13 +286,13 @@ if(FAISS_ENABLE_RAFT) set_source_files_properties( GpuDistance.cu StandardGpuResources.cpp - impl/RaftFlatIndex.cu - impl/RaftIVFFlat.cu - impl/RaftIVFPQ.cu - utils/RaftUtils.cu + impl/CuvsFlatIndex.cu + impl/CuvsIVFFlat.cu + impl/CuvsIVFPQ.cu + utils/CuvsUtils.cu TARGET_DIRECTORY faiss PROPERTIES COMPILE_OPTIONS "-fvisibility=hidden") - target_compile_definitions(faiss_gpu PUBLIC USE_NVIDIA_RAFT=1) + target_compile_definitions(faiss_gpu PUBLIC USE_NVIDIA_CUVS=1) endif() if (FAISS_ENABLE_ROCM) @@ -333,14 +333,11 @@ else() ) target_link_options(faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") + find_package(CUDAToolkit REQUIRED) - target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas - $<$:raft::raft> - $<$:raft::compiled> - $<$:nvidia::cutlass::cutlass> - $<$:OpenMP::OpenMP_CXX>) + target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$:cuvs::cuvs> $<$:OpenMP::OpenMP_CXX>) target_compile_options(faiss_gpu PRIVATE $<$:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr - $<$:-Xcompiler=${OpenMP_CXX_FLAGS}>>) + $<$:-Xcompiler=${OpenMP_CXX_FLAGS}>>) endif() diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp index a5a57da770..575ee2e0a5 100644 --- a/faiss/gpu/GpuCloner.cpp +++ b/faiss/gpu/GpuCloner.cpp @@ -14,7 +14,7 @@ #include #include -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS #include #endif #include @@ -27,7 +27,7 @@ #include #include #include -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS #include #endif #include @@ -92,7 +92,7 @@ Index* ToCPUCloner::clone_Index(const Index* index) { // (inverse op of ToGpuClonerMultiple) } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS else if (auto icg = dynamic_cast(index)) { IndexHNSWCagra* res = new IndexHNSWCagra(); icg->copyTo(res); @@ -138,7 +138,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) { GpuIndexFlatConfig config; config.device = device; config.useFloat16 = useFloat16; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; return new GpuIndexFlat(provider, ifl, config); } else if ( dynamic_cast(index) && @@ -148,7 +148,7 @@ 
Index* ToGpuCloner::clone_Index(const Index* index) { config.device = device; config.useFloat16 = true; FAISS_THROW_IF_NOT_MSG( - !use_raft, "this type of index is not implemented for RAFT"); + !use_cuvs, "this type of index is not implemented for cuVS"); GpuIndexFlat* gif = new GpuIndexFlat( provider, index->d, index->metric_type, config); // transfer data by blocks @@ -166,7 +166,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) { config.device = device; config.indicesOptions = indicesOptions; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer; GpuIndexIVFFlat* res = new GpuIndexIVFFlat( @@ -185,7 +185,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) { config.indicesOptions = indicesOptions; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; FAISS_THROW_IF_NOT_MSG( - !use_raft, "this type of index is not implemented for RAFT"); + !use_cuvs, "this type of index is not implemented for cuVS"); GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer( provider, @@ -218,8 +218,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) { config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; config.useFloat16LookupTables = useFloat16; config.usePrecomputedTables = usePrecomputed; - config.use_raft = use_raft; - config.interleavedLayout = use_raft; + config.use_cuvs = use_cuvs; + config.interleavedLayout = use_cuvs; config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer; GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config); @@ -230,7 +230,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) { return res; } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS else if (auto icg = dynamic_cast(index)) { GpuIndexCagraConfig config; config.device = device; @@ -541,7 +541,7 @@ faiss::IndexBinary* index_binary_cpu_to_gpu( GpuIndexBinaryFlatConfig config; config.device = device; if (options) { - config.use_raft = options->use_raft; + config.use_cuvs = options->use_cuvs; } return new GpuIndexBinaryFlat(provider, ii, config); } else { diff --git a/faiss/gpu/GpuClonerOptions.h b/faiss/gpu/GpuClonerOptions.h index f3788fd170..e524ca6829 100644 --- a/faiss/gpu/GpuClonerOptions.h +++ b/faiss/gpu/GpuClonerOptions.h @@ -37,11 +37,11 @@ struct GpuClonerOptions { /// Set verbose options on the index bool verbose = false; - /// use the RAFT implementation -#if defined USE_NVIDIA_RAFT - bool use_raft = true; + /// use the cuVS implementation +#if defined USE_NVIDIA_CUVS + bool use_cuvs = true; #else - bool use_raft = false; + bool use_cuvs = false; #endif /// This flag controls the CPU fallback logic for coarse quantizer diff --git a/faiss/gpu/GpuDistance.cu b/faiss/gpu/GpuDistance.cu index d5082bbee7..f515067889 100644 --- a/faiss/gpu/GpuDistance.cu +++ b/faiss/gpu/GpuDistance.cu @@ -30,9 +30,11 @@ #include #include #include +#include -#if defined USE_NVIDIA_RAFT -#include +#if defined USE_NVIDIA_CUVS +#include +#include #include #include #include @@ -41,32 +43,19 @@ #include #include #include -#define RAFT_NAME "raft" #endif namespace faiss { namespace gpu { -#if defined USE_NVIDIA_RAFT -using namespace raft::distance; -using namespace raft::neighbors; -#endif - -/// Caches device major version -int device_major_version = -1; - -bool should_use_raft(GpuDistanceParams args) { - if (device_major_version < 0) { - cudaDeviceProp prop; - int dev = args.device >= 0 ? 
args.device : getCurrentDevice(); - cudaGetDeviceProperties(&prop, dev); - device_major_version = prop.major; - } +bool should_use_cuvs(GpuDistanceParams args) { + int dev = args.device >= 0 ? args.device : getCurrentDevice(); + auto prop = getDeviceProperties(dev); - if (device_major_version < 7) + if (prop.major < 7) return false; - return args.use_raft; + return args.use_cuvs; } template @@ -244,13 +233,33 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) { "limitation: both vectorType and queryType must currently " "be the same (F32 or F16"); -#if defined USE_NVIDIA_RAFT - // Note: For now, RAFT bfknn requires queries and vectors to be same layout - if (should_use_raft(args) && args.queriesRowMajor == args.vectorsRowMajor) { - DistanceType distance = metricFaissToRaft(args.metric, false); +#if defined USE_NVIDIA_CUVS + // Note: For now, cuVS bfknn requires queries and vectors to be same layout + if (should_use_cuvs(args) && args.queriesRowMajor == args.vectorsRowMajor && + args.outIndicesType == IndicesDataType::I64 && + args.vectorType == DistanceDataType::F32 && args.k > 0) { + cuvsDistanceType distance = metricFaissToCuvs(args.metric, false); auto resImpl = prov->getResources(); auto res = resImpl.get(); + // If the user specified a device, then ensure that it is currently set + int device = -1; + if (args.device == -1) { + // Original behavior if no device is specified, use the current CUDA + // thread local device + device = getCurrentDevice(); + } else { + // Otherwise, use the device specified in `args` + device = args.device; + + FAISS_THROW_IF_NOT_FMT( + device >= 0 && device < getNumDevices(), + "bfKnn: device specified must be -1 (current CUDA thread local device) " + "or within the range [0, %d)", + getNumDevices()); + } + + DeviceScope scope(device); raft::device_resources& handle = res->getRaftHandleCurrentDevice(); auto stream = res->getDefaultStreamCurrentDevice(); @@ -306,10 +315,16 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) { raft::vector_extent(num_queries)); norms_view = norms->view(); } - raft::neighbors::brute_force::index idx( + + cuvs::neighbors::brute_force::index idx( handle, index.view(), norms_view, distance, metric_arg); - raft::neighbors::brute_force::search( - handle, idx, search.view(), inds.view(), dists.view()); + cuvs::neighbors::brute_force::search( + handle, + idx, + search.view(), + inds.view(), + dists.view(), + std::nullopt); } else { auto index = raft::make_readonly_temporary_device_buffer< const float, @@ -329,20 +344,31 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) { reinterpret_cast(args.queries)), raft::matrix_extent(num_queries, dims)); - std::vector> - index_vec = {index.view()}; + raft::vector_extent>> + norms; + std::optional> + norms_view; + if (args.vectorNorms) { + norms = raft::make_readonly_temporary_device_buffer< + const float, + int64_t>( + handle, + args.vectorNorms, + raft::vector_extent(num_queries)); + norms_view = norms->view(); + } - brute_force::knn( + cuvs::neighbors::brute_force::index idx( + handle, index.view(), norms_view, distance, metric_arg); + cuvs::neighbors::brute_force::search( handle, - index_vec, + idx, search.view(), inds.view(), dists.view(), - distance, - metric_arg); + std::nullopt); } if (args.metric == MetricType::METRIC_Lp) { @@ -364,10 +390,10 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) { handle.sync_stream(); } else #else - if (should_use_raft(args)) { + if (should_use_cuvs(args)) { 
FAISS_THROW_IF_NOT_MSG( - !should_use_raft(args), - "RAFT has not been compiled into the current version so it cannot be used."); + !should_use_cuvs(args), + "cuVS has not been compiled into the current version so it cannot be used."); } else #endif if (args.vectorType == DistanceDataType::F32) { diff --git a/faiss/gpu/GpuDistance.h b/faiss/gpu/GpuDistance.h index 54777058a6..7052fc68b0 100644 --- a/faiss/gpu/GpuDistance.h +++ b/faiss/gpu/GpuDistance.h @@ -106,14 +106,17 @@ struct GpuDistanceParams { /// execution int device = -1; - /// Should the index dispatch down to RAFT? - /// TODO: change default to true if RAFT is enabled - bool use_raft = false; + /// Should the index dispatch down to cuVS? +#if defined USE_NVIDIA_CUVS + bool use_cuvs = true; +#else + bool use_cuvs = false; +#endif }; -/// A function that determines whether RAFT should be used based on various +/// A function that determines whether cuVS should be used based on various /// conditions (such as unsupported architecture) -bool should_use_raft(GpuDistanceParams args); +bool should_use_cuvs(GpuDistanceParams args); /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest /// neighbor searches on an externally-provided region of memory (e.g., from a diff --git a/faiss/gpu/GpuIndex.cu b/faiss/gpu/GpuIndex.cu index 0740ad4dab..033cb9aaa9 100644 --- a/faiss/gpu/GpuIndex.cu +++ b/faiss/gpu/GpuIndex.cu @@ -42,20 +42,13 @@ constexpr idx_t kAddVecSize = (idx_t)512 * 1024; // FIXME: parameterize based on algorithm need constexpr idx_t kSearchVecSize = (idx_t)32 * 1024; -/// Caches device major version -extern int device_major_version; - -bool should_use_raft(GpuIndexConfig config_) { - if (device_major_version < 0) { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, config_.device); - device_major_version = prop.major; - } +bool should_use_cuvs(GpuIndexConfig config_) { + auto prop = getDeviceProperties(config_.device); - if (device_major_version < 7) + if (prop.major < 7) return false; - return config_.use_raft; + return config_.use_cuvs; } GpuIndex::GpuIndex( @@ -148,7 +141,7 @@ void GpuIndex::addPaged_(idx_t n, const float* x, const idx_t* ids) { if (n > 0) { idx_t totalSize = n * this->d * sizeof(float); - if (!should_use_raft(config_) && + if (!should_use_cuvs(config_) && (totalSize > kAddPageSize || n > kAddVecSize)) { // How many vectors fit into kAddPageSize? idx_t maxNumVecsForPageSize = @@ -540,8 +533,8 @@ extern std::string gpu_compile_options; struct InitGpuCompileOptions { InitGpuCompileOptions() { gpu_compile_options = "GPU "; -#ifdef USE_NVIDIA_RAFT - gpu_compile_options += "NVIDIA_RAFT "; +#ifdef USE_NVIDIA_CUVS + gpu_compile_options += "NVIDIA_CUVS "; #endif #ifdef USE_AMD_ROCM diff --git a/faiss/gpu/GpuIndex.h b/faiss/gpu/GpuIndex.h index 4098e6a101..48bb7a17a3 100644 --- a/faiss/gpu/GpuIndex.h +++ b/faiss/gpu/GpuIndex.h @@ -38,17 +38,17 @@ struct GpuIndexConfig { /// more memory than is available on the GPU. MemorySpace memorySpace = MemorySpace::Device; - /// Should the index dispatch down to RAFT? -#if defined USE_NVIDIA_RAFT - bool use_raft = true; + /// Should the index dispatch down to cuVS? 
+#if defined USE_NVIDIA_CUVS + bool use_cuvs = true; #else - bool use_raft = false; + bool use_cuvs = false; #endif }; -/// A centralized function that determines whether RAFT should +/// A centralized function that determines whether cuVS should /// be used based on various conditions (such as unsupported architecture) -bool should_use_raft(GpuIndexConfig config_); +bool should_use_cuvs(GpuIndexConfig config_); class GpuIndex : public faiss::Index { public: diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index fb90758f78..fe0c82b8aa 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -23,8 +23,9 @@ #include #include +#include #include -#include +#include #include namespace faiss { @@ -41,6 +42,7 @@ GpuIndexCagra::GpuIndexCagra( } void GpuIndexCagra::train(idx_t n, const float* x) { + DeviceScope scope(config_.device); if (this->is_trained) { FAISS_ASSERT(index_); return; @@ -48,13 +50,13 @@ void GpuIndexCagra::train(idx_t n, const float* x) { FAISS_ASSERT(!index_); - std::optional ivf_pq_params = + std::optional ivf_pq_params = std::nullopt; - std::optional ivf_pq_search_params = + std::optional ivf_pq_search_params = std::nullopt; if (cagraConfig_.ivf_pq_params != nullptr) { ivf_pq_params = - std::make_optional(); + std::make_optional(); ivf_pq_params->n_lists = cagraConfig_.ivf_pq_params->n_lists; ivf_pq_params->kmeans_n_iters = cagraConfig_.ivf_pq_params->kmeans_n_iters; @@ -63,7 +65,7 @@ void GpuIndexCagra::train(idx_t n, const float* x) { ivf_pq_params->pq_bits = cagraConfig_.ivf_pq_params->pq_bits; ivf_pq_params->pq_dim = cagraConfig_.ivf_pq_params->pq_dim; ivf_pq_params->codebook_kind = - static_cast( + static_cast( cagraConfig_.ivf_pq_params->codebook_kind); ivf_pq_params->force_random_rotation = cagraConfig_.ivf_pq_params->force_random_rotation; @@ -72,7 +74,7 @@ void GpuIndexCagra::train(idx_t n, const float* x) { } if (cagraConfig_.ivf_pq_search_params != nullptr) { ivf_pq_search_params = - std::make_optional(); + std::make_optional(); ivf_pq_search_params->n_probes = cagraConfig_.ivf_pq_search_params->n_probes; ivf_pq_search_params->lut_dtype = @@ -80,18 +82,20 @@ void GpuIndexCagra::train(idx_t n, const float* x) { ivf_pq_search_params->preferred_shmem_carveout = cagraConfig_.ivf_pq_search_params->preferred_shmem_carveout; } - index_ = std::make_shared( + index_ = std::make_shared( this->resources_.get(), this->d, cagraConfig_.intermediate_graph_degree, cagraConfig_.graph_degree, static_cast(cagraConfig_.build_algo), cagraConfig_.nn_descent_niter, + cagraConfig_.store_dataset, this->metric_type, this->metric_arg, INDICES_64_BIT, ivf_pq_params, - ivf_pq_search_params); + ivf_pq_search_params, + cagraConfig_.refine_rate); index_->train(n, x); @@ -180,7 +184,7 @@ void GpuIndexCagra::copyFrom(const faiss::IndexHNSWCagra* index) { } } - index_ = std::make_shared( + index_ = std::make_shared( this->resources_.get(), this->d, index->ntotal, @@ -226,17 +230,33 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { index->hnsw.set_default_probas(M, 1.0 / log(M)); auto n_train = this->ntotal; - auto train_dataset = index_->get_training_dataset(); + float* train_dataset; + auto dataset = index_->get_training_dataset(); + bool allocation = false; + if (getDeviceForAddress(dataset) >= 0) { + train_dataset = new float[n_train * index->d]; + allocation = true; + raft::copy( + train_dataset, + dataset, + n_train * index->d, + this->resources_->getRaftHandleCurrentDevice().get_stream()); + } else { + train_dataset = 
const_cast(dataset); + } // turn off as level 0 is copied from CAGRA graph index->init_level0 = false; if (!index->base_level_only) { - index->add(n_train, train_dataset.data()); + index->add(n_train, train_dataset); } else { index->hnsw.prepare_level_tab(n_train, false); - index->storage->add(n_train, train_dataset.data()); + index->storage->add(n_train, train_dataset); index->ntotal = n_train; } + if (allocation) { + delete[] train_dataset; + } auto graph = get_knngraph(); diff --git a/faiss/gpu/GpuIndexCagra.h b/faiss/gpu/GpuIndexCagra.h index 9c39e6a95a..d6fae29b58 100644 --- a/faiss/gpu/GpuIndexCagra.h +++ b/faiss/gpu/GpuIndexCagra.h @@ -34,12 +34,12 @@ struct IndexHNSWCagra; namespace faiss { namespace gpu { -class RaftCagra; +class CuvsCagra; enum class graph_build_algo { /// Use IVF-PQ to build all-neighbors knn graph IVF_PQ, - /// Experimental, use NN-Descent to build all-neighbors knn graph + /// Use NN-Descent to build all-neighbors knn graph NN_DESCENT }; @@ -175,6 +175,8 @@ struct GpuIndexCagraConfig : public GpuIndexConfig { IVFPQBuildCagraConfig* ivf_pq_params = nullptr; IVFPQSearchCagraConfig* ivf_pq_search_params = nullptr; + float refine_rate = 2.0f; + bool store_dataset = true; }; enum class search_algo { @@ -276,7 +278,7 @@ struct GpuIndexCagra : public GpuIndex { const GpuIndexCagraConfig cagraConfig_; /// Instance that we own; contains the inverted lists - std::shared_ptr index_; + std::shared_ptr index_; }; } // namespace gpu diff --git a/faiss/gpu/GpuIndexFlat.cu b/faiss/gpu/GpuIndexFlat.cu index baa27d6e85..eb87e082e9 100644 --- a/faiss/gpu/GpuIndexFlat.cu +++ b/faiss/gpu/GpuIndexFlat.cu @@ -18,8 +18,8 @@ #include #include -#if defined USE_NVIDIA_RAFT -#include +#if defined USE_NVIDIA_CUVS +#include #endif namespace faiss { @@ -93,19 +93,19 @@ GpuIndexFlat::GpuIndexFlat( GpuIndexFlat::~GpuIndexFlat() {} void GpuIndexFlat::resetIndex_(int dims) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS - if (should_use_raft(config_)) { - data_.reset(new RaftFlatIndex( + if (should_use_cuvs(config_)) { + data_.reset(new CuvsFlatIndex( resources_.get(), dims, flatConfig_.useFloat16, config_.memorySpace)); } else #else - if (should_use_raft(config_)) { + if (should_use_cuvs(config_)) { FAISS_THROW_MSG( - "RAFT has not been compiled into the current version so it cannot be used."); + "cuVS has not been compiled into the current version so it cannot be used."); } else #endif { diff --git a/faiss/gpu/GpuIndexIVF.cu b/faiss/gpu/GpuIndexIVF.cu index 846bc197e4..a882d8a3d2 100644 --- a/faiss/gpu/GpuIndexIVF.cu +++ b/faiss/gpu/GpuIndexIVF.cu @@ -92,7 +92,7 @@ void GpuIndexIVF::init_() { GpuIndexFlatConfig config = ivfConfig_.flatConfig; // inherit our same device config.device = config_.device; - config.use_raft = config_.use_raft; + config.use_cuvs = config_.use_cuvs; if (metric_type == faiss::METRIC_L2) { quantizer = new GpuIndexFlatL2(resources_, d, config); diff --git a/faiss/gpu/GpuIndexIVFFlat.cu b/faiss/gpu/GpuIndexIVFFlat.cu index 0d8396b193..ceeb2dda76 100644 --- a/faiss/gpu/GpuIndexIVFFlat.cu +++ b/faiss/gpu/GpuIndexIVFFlat.cu @@ -15,10 +15,10 @@ #include #include -#if defined USE_NVIDIA_RAFT -#include -#include -#include +#if defined USE_NVIDIA_CUVS +#include +#include +#include #endif #include @@ -73,8 +73,8 @@ GpuIndexIVFFlat::GpuIndexIVFFlat( ivfFlatConfig_(config), reserveMemoryVecs_(0) { FAISS_THROW_IF_NOT_MSG( - !should_use_raft(config), - "GpuIndexIVFFlat: RAFT does not support separate coarseQuantizer"); + !should_use_cuvs(config), + "GpuIndexIVFFlat: cuVS 
does not support separate coarseQuantizer"); // We could have been passed an already trained coarse quantizer. There is // no other quantizer that we need to train, so this is sufficient if (this->is_trained) { @@ -100,9 +100,9 @@ GpuIndexIVFFlat::~GpuIndexIVFFlat() {} void GpuIndexIVFFlat::reserveMemory(size_t numVecs) { DeviceScope scope(config_.device); - if (should_use_raft(config_)) { + if (should_use_cuvs(config_)) { FAISS_THROW_MSG( - "Pre-allocation of IVF lists is not supported with RAFT enabled."); + "Pre-allocation of IVF lists is not supported with cuVS enabled."); } reserveMemoryVecs_ = numVecs; @@ -120,8 +120,8 @@ void GpuIndexIVFFlat::copyFrom(const faiss::IndexIVFFlat* index) { // Clear out our old data index_.reset(); - // skip base class allocations if RAFT is enabled - if (!should_use_raft(config_)) { + // skip base class allocations if cuVS is enabled + if (!should_use_cuvs(config_)) { baseIndex_.reset(); } @@ -213,12 +213,12 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) { if (this->is_trained) { FAISS_ASSERT(index_); - if (should_use_raft(config_)) { - // if RAFT is enabled, copy the IVF centroids to the RAFT index in - // case it has been reset. This is because reset clears the RAFT - // index and its centroids. + if (should_use_cuvs(config_)) { + // copy the IVF centroids to the cuVS index + // in case it has been reset. This is because `reset` clears the + // cuVS index and its centroids. // TODO: change this once the coarse quantizer is separated from - // RAFT index + // cuVS index updateQuantizer(); }; return; @@ -226,8 +226,8 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) { FAISS_ASSERT(!index_); - if (should_use_raft(config_)) { -#if defined USE_NVIDIA_RAFT + if (should_use_cuvs(config_)) { +#if defined USE_NVIDIA_CUVS setIndex_( resources_.get(), this->d, @@ -242,30 +242,43 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) { const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_flat::index_params raft_idx_params; - raft_idx_params.n_lists = nlist; - raft_idx_params.metric = metricFaissToRaft(metric_type, false); - raft_idx_params.add_data_on_build = false; - raft_idx_params.kmeans_trainset_fraction = + cuvs::neighbors::ivf_flat::index_params cuvs_index_params; + cuvs_index_params.n_lists = nlist; + cuvs_index_params.metric = metricFaissToCuvs(metric_type, false); + cuvs_index_params.add_data_on_build = false; + cuvs_index_params.kmeans_trainset_fraction = static_cast(cp.max_points_per_centroid * nlist) / static_cast(n); - raft_idx_params.kmeans_n_iters = cp.niter; - - auto raftIndex_ = - std::static_pointer_cast(index_); - - raft::neighbors::ivf_flat::index raft_ivfflat_index = - raft::neighbors::ivf_flat::build( - raft_handle, raft_idx_params, x, n, (idx_t)d); + cuvs_index_params.kmeans_n_iters = cp.niter; + + auto cuvsIndex_ = + std::static_pointer_cast(index_); + + std::optional> + cuvs_ivfflat_index; + + if (getDeviceForAddress(x) >= 0) { + auto dataset_d = + raft::make_device_matrix_view(x, n, d); + cuvs_ivfflat_index = cuvs::neighbors::ivf_flat::build( + raft_handle, cuvs_index_params, dataset_d); + } else { + auto dataset_h = + raft::make_host_matrix_view(x, n, d); + cuvs_ivfflat_index = cuvs::neighbors::ivf_flat::build( + raft_handle, cuvs_index_params, dataset_h); + } - quantizer->train(nlist, raft_ivfflat_index.centers().data_handle()); - quantizer->add(nlist, raft_ivfflat_index.centers().data_handle()); + quantizer->train( + nlist, 
cuvs_ivfflat_index.value().centers().data_handle()); + quantizer->add( + nlist, cuvs_ivfflat_index.value().centers().data_handle()); raft_handle.sync_stream(); - raftIndex_->setRaftIndex(std::move(raft_ivfflat_index)); + cuvsIndex_->setCuvsIndex(std::move(*cuvs_ivfflat_index)); #else FAISS_THROW_MSG( - "RAFT has not been compiled into the current version so it cannot be used."); + "cuVS has not been compiled into the current version so it cannot be used."); #endif } else { // FIXME: GPUize more of this @@ -295,9 +308,9 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) { baseIndex_ = std::static_pointer_cast(index_); if (reserveMemoryVecs_) { - if (should_use_raft(config_)) { + if (should_use_cuvs(config_)) { FAISS_THROW_MSG( - "Pre-allocation of IVF lists is not supported with RAFT enabled."); + "Pre-allocation of IVF lists is not supported with cuVS enabled."); } else index_->reserveMemory(reserveMemoryVecs_); } @@ -317,16 +330,16 @@ void GpuIndexIVFFlat::setIndex_( bool interleavedLayout, IndicesOptions indicesOptions, MemorySpace space) { - if (should_use_raft(config_)) { -#if defined USE_NVIDIA_RAFT + if (should_use_cuvs(config_)) { +#if defined USE_NVIDIA_CUVS FAISS_THROW_IF_NOT_MSG( ivfFlatConfig_.indicesOptions == INDICES_64_BIT, - "RAFT only supports INDICES_64_BIT"); + "cuVS only supports INDICES_64_BIT"); if (!ivfFlatConfig_.interleavedLayout) { fprintf(stderr, - "WARN: interleavedLayout is set to False with RAFT enabled. This will be ignored.\n"); + "WARN: interleavedLayout is set to False with cuVS enabled. This will be ignored.\n"); } - index_.reset(new RaftIVFFlat( + index_.reset(new CuvsIVFFlat( resources, dim, nlist, @@ -339,7 +352,7 @@ void GpuIndexIVFFlat::setIndex_( space)); #else FAISS_THROW_MSG( - "RAFT has not been compiled into the current version so it cannot be used."); + "cuVS has not been compiled into the current version so it cannot be used."); #endif } else { index_.reset(new IVFFlat( diff --git a/faiss/gpu/GpuIndexIVFPQ.cu b/faiss/gpu/GpuIndexIVFPQ.cu index 27cc7ecc9e..da0e5ac8f3 100644 --- a/faiss/gpu/GpuIndexIVFPQ.cu +++ b/faiss/gpu/GpuIndexIVFPQ.cu @@ -15,11 +15,10 @@ #include #include -#if defined USE_NVIDIA_RAFT -#include -#include -#include -#include +#if defined USE_NVIDIA_CUVS +#include +#include +#include #endif #include @@ -95,8 +94,8 @@ GpuIndexIVFPQ::GpuIndexIVFPQ( this->is_trained = false; FAISS_THROW_IF_NOT_MSG( - !config.use_raft, - "GpuIndexIVFPQ: RAFT does not support separate coarseQuantizer"); + !config.use_cuvs, + "GpuIndexIVFPQ: cuVS does not support separate coarseQuantizer"); verifyPQSettings_(); } @@ -112,8 +111,8 @@ void GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) { // Clear out our old data index_.reset(); - // skip base class allocations if RAFT is enabled - if (!should_use_raft(config_)) { + // skip base class allocations if cuVS is enabled + if (!should_use_cuvs(config_)) { baseIndex_.reset(); } @@ -323,7 +322,7 @@ void GpuIndexIVFPQ::trainResidualQuantizer_(idx_t n, const float* x) { try { GpuIndexFlatConfig config; config.device = ivfpqConfig_.device; - config.use_raft = false; + config.use_cuvs = false; GpuIndexFlatL2 pqIndex(resources_, pq.dsub, config); pq.assign_index = &pqIndex; @@ -349,12 +348,12 @@ void GpuIndexIVFPQ::train(idx_t n, const float* x) { if (this->is_trained) { FAISS_ASSERT(index_); - if (should_use_raft(config_)) { - // if RAFT is enabled, copy the IVF centroids to the RAFT index in - // case it has been reset. 
This is because reset clears the RAFT + if (should_use_cuvs(config_)) { + // if cuVS is enabled, copy the IVF centroids to the cuVS index in + // case it has been reset. This is because reset clears the cuVS // index and its centroids. // TODO: change this once the coarse quantizer is separated from - // RAFT index + // cuVS index updateQuantizer(); }; return; @@ -362,13 +361,13 @@ void GpuIndexIVFPQ::train(idx_t n, const float* x) { FAISS_ASSERT(!index_); - // RAFT does not support using an external index for assignment. Fall back + // cuVS does not support using an external index for assignment. Fall back // to the classical GPU impl - if (should_use_raft(config_)) { -#if defined USE_NVIDIA_RAFT + if (should_use_cuvs(config_)) { +#if defined USE_NVIDIA_CUVS if (pq.assign_index) { fprintf(stderr, - "WARN: The Product Quantizer's assign_index will be ignored with RAFT enabled.\n"); + "WARN: The Product Quantizer's assign_index will be ignored with cuVS enabled.\n"); } // first initialize the index. The PQ centroids will be updated // retroactively. @@ -390,44 +389,54 @@ void GpuIndexIVFPQ::train(idx_t n, const float* x) { const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_pq::index_params raft_idx_params; - raft_idx_params.n_lists = nlist; - raft_idx_params.metric = metricFaissToRaft(metric_type, false); - raft_idx_params.kmeans_trainset_fraction = + cuvs::neighbors::ivf_pq::index_params cuvs_index_params; + cuvs_index_params.n_lists = nlist; + cuvs_index_params.metric = metricFaissToCuvs(metric_type, false); + cuvs_index_params.kmeans_trainset_fraction = static_cast(cp.max_points_per_centroid * nlist) / static_cast(n); - raft_idx_params.kmeans_n_iters = cp.niter; - raft_idx_params.pq_bits = bitsPerCode_; - raft_idx_params.pq_dim = subQuantizers_; - raft_idx_params.conservative_memory_allocation = false; - raft_idx_params.add_data_on_build = false; + cuvs_index_params.kmeans_n_iters = cp.niter; + cuvs_index_params.pq_bits = bitsPerCode_; + cuvs_index_params.pq_dim = subQuantizers_; + cuvs_index_params.conservative_memory_allocation = false; + cuvs_index_params.add_data_on_build = false; - auto raftIndex_ = std::static_pointer_cast(index_); + auto cuvsIndex_ = std::static_pointer_cast(index_); - raft::neighbors::ivf_pq::index raft_ivfpq_index = - raft::neighbors::ivf_pq::build( - raft_handle, raft_idx_params, x, n, (idx_t)d); + std::optional> cuvs_ivfpq_index; - auto raft_centers = raft::make_device_matrix( + if (getDeviceForAddress(x) >= 0) { + auto dataset_d = + raft::make_device_matrix_view(x, n, d); + cuvs_ivfpq_index = cuvs::neighbors::ivf_pq::build( + raft_handle, cuvs_index_params, dataset_d); + } else { + auto dataset_h = + raft::make_host_matrix_view(x, n, d); + cuvs_ivfpq_index = cuvs::neighbors::ivf_pq::build( + raft_handle, cuvs_index_params, dataset_h); + } + + auto cluster_centers = raft::make_device_matrix( raft_handle, - raft_ivfpq_index.n_lists(), - raft_ivfpq_index.dim()); - raft::neighbors::ivf_pq::helpers::extract_centers( - raft_handle, raft_ivfpq_index, raft_centers.view()); + cuvs_ivfpq_index.value().n_lists(), + cuvs_ivfpq_index.value().dim()); + cuvs::neighbors::ivf_pq::helpers::extract_centers( + raft_handle, cuvs_ivfpq_index.value(), cluster_centers.view()); - quantizer->train(nlist, raft_centers.data_handle()); - quantizer->add(nlist, raft_centers.data_handle()); + quantizer->train(nlist, cluster_centers.data_handle()); + quantizer->add(nlist, cluster_centers.data_handle()); raft::copy( pq.get_centroids(0, 
0), - raft_ivfpq_index.pq_centers().data_handle(), - raft_ivfpq_index.pq_centers().size(), + cuvs_ivfpq_index.value().pq_centers().data_handle(), + cuvs_ivfpq_index.value().pq_centers().size(), raft_handle.get_stream()); raft_handle.sync_stream(); - raftIndex_->setRaftIndex(std::move(raft_ivfpq_index)); + cuvsIndex_->setCuvsIndex(std::move(*cuvs_ivfpq_index)); #else FAISS_THROW_MSG( - "RAFT has not been compiled into the current version so it cannot be used."); + "cuVS has not been compiled into the current version so it cannot be used."); #endif } else { // FIXME: GPUize more of this @@ -484,9 +493,9 @@ void GpuIndexIVFPQ::setIndex_( float* pqCentroidData, IndicesOptions indicesOptions, MemorySpace space) { - if (should_use_raft(config_)) { -#if defined USE_NVIDIA_RAFT - index_.reset(new RaftIVFPQ( + if (should_use_cuvs(config_)) { +#if defined USE_NVIDIA_CUVS + index_.reset(new CuvsIVFPQ( resources, dim, nlist, @@ -502,7 +511,7 @@ void GpuIndexIVFPQ::setIndex_( space)); #else FAISS_THROW_MSG( - "RAFT has not been compiled into the current version so it cannot be used."); + "cuVS has not been compiled into the current version so it cannot be used."); #endif } else { index_.reset(new IVFPQ( @@ -529,10 +538,10 @@ void GpuIndexIVFPQ::verifyPQSettings_() const { FAISS_THROW_IF_NOT_MSG(nlist > 0, "nlist must be >0"); // up to a single byte per code - if (should_use_raft(config_)) { + if (should_use_cuvs(config_)) { if (!ivfpqConfig_.interleavedLayout) { fprintf(stderr, - "WARN: interleavedLayout is set to False with RAFT enabled. This will be ignored.\n"); + "WARN: interleavedLayout is set to False with cuVS enabled. This will be ignored.\n"); } FAISS_THROW_IF_NOT_FMT( bitsPerCode_ >= 4 && bitsPerCode_ <= 8, @@ -567,7 +576,7 @@ void GpuIndexIVFPQ::verifyPQSettings_() const { "is not supported", subQuantizers_); - if (!should_use_raft(config_)) { + if (!should_use_cuvs(config_)) { // Sub-quantizers must evenly divide dimensions available FAISS_THROW_IF_NOT_FMT( this->d % subQuantizers_ == 0, diff --git a/faiss/gpu/GpuIndexIVFPQ.h b/faiss/gpu/GpuIndexIVFPQ.h index 06b0cad274..072a0d81d5 100644 --- a/faiss/gpu/GpuIndexIVFPQ.h +++ b/faiss/gpu/GpuIndexIVFPQ.h @@ -34,7 +34,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig { /// Use the alternative memory layout for the IVF lists /// WARNING: this is a feature under development, and is only supported with - /// RAFT enabled for the index. Do not use if RAFT is not enabled. + /// cuVS enabled for the index. Do not use if cuVS is not enabled. 
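Illustrative caller-side sketch (not part of the patch): with the flag renamed from use_raft to use_cuvs, opting an IVFPQ index into the cuVS backend looks roughly like the following. It assumes a build configured with -DFAISS_ENABLE_CUVS=ON; the constructor arguments are placeholders.

#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>

void build_ivfpq_with_cuvs(const std::vector<float>& xb, int d, faiss::idx_t n) {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexIVFPQConfig config;
    config.use_cuvs = true; // renamed from use_raft in this patch
    // interleavedLayout is implied by the cuVS backend; leaving it false only
    // triggers the warning emitted by verifyPQSettings_() above.

    faiss::gpu::GpuIndexIVFPQ index(
            &res, d, /*nlist=*/1024, /*subQuantizers=*/16, /*bitsPerCode=*/8,
            faiss::METRIC_L2, config);

    // train() routes through the cuvs::neighbors::ivf_pq::build path shown above.
    index.train(n, xb.data());
    index.add(n, xb.data());
}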
bool interleavedLayout = false; /// Use GEMM-backed computation of PQ code distances for the no precomputed diff --git a/faiss/gpu/GpuResources.cpp b/faiss/gpu/GpuResources.cpp index 7cb5905315..1f0f2541f1 100644 --- a/faiss/gpu/GpuResources.cpp +++ b/faiss/gpu/GpuResources.cpp @@ -169,7 +169,7 @@ cudaStream_t GpuResources::getDefaultStreamCurrentDevice() { return getDefaultStream(getCurrentDevice()); } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS raft::device_resources& GpuResources::getRaftHandleCurrentDevice() { return getRaftHandle(getCurrentDevice()); } diff --git a/faiss/gpu/GpuResources.h b/faiss/gpu/GpuResources.h index 8edd8f3d81..3fec634fef 100644 --- a/faiss/gpu/GpuResources.h +++ b/faiss/gpu/GpuResources.h @@ -31,7 +31,7 @@ #include #include -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS #include #include #endif @@ -162,7 +162,7 @@ struct AllocRequest : public AllocInfo { /// The size in bytes of the allocation size_t size = 0; -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS rmm::mr::device_memory_resource* mr = nullptr; #endif }; @@ -212,7 +212,7 @@ class GpuResources { /// given device virtual cudaStream_t getDefaultStream(int device) = 0; -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS /// Returns the raft handle for the given device which can be used to /// make calls to other raft primitives. virtual raft::device_resources& getRaftHandle(int device) = 0; diff --git a/faiss/gpu/StandardGpuResources.cpp b/faiss/gpu/StandardGpuResources.cpp index 4aad6ea1f1..a91c7f693c 100644 --- a/faiss/gpu/StandardGpuResources.cpp +++ b/faiss/gpu/StandardGpuResources.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ * limitations under the License. 
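A minimal sketch (not from the patch) of how code guarded by USE_NVIDIA_CUVS reaches the per-device raft handle declared above; cuVS primitives still consume a raft::device_resources, so the accessor keeps its raft-flavored name.

#if defined USE_NVIDIA_CUVS
#include <faiss/gpu/GpuResources.h>
#include <raft/core/device_resources.hpp>

void sync_current_device(faiss::gpu::GpuResources* res) {
    // One handle is kept per device; it wraps the default Faiss stream.
    const raft::device_resources& handle = res->getRaftHandleCurrentDevice();
    handle.sync_stream(); // wait for work queued on that stream
}
#endif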
*/ -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS #include #include #include @@ -91,7 +91,7 @@ std::string allocsToString(const std::unordered_map& map) { StandardGpuResourcesImpl::StandardGpuResourcesImpl() : -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS mmr_(new rmm::mr::managed_memory_resource), pmr_(new rmm::mr::pinned_memory_resource), #endif @@ -130,6 +130,10 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() { FAISS_ASSERT_MSG( !allocError, "GPU memory allocations not properly cleaned up"); +#if defined USE_NVIDIA_CUVS + raftHandles_.clear(); +#endif + for (auto& entry : defaultStreams_) { DeviceScope scope(entry.first); @@ -159,7 +163,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() { } if (pinnedMemAlloc_) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS pmr_->deallocate(pinnedMemAlloc_, pinnedMemAllocSize_); #else auto err = cudaFreeHost(pinnedMemAlloc_); @@ -258,12 +262,12 @@ void StandardGpuResourcesImpl::setDefaultStream( if (prevStream != stream) { streamWait({stream}, {prevStream}); } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS // delete the raft handle for this device, which will be initialized // with the updated stream during any subsequent calls to getRaftHandle auto it2 = raftHandles_.find(device); if (it2 != raftHandles_.end()) { - raftHandles_.erase(it2); + raft::resource::set_cuda_stream(it2->second, stream); } #endif } @@ -283,15 +287,25 @@ void StandardGpuResourcesImpl::revertDefaultStream(int device) { cudaStream_t newStream = defaultStreams_[device]; streamWait({newStream}, {prevStream}); - } -#if defined USE_NVIDIA_RAFT - // delete the raft handle for this device, which will be initialized - // with the updated stream during any subsequent calls to getRaftHandle - auto it2 = raftHandles_.find(device); - if (it2 != raftHandles_.end()) { - raftHandles_.erase(it2); - } + +#if defined USE_NVIDIA_CUVS + // update the stream on the raft handle for this device + auto it2 = raftHandles_.find(device); + if (it2 != raftHandles_.end()) { + raft::resource::set_cuda_stream(it2->second, newStream); + } #endif + } else { +#if defined USE_NVIDIA_CUVS + // delete the raft handle for this device, which will be initialized + // with the updated stream during any subsequent calls to + // getRaftHandle + auto it2 = raftHandles_.find(device); + if (it2 != raftHandles_.end()) { + raftHandles_.erase(it2); + } +#endif + } } userDefaultStreams_.erase(device); @@ -324,7 +338,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) { // If this is the first device that we're initializing, create our // pinned memory allocation if (defaultStreams_.empty() && pinnedMemSize_ > 0) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS // If this is the first device that we're initializing, create our // pinned memory allocation if (defaultStreams_.empty() && pinnedMemSize_ > 0) { @@ -386,7 +400,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) { defaultStreams_[device] = defaultStream; -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS raftHandles_.emplace(std::make_pair(device, defaultStream)); #endif @@ -452,7 +466,7 @@ cudaStream_t StandardGpuResourcesImpl::getDefaultStream(int device) { return defaultStreams_[device]; } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS raft::device_resources& StandardGpuResourcesImpl::getRaftHandle(int device) { initializeForDevice(device); @@ -523,7 +537,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) { // Otherwise, 
we can handle this locally p = tempMemory_[adjReq.device]->allocMemory(adjReq.stream, adjReq.size); } else if (adjReq.space == MemorySpace::Device) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS try { rmm::mr::device_memory_resource* current_mr = rmm::mr::get_per_device_resource( @@ -557,7 +571,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) { } #endif } else if (adjReq.space == MemorySpace::Unified) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS try { // for now, use our own managed MR to do Unified Memory allocations. // TODO: change this to use the current device resource once RMM has @@ -626,7 +640,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) { } else if ( req.space == MemorySpace::Device || req.space == MemorySpace::Unified) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS req.mr->deallocate_async(p, req.size, req.stream); #else auto err = cudaFree(p); @@ -720,7 +734,7 @@ cudaStream_t StandardGpuResources::getDefaultStream(int device) { return res_->getDefaultStream(device); } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS raft::device_resources& StandardGpuResources::getRaftHandle(int device) { return res_->getRaftHandle(device); } diff --git a/faiss/gpu/StandardGpuResources.h b/faiss/gpu/StandardGpuResources.h index 7a48948663..322a341a00 100644 --- a/faiss/gpu/StandardGpuResources.h +++ b/faiss/gpu/StandardGpuResources.h @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ #pragma once -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS #include #include #endif @@ -80,7 +80,7 @@ class StandardGpuResourcesImpl : public GpuResources { /// this stream upon exit from an index or other Faiss GPU call. cudaStream_t getDefaultStream(int device) override; -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS /// Returns the raft handle for the given device which can be used to /// make calls to other raft primitives. raft::device_resources& getRaftHandle(int device) override; @@ -152,7 +152,7 @@ class StandardGpuResourcesImpl : public GpuResources { /// cuBLAS handle for each device std::unordered_map blasHandles_; -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS /// raft handle for each device std::unordered_map raftHandles_; @@ -235,7 +235,7 @@ class StandardGpuResources : public GpuResourcesProvider { /// Returns the current default stream cudaStream_t getDefaultStream(int device); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS /// Returns the raft handle for the given device which can be used to /// make calls to other raft primitives. raft::device_resources& getRaftHandle(int device); diff --git a/faiss/gpu/impl/RaftCagra.cu b/faiss/gpu/impl/CuvsCagra.cu similarity index 53% rename from faiss/gpu/impl/RaftCagra.cu rename to faiss/gpu/impl/CuvsCagra.cu index e0f302ae04..82e3007d59 100644 --- a/faiss/gpu/impl/RaftCagra.cu +++ b/faiss/gpu/impl/CuvsCagra.cu @@ -21,74 +21,65 @@ * limitations under the License. 
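Caller-side sketch (illustrative; the surrounding stream handling is an assumption of typical use): with the change above, swapping the default stream now rebinds the existing raft handle's CUDA stream via raft::resource::set_cuda_stream instead of discarding the handle.

#include <faiss/gpu/StandardGpuResources.h>
#include <cuda_runtime.h>

void run_on_external_stream(int device) {
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    faiss::gpu::StandardGpuResources res;
    // In a cuVS build, the per-device raft handle follows this stream.
    res.setDefaultStream(device, stream);

    // ... build / search GPU indexes here; all work is ordered on `stream` ...

    res.revertDefaultStream(device); // restore the stream Faiss originally created
    cudaStreamDestroy(stream);
}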
*/ +#include +#include #include -#include -#include -#include +#include +#include #include #include #include -#include -#include -#include namespace faiss { namespace gpu { -RaftCagra::RaftCagra( +CuvsCagra::CuvsCagra( GpuResources* resources, int dim, idx_t intermediate_graph_degree, idx_t graph_degree, faiss::cagra_build_algo graph_build_algo, size_t nn_descent_niter, + bool store_dataset, faiss::MetricType metric, float metricArg, IndicesOptions indicesOptions, - std::optional ivf_pq_params, - std::optional - ivf_pq_search_params) + std::optional ivf_pq_params, + std::optional + ivf_pq_search_params, + float refine_rate) : resources_(resources), dim_(dim), + graph_build_algo_(graph_build_algo), + nn_descent_niter_(nn_descent_niter), + store_dataset_(store_dataset), metric_(metric), metricArg_(metricArg), index_params_(), ivf_pq_params_(ivf_pq_params), - ivf_pq_search_params_(ivf_pq_search_params) { + ivf_pq_search_params_(ivf_pq_search_params), + refine_rate_(refine_rate) { FAISS_THROW_IF_NOT_MSG( metric == faiss::METRIC_L2 || metric == faiss::METRIC_INNER_PRODUCT, "CAGRA currently only supports L2 or Inner Product metric."); FAISS_THROW_IF_NOT_MSG( indicesOptions == faiss::gpu::INDICES_64_BIT, - "only INDICES_64_BIT is supported for RAFT CAGRA index"); + "only INDICES_64_BIT is supported for cuVS CAGRA index"); index_params_.intermediate_graph_degree = intermediate_graph_degree; index_params_.graph_degree = graph_degree; - index_params_.build_algo = - static_cast( - graph_build_algo); - index_params_.nn_descent_niter = nn_descent_niter; - if (!ivf_pq_params_) { - ivf_pq_params_ = - std::make_optional(); - } if (!ivf_pq_search_params_) { ivf_pq_search_params_ = - std::make_optional(); + std::make_optional(); } - index_params_.metric = metric_ == faiss::METRIC_L2 - ? raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct; - ivf_pq_params_->metric = metric_ == faiss::METRIC_L2 - ? raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct; + index_params_.metric = metricFaissToCuvs(metric_, false); reset(); } -RaftCagra::RaftCagra( +CuvsCagra::CuvsCagra( GpuResources* resources, int dim, idx_t n, @@ -107,19 +98,22 @@ RaftCagra::RaftCagra( "CAGRA currently only supports L2 or Inner Product metric."); FAISS_THROW_IF_NOT_MSG( indicesOptions == faiss::gpu::INDICES_64_BIT, - "only INDICES_64_BIT is supported for RAFT CAGRA index"); + "only INDICES_64_BIT is supported for cuVS CAGRA index"); auto distances_on_gpu = getDeviceForAddress(distances) >= 0; auto knn_graph_on_gpu = getDeviceForAddress(knn_graph) >= 0; FAISS_ASSERT(distances_on_gpu == knn_graph_on_gpu); + storage_ = distances; + n_ = n; + const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); if (distances_on_gpu && knn_graph_on_gpu) { raft_handle.sync_stream(); - // Copying to host so that raft::neighbors::cagra::index + // Copying to host so that cuvs::neighbors::cagra::index // creates an owning copy of the knn graph on device auto knn_graph_copy = raft::make_host_matrix(n, graph_degree); @@ -132,11 +126,10 @@ RaftCagra::RaftCagra( raft::make_device_matrix_view( distances, n, dim); - raft_knn_index = raft::neighbors::cagra::index( + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( raft_handle, - metric_ == faiss::METRIC_L2 - ? 
raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct, + metricFaissToCuvs(metric_, false), distances_mds, raft::make_const_mdspan(knn_graph_copy.view())); } else if (!distances_on_gpu && !knn_graph_on_gpu) { @@ -151,11 +144,10 @@ RaftCagra::RaftCagra( auto distances_mds = raft::make_host_matrix_view( distances, n, dim); - raft_knn_index = raft::neighbors::cagra::index( + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( raft_handle, - metric_ == faiss::METRIC_L2 - ? raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct, + metricFaissToCuvs(metric_, false), distances_mds, raft::make_const_mdspan(knn_graph_copy.view())); } else { @@ -164,86 +156,56 @@ RaftCagra::RaftCagra( } } -void RaftCagra::train(idx_t n, const float* x) { +void CuvsCagra::train(idx_t n, const float* x) { + storage_ = x; + n_ = n; + const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - if (index_params_.build_algo == - raft::neighbors::cagra::graph_build_algo::IVF_PQ) { - std::optional> knn_graph( - raft::make_host_matrix( - n, index_params_.intermediate_graph_degree)); - if (getDeviceForAddress(x) >= 0) { - auto dataset_d = - raft::make_device_matrix_view( - x, n, dim_); - raft::neighbors::cagra::build_knn_graph( - raft_handle, - dataset_d, - knn_graph->view(), - 1.0f, - ivf_pq_params_, - ivf_pq_search_params_); - } else { - auto dataset_h = raft::make_host_matrix_view( - x, n, dim_); - raft::neighbors::cagra::build_knn_graph( - raft_handle, - dataset_h, - knn_graph->view(), - 1.0f, - ivf_pq_params_, - ivf_pq_search_params_); - } - auto cagra_graph = raft::make_host_matrix( - n, index_params_.graph_degree); - - raft::neighbors::cagra::optimize( - raft_handle, knn_graph->view(), cagra_graph.view()); - - // free intermediate graph before trying to create the index - knn_graph.reset(); - - if (getDeviceForAddress(x) >= 0) { - auto dataset_d = - raft::make_device_matrix_view( - x, n, dim_); - raft_knn_index = raft::neighbors::cagra::index( - raft_handle, - metric_ == faiss::METRIC_L2 - ? raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct, - dataset_d, - raft::make_const_mdspan(cagra_graph.view())); - } else { - auto dataset_h = raft::make_host_matrix_view( - x, n, dim_); - raft_knn_index = raft::neighbors::cagra::index( - raft_handle, - metric_ == faiss::METRIC_L2 - ? 
raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct, - dataset_h, - raft::make_const_mdspan(cagra_graph.view())); + + if (!ivf_pq_params_) { + ivf_pq_params_ = cuvs::neighbors::ivf_pq::index_params::from_dataset( + raft::make_extents( + static_cast(n_), static_cast(dim_)), + metricFaissToCuvs(metric_, false)); + } + if (graph_build_algo_ == faiss::cagra_build_algo::IVF_PQ) { + cuvs::neighbors::cagra::graph_build_params::ivf_pq_params + graph_build_params; + graph_build_params.build_params = ivf_pq_params_.value(); + graph_build_params.search_params = ivf_pq_search_params_.value(); + graph_build_params.refinement_rate = refine_rate_.value(); + index_params_.graph_build_params = graph_build_params; + if (index_params_.graph_degree == + index_params_.intermediate_graph_degree) { + index_params_.intermediate_graph_degree = + 1.5 * index_params_.graph_degree; } + } else { + cuvs::neighbors::cagra::graph_build_params::nn_descent_params + graph_build_params(index_params_.intermediate_graph_degree); + graph_build_params.max_iterations = nn_descent_niter_; + index_params_.graph_build_params = graph_build_params; + } + if (getDeviceForAddress(x) >= 0) { + auto dataset = + raft::make_device_matrix_view(x, n, dim_); + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + cuvs::neighbors::cagra::build( + raft_handle, index_params_, dataset)); } else { - if (getDeviceForAddress(x) >= 0) { - raft_knn_index = raft::runtime::neighbors::cagra::build( - raft_handle, - index_params_, - raft::make_device_matrix_view( - x, n, dim_)); - } else { - raft_knn_index = raft::runtime::neighbors::cagra::build( - raft_handle, - index_params_, - raft::make_host_matrix_view( - x, n, dim_)); - } + auto dataset = + raft::make_host_matrix_view(x, n, dim_); + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + cuvs::neighbors::cagra::build( + raft_handle, index_params_, dataset)); } } -void RaftCagra::search( +void CuvsCagra::search( Tensor& queries, int k, Tensor& outDistances, @@ -267,10 +229,22 @@ void RaftCagra::search( idx_t cols = queries.getSize(1); idx_t k_ = k; - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index); FAISS_ASSERT(numQueries > 0); FAISS_ASSERT(cols == dim_); + if (!store_dataset_) { + if (getDeviceForAddress(storage_) >= 0) { + auto dataset = raft::make_device_matrix_view( + storage_, n_, dim_); + cuvs_index->update_dataset(raft_handle, dataset); + } else { + auto dataset = raft::make_host_matrix_view( + storage_, n_, dim_); + cuvs_index->update_dataset(raft_handle, dataset); + } + } + auto queries_view = raft::make_device_matrix_view( queries.data(), numQueries, cols); auto distances_view = raft::make_device_matrix_view( @@ -278,18 +252,18 @@ void RaftCagra::search( auto indices_view = raft::make_device_matrix_view( outIndices.data(), numQueries, k_); - raft::neighbors::cagra::search_params search_pams; + cuvs::neighbors::cagra::search_params search_pams; search_pams.max_queries = max_queries; search_pams.itopk_size = itopk_size; search_pams.max_iterations = max_iterations; search_pams.algo = - static_cast(graph_search_algo); + static_cast(graph_search_algo); search_pams.team_size = team_size; search_pams.search_width = search_width; search_pams.min_iterations = min_iterations; search_pams.thread_block_size = thread_block_size; search_pams.hashmap_mode = - static_cast(hash_mode); + static_cast(hash_mode); search_pams.hashmap_min_bitlen = hashmap_min_bitlen; search_pams.hashmap_max_fill_rate = hashmap_max_fill_rate; 
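Condensed sketch of the cuVS call this file now wraps (header paths and parameter values are assumptions, not taken from the patch): building a CAGRA graph directly from host-resident vectors, mirroring the host_matrix_view branch of CuvsCagra::train above.

#include <cuvs/neighbors/cagra.hpp>
#include <raft/core/device_resources.hpp>
#include <raft/core/host_mdspan.hpp>
#include <cstdint>

void build_cagra_graph(raft::device_resources& handle,
                       const float* host_data, int64_t n, int64_t dim) {
    cuvs::neighbors::cagra::index_params params;
    params.intermediate_graph_degree = 128; // same knobs CuvsCagra forwards
    params.graph_degree = 64;

    auto dataset =
            raft::make_host_matrix_view<const float, int64_t>(host_data, n, dim);
    auto index = cuvs::neighbors::cagra::build(handle, params, dataset);

    handle.sync_stream();
    // index.graph_degree() and index.size() are now queryable, which is what
    // CuvsCagra::get_knngraph_degree() reports back to Faiss.
}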
search_pams.num_random_samplings = num_random_samplings; @@ -298,10 +272,10 @@ void RaftCagra::search( auto indices_copy = raft::make_device_matrix( raft_handle, numQueries, k_); - raft::runtime::neighbors::cagra::search( + cuvs::neighbors::cagra::search( raft_handle, search_pams, - raft_knn_index.value(), + *cuvs_index, queries_view, indices_copy.view(), distances_view); @@ -312,22 +286,22 @@ void RaftCagra::search( indices_view.data_handle()); } -void RaftCagra::reset() { - raft_knn_index.reset(); +void CuvsCagra::reset() { + cuvs_index.reset(); } -idx_t RaftCagra::get_knngraph_degree() const { - FAISS_ASSERT(raft_knn_index.has_value()); - return static_cast(raft_knn_index.value().graph_degree()); +idx_t CuvsCagra::get_knngraph_degree() const { + FAISS_ASSERT(cuvs_index); + return static_cast(cuvs_index->graph_degree()); } -std::vector RaftCagra::get_knngraph() const { - FAISS_ASSERT(raft_knn_index.has_value()); +std::vector CuvsCagra::get_knngraph() const { + FAISS_ASSERT(cuvs_index); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); auto stream = raft_handle.get_stream(); - auto device_graph = raft_knn_index.value().graph(); + auto device_graph = cuvs_index->graph(); std::vector host_graph( device_graph.extent(0) * device_graph.extent(1)); @@ -343,29 +317,8 @@ std::vector RaftCagra::get_knngraph() const { return host_graph; } -std::vector RaftCagra::get_training_dataset() const { - FAISS_ASSERT(raft_knn_index.has_value()); - const raft::device_resources& raft_handle = - resources_->getRaftHandleCurrentDevice(); - auto stream = raft_handle.get_stream(); - - auto device_dataset = raft_knn_index.value().dataset(); - - std::vector host_dataset( - device_dataset.extent(0) * device_dataset.extent(1)); - - RAFT_CUDA_TRY(cudaMemcpy2DAsync( - host_dataset.data(), - sizeof(float) * dim_, - device_dataset.data_handle(), - sizeof(float) * device_dataset.stride(0), - sizeof(float) * dim_, - device_dataset.extent(0), - cudaMemcpyDefault, - raft_handle.get_stream())); - raft_handle.sync_stream(); - - return host_dataset; +const float* CuvsCagra::get_training_dataset() const { + return storage_; } } // namespace gpu diff --git a/faiss/gpu/impl/RaftCagra.cuh b/faiss/gpu/impl/CuvsCagra.cuh similarity index 72% rename from faiss/gpu/impl/RaftCagra.cuh rename to faiss/gpu/impl/CuvsCagra.cuh index 39d7d75765..c466aceec4 100644 --- a/faiss/gpu/impl/RaftCagra.cuh +++ b/faiss/gpu/impl/CuvsCagra.cuh @@ -31,8 +31,8 @@ #include -#include -#include +#include +#include namespace faiss { @@ -45,24 +45,26 @@ enum class cagra_hash_mode { HASH, SMALL, AUTO }; namespace gpu { -class RaftCagra { +class CuvsCagra { public: - RaftCagra( + CuvsCagra( GpuResources* resources, int dim, idx_t intermediate_graph_degree, idx_t graph_degree, faiss::cagra_build_algo graph_build_algo, size_t nn_descent_niter, + bool store_dataset, faiss::MetricType metric, float metricArg, IndicesOptions indicesOptions, - std::optional ivf_pq_params = + std::optional ivf_pq_params = std::nullopt, - std::optional - ivf_pq_search_params = std::nullopt); + std::optional + ivf_pq_search_params = std::nullopt, + float refine_rate = 2.0f); - RaftCagra( + CuvsCagra( GpuResources* resources, int dim, idx_t n, @@ -73,7 +75,7 @@ class RaftCagra { float metricArg, IndicesOptions indicesOptions); - ~RaftCagra() = default; + ~CuvsCagra() = default; void train(idx_t n, const float* x); @@ -102,31 +104,44 @@ class RaftCagra { std::vector get_knngraph() const; - std::vector get_training_dataset() const; + const float* 
get_training_dataset() const; private: /// Collection of GPU resources that we use GpuResources* resources_; + /// Training dataset + const float* storage_; + int n_; + /// Expected dimensionality of the vectors const int dim_; + /// Controls the underlying cuVS index if it should store the dataset in + /// device memory + bool store_dataset_; + /// Metric type of the index faiss::MetricType metric_; /// Metric arg float metricArg_; - /// Parameters to build RAFT CAGRA index - raft::neighbors::cagra::index_params index_params_; + /// Parameters to build cuVS CAGRA index + faiss::cagra_build_algo graph_build_algo_; + cuvs::neighbors::cagra::index_params index_params_; /// Parameters to build CAGRA graph using IVF PQ - std::optional ivf_pq_params_; - std::optional ivf_pq_search_params_; + std::optional ivf_pq_params_; + std::optional ivf_pq_search_params_; + std::optional refine_rate_; + + /// Parameters to build CAGRA graph using NN Descent + size_t nn_descent_niter_ = 20; - /// Instance of trained RAFT CAGRA index - std::optional> - raft_knn_index{std::nullopt}; + /// Instance of trained cuVS CAGRA index + std::shared_ptr> cuvs_index{ + nullptr}; }; } // namespace gpu diff --git a/faiss/gpu/impl/RaftFlatIndex.cu b/faiss/gpu/impl/CuvsFlatIndex.cu similarity index 86% rename from faiss/gpu/impl/RaftFlatIndex.cu rename to faiss/gpu/impl/CuvsFlatIndex.cu index c06ca62cb5..08f63300ef 100644 --- a/faiss/gpu/impl/RaftFlatIndex.cu +++ b/faiss/gpu/impl/CuvsFlatIndex.cu @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,33 +21,33 @@ * limitations under the License. */ -#include -#include +#include +#include #include +#include #include +#include #include #include #include -#include - -#define RAFT_NAME "raft" +#include namespace faiss { namespace gpu { -using namespace raft::distance; -using namespace raft::neighbors; +using namespace cuvs::distance; +using namespace cuvs::neighbors; -RaftFlatIndex::RaftFlatIndex( +CuvsFlatIndex::CuvsFlatIndex( GpuResources* res, int dim, bool useFloat16, MemorySpace space) : FlatIndex(res, dim, useFloat16, space) {} -void RaftFlatIndex::query( +void CuvsFlatIndex::query( Tensor& input, int k, faiss::MetricType metric, @@ -56,7 +56,7 @@ void RaftFlatIndex::query( Tensor& outIndices, bool exactDistance) { /** - * RAFT doesn't yet support half-precision in bfknn. + * cuVS doesn't yet support half-precision in bfknn. 
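Usage sketch (illustrative, not part of the patch): a brute-force GpuIndexFlat with use_cuvs set goes through CuvsFlatIndex::query for float32 storage; as the comment here notes, float16 storage still falls back to the stock FlatIndex path.

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>

void flat_search_with_cuvs(const float* xb, faiss::idx_t nb,
                           const float* xq, faiss::idx_t nq, int d, int k) {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexFlatConfig config;
    config.use_cuvs = true; // float32 storage, so the cuVS brute-force path is used

    faiss::gpu::GpuIndexFlatL2 index(&res, d, config);
    index.add(nb, xb);

    std::vector<float> distances(nq * k);
    std::vector<faiss::idx_t> labels(nq * k);
    index.search(nq, xq, k, distances.data(), labels.data());
}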
* Use FlatIndex for float16 for now */ if (useFloat16_) { @@ -92,16 +92,16 @@ void RaftFlatIndex::query( outDistances.getSize(0), outDistances.getSize(1)); - DistanceType distance = metricFaissToRaft(metric, exactDistance); + cuvsDistanceType distance = metricFaissToCuvs(metric, exactDistance); std::optional> norms_view = raft::make_device_vector_view( norms_.data(), norms_.getSize(0)); - raft::neighbors::brute_force::index idx( + cuvs::neighbors::brute_force::index idx( handle, index, norms_view, distance, metricArg); - raft::neighbors::brute_force::search( - handle, idx, search, inds, dists); + cuvs::neighbors::brute_force::search( + handle, idx, search, inds, dists, std::nullopt); if (metric == MetricType::METRIC_Lp) { raft::linalg::unary_op( @@ -121,7 +121,7 @@ void RaftFlatIndex::query( } } -void RaftFlatIndex::query( +void CuvsFlatIndex::query( Tensor& vecs, int k, faiss::MetricType metric, diff --git a/faiss/gpu/impl/RaftFlatIndex.cuh b/faiss/gpu/impl/CuvsFlatIndex.cuh similarity index 95% rename from faiss/gpu/impl/RaftFlatIndex.cuh rename to faiss/gpu/impl/CuvsFlatIndex.cuh index 9d20fade61..b856351cfa 100644 --- a/faiss/gpu/impl/RaftFlatIndex.cuh +++ b/faiss/gpu/impl/CuvsFlatIndex.cuh @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,9 +40,9 @@ class GpuResources; /// the vectors in float32. /// If float16, we store the vectors in both float16 and float32, where float32 /// data is possibly needed for certain residual operations -class RaftFlatIndex : public FlatIndex { +class CuvsFlatIndex : public FlatIndex { public: - RaftFlatIndex( + CuvsFlatIndex( GpuResources* res, int dim, bool useFloat16, diff --git a/faiss/gpu/impl/RaftIVFFlat.cu b/faiss/gpu/impl/CuvsIVFFlat.cu similarity index 76% rename from faiss/gpu/impl/RaftIVFFlat.cu rename to faiss/gpu/impl/CuvsIVFFlat.cu index 181cf94968..0de7100c72 100644 --- a/faiss/gpu/impl/RaftIVFFlat.cu +++ b/faiss/gpu/impl/CuvsIVFFlat.cu @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,15 +25,17 @@ #include #include -#include +#include +#include +#include #include #include -#include #include -#include -#include -#include +#include +#include +#include +#include #include #include @@ -41,7 +43,7 @@ namespace faiss { namespace gpu { -RaftIVFFlat::RaftIVFFlat( +CuvsIVFFlat::CuvsIVFFlat( GpuResources* res, int dim, int nlist, @@ -60,32 +62,34 @@ RaftIVFFlat::RaftIVFFlat( useResidual, scalarQ, interleavedLayout, - // skip ptr allocations in base class (handled by RAFT + // skip ptr allocations in base class (handled by cuVS // internally) indicesOptions, space) { FAISS_THROW_IF_NOT_MSG( indicesOptions == INDICES_64_BIT, - "only INDICES_64_BIT is supported for RAFT index"); + "only INDICES_64_BIT is supported for cuVS index"); } -RaftIVFFlat::~RaftIVFFlat() {} +CuvsIVFFlat::~CuvsIVFFlat() {} -void RaftIVFFlat::reserveMemory(idx_t numVecs) { +void CuvsIVFFlat::reserveMemory(idx_t numVecs) { fprintf(stderr, - "WARN: reserveMemory is NOP. Pre-allocation of IVF lists is not supported with RAFT enabled.\n"); + "WARN: reserveMemory is NOP. 
Pre-allocation of IVF lists is not supported with cuVS enabled.\n"); } -void RaftIVFFlat::reset() { - raft_knn_index.reset(); +void CuvsIVFFlat::reset() { + cuvs_index.reset(); } -void RaftIVFFlat::setRaftIndex( - raft::neighbors::ivf_flat::index&& idx) { - raft_knn_index.emplace(std::move(idx)); +void CuvsIVFFlat::setCuvsIndex( + cuvs::neighbors::ivf_flat::index&& idx) { + cuvs_index = + std::make_shared>( + std::move(idx)); } -void RaftIVFFlat::search( +void CuvsIVFFlat::search( Index* coarseQuantizer, Tensor& queries, int nprobe, @@ -93,7 +97,7 @@ void RaftIVFFlat::search( Tensor& outDistances, Tensor& outIndices) { /// NB: The coarse quantizer is ignored here. The user is assumed to have - /// called updateQuantizer() to modify the RAFT index if the quantizer was + /// called updateQuantizer() to modify the cuVS index if the quantizer was /// modified externally uint32_t numQueries = queries.getSize(0); @@ -101,14 +105,14 @@ void RaftIVFFlat::search( uint32_t k_ = k; // Device is already set in GpuIndex::search - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index != nullptr); FAISS_ASSERT(numQueries > 0); FAISS_ASSERT(cols == dim_); FAISS_THROW_IF_NOT(nprobe > 0 && nprobe <= numLists_); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_flat::search_params pams; + cuvs::neighbors::ivf_flat::search_params pams; pams.n_probes = nprobe; auto queries_view = raft::make_device_matrix_view( @@ -118,10 +122,10 @@ void RaftIVFFlat::search( auto out_dists_view = raft::make_device_matrix_view( outDistances.data(), (idx_t)numQueries, (idx_t)k_); - raft::neighbors::ivf_flat::search( + cuvs::neighbors::ivf_flat::search( raft_handle, pams, - raft_knn_index.value(), + *cuvs_index, queries_view, out_inds_view, out_dists_view); @@ -158,15 +162,15 @@ void RaftIVFFlat::search( }); } -idx_t RaftIVFFlat::addVectors( +idx_t CuvsIVFFlat::addVectors( Index* coarseQuantizer, Tensor& vecs, Tensor& indices) { /// NB: The coarse quantizer is ignored here. 
The user is assumed to have - /// called updateQuantizer() to update the RAFT index if the quantizer was + /// called updateQuantizer() to update the cuVS index if the quantizer was /// modified externally - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index != nullptr); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); @@ -174,27 +178,27 @@ idx_t RaftIVFFlat::addVectors( /// Remove rows containing NaNs idx_t n_rows_valid = inplaceGatherFilteredRows(resources_, vecs, indices); - raft_knn_index.emplace(raft::neighbors::ivf_flat::extend( + cuvs::neighbors::ivf_flat::extend( raft_handle, raft::make_device_matrix_view( vecs.data(), n_rows_valid, dim_), std::make_optional>( raft::make_device_vector_view( indices.data(), n_rows_valid)), - raft_knn_index.value())); + cuvs_index.get()); return n_rows_valid; } -idx_t RaftIVFFlat::getListLength(idx_t listId) const { - FAISS_ASSERT(raft_knn_index.has_value()); +idx_t CuvsIVFFlat::getListLength(idx_t listId) const { + FAISS_ASSERT(cuvs_index != nullptr); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); uint32_t size; raft::update_host( &size, - raft_knn_index.value().list_sizes().data_handle() + listId, + cuvs_index->list_sizes().data_handle() + listId, 1, raft_handle.get_stream()); raft_handle.sync_stream(); @@ -203,8 +207,8 @@ idx_t RaftIVFFlat::getListLength(idx_t listId) const { } /// Return the list indices of a particular list back to the CPU -std::vector RaftIVFFlat::getListIndices(idx_t listId) const { - FAISS_ASSERT(raft_knn_index.has_value()); +std::vector CuvsIVFFlat::getListIndices(idx_t listId) const { + FAISS_ASSERT(cuvs_index != nullptr); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); auto stream = raft_handle.get_stream(); @@ -218,9 +222,7 @@ std::vector RaftIVFFlat::getListIndices(idx_t listId) const { raft::update_host( &list_indices_ptr, - const_cast( - raft_knn_index.value().inds_ptrs().data_handle()) + - listId, + const_cast(cuvs_index->inds_ptrs().data_handle()) + listId, 1, stream); raft_handle.sync_stream(); @@ -232,13 +234,13 @@ std::vector RaftIVFFlat::getListIndices(idx_t listId) const { } /// Return the encoded vectors of a particular list back to the CPU -std::vector RaftIVFFlat::getListVectorData( +std::vector CuvsIVFFlat::getListVectorData( idx_t listId, bool gpuFormat) const { if (gpuFormat) { - FAISS_THROW_MSG("gpuFormat should be false for RAFT indices"); + FAISS_THROW_MSG("gpuFormat should be false for cuVS indices"); } - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index != nullptr); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); @@ -259,7 +261,7 @@ std::vector RaftIVFFlat::getListVectorData( // fetch the list data ptr on host raft::update_host( &list_data_ptr, - raft_knn_index.value().data_ptrs().data_handle() + listId, + cuvs_index->data_ptrs().data_handle() + listId, 1, stream); raft_handle.sync_stream(); @@ -271,15 +273,15 @@ std::vector RaftIVFFlat::getListVectorData( stream); raft_handle.sync_stream(); - RaftIVFFlatCodePackerInterleaved packer( - (size_t)listSize, dim_, raft_knn_index.value().veclen()); + CuvsIVFFlatCodePackerInterleaved packer( + (size_t)listSize, dim_, cuvs_index->veclen()); packer.unpack_all(interleaved_codes.data(), flat_codes.data()); return flat_codes; } /// Performs search when we are already given the IVF cells to look at /// (GpuIndexIVF::search_preassigned implementation) -void 
RaftIVFFlat::searchPreassigned( +void CuvsIVFFlat::searchPreassigned( Index* coarseQuantizer, Tensor& vecs, Tensor& ivfDistances, @@ -291,7 +293,7 @@ void RaftIVFFlat::searchPreassigned( // TODO: Fill this in! } -void RaftIVFFlat::updateQuantizer(Index* quantizer) { +void CuvsIVFFlat::updateQuantizer(Index* quantizer) { FAISS_THROW_IF_NOT(quantizer->is_trained); // Must match our basic IVF parameters @@ -304,14 +306,15 @@ void RaftIVFFlat::updateQuantizer(Index* quantizer) { const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_flat::index_params pams; + cuvs::neighbors::ivf_flat::index_params pams; pams.add_data_on_build = false; - pams.metric = metricFaissToRaft(metric_, false); + pams.metric = metricFaissToCuvs(metric_, false); pams.n_lists = numLists_; - raft_knn_index.emplace(raft_handle, pams, static_cast(dim_)); - - raft::neighbors::ivf_flat::helpers::reset_index( - raft_handle, &raft_knn_index.value()); + cuvs_index = + std::make_shared>( + raft_handle, pams, static_cast(dim_)); + cuvs::neighbors::ivf_flat::helpers::reset_index( + raft_handle, cuvs_index.get()); // If the index instance is a GpuIndexFlat, then we can use direct access to // the centroids within. @@ -330,7 +333,7 @@ void RaftIVFFlat::updateQuantizer(Index* quantizer) { gpuData->reconstruct(0, gpuData->getSize(), centroids); raft::update_device( - raft_knn_index.value().centers().data_handle(), + cuvs_index->centers().data_handle(), centroids.data(), total_elems, stream); @@ -339,7 +342,7 @@ void RaftIVFFlat::updateQuantizer(Index* quantizer) { auto centroids = gpuData->getVectorsFloat32Ref(); raft::update_device( - raft_knn_index.value().centers().data_handle(), + cuvs_index->centers().data_handle(), centroids.data(), total_elems, stream); @@ -352,14 +355,14 @@ void RaftIVFFlat::updateQuantizer(Index* quantizer) { quantizer->reconstruct_n(0, quantizer->ntotal, vecs.data()); raft::update_device( - raft_knn_index.value().centers().data_handle(), + cuvs_index->centers().data_handle(), vecs.data(), total_elems, stream); } } -void RaftIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { +void CuvsIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { size_t nlist = ivf ? ivf->nlist : 0; size_t ntotal = ivf ? 
ivf->compute_ntotal() : 0; @@ -370,12 +373,12 @@ void RaftIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { std::vector indices_(ntotal); // the index must already exist - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index != nullptr); - auto& raft_lists = raft_knn_index.value().lists(); + auto& cuvs_index_lists = cuvs_index->lists(); // conservative memory alloc for cloning cpu inverted lists - raft::neighbors::ivf_flat::list_spec raft_list_spec{ + cuvs::neighbors::ivf_flat::list_spec ivf_list_spec{ static_cast(dim_), true}; for (size_t i = 0; i < nlist; ++i) { @@ -392,20 +395,20 @@ void RaftIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { // store the list size list_sizes_[i] = static_cast(listSize); - // This RAFT list must currently be empty + // This cuVS list must currently be empty FAISS_ASSERT(getListLength(i) == 0); - raft::neighbors::ivf::resize_list( + cuvs::neighbors::ivf::resize_list( raft_handle, - raft_lists[i], - raft_list_spec, + cuvs_index_lists[i], + ivf_list_spec, (uint32_t)listSize, (uint32_t)0); } // Update the pointers and the sizes - raft::neighbors::ivf_flat::helpers::recompute_internal_state( - raft_handle, &(raft_knn_index.value())); + cuvs::neighbors::ivf_flat::helpers::recompute_internal_state( + raft_handle, cuvs_index.get()); for (size_t i = 0; i < nlist; ++i) { size_t listSize = ivf->list_size(i); @@ -414,18 +417,18 @@ void RaftIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { } raft::update_device( - raft_knn_index.value().list_sizes().data_handle(), + cuvs_index->list_sizes().data_handle(), list_sizes_.data(), nlist, raft_handle.get_stream()); // Precompute the centers vector norms for L2Expanded distance if (this->metric_ == faiss::METRIC_L2) { - raft_knn_index.value().allocate_center_norms(raft_handle); + cuvs_index->allocate_center_norms(raft_handle); raft::linalg::rowNorm( - raft_knn_index.value().center_norms().value().data_handle(), - raft_knn_index.value().centers().data_handle(), - raft_knn_index.value().dim(), + cuvs_index->center_norms().value().data_handle(), + cuvs_index->centers().data_handle(), + cuvs_index->dim(), (uint32_t)nlist, raft::linalg::L2Norm, true, @@ -433,7 +436,7 @@ void RaftIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { } } -size_t RaftIVFFlat::getGpuVectorsEncodingSize_(idx_t numVecs) const { +size_t CuvsIVFFlat::getGpuVectorsEncodingSize_(idx_t numVecs) const { idx_t bits = 32 /* float */; // bytes to encode a block of 32 vectors (single dimension) @@ -444,13 +447,13 @@ size_t RaftIVFFlat::getGpuVectorsEncodingSize_(idx_t numVecs) const { // number of blocks of 32 vectors we have idx_t numBlocks = - utils::divUp(numVecs, raft::neighbors::ivf_flat::kIndexGroupSize); + utils::divUp(numVecs, cuvs::neighbors::ivf_flat::kIndexGroupSize); // total size to encode numVecs return bytesPerBlock * numBlocks; } -void RaftIVFFlat::addEncodedVectorsToList_( +void CuvsIVFFlat::addEncodedVectorsToList_( idx_t listId, const void* codes, const idx_t* indices, @@ -470,8 +473,8 @@ void RaftIVFFlat::addEncodedVectorsToList_( FAISS_ASSERT(gpuListSizeInBytes <= (size_t)std::numeric_limits::max()); std::vector interleaved_codes(gpuListSizeInBytes); - RaftIVFFlatCodePackerInterleaved packer( - (size_t)numVecs, (uint32_t)dim_, raft_knn_index.value().veclen()); + CuvsIVFFlatCodePackerInterleaved packer( + (size_t)numVecs, (uint32_t)dim_, cuvs_index->veclen()); packer.pack_all( reinterpret_cast(codes), interleaved_codes.data()); @@ -483,7 +486,7 @@ void RaftIVFFlat::addEncodedVectorsToList_( /// 
fetch the list data ptr on host raft::update_host( &list_data_ptr, - raft_knn_index.value().data_ptrs().data_handle() + listId, + cuvs_index->data_ptrs().data_handle() + listId, 1, stream); raft_handle.sync_stream(); @@ -500,7 +503,7 @@ void RaftIVFFlat::addEncodedVectorsToList_( // fetch the list indices ptr on host raft::update_host( &list_indices_ptr, - raft_knn_index.value().inds_ptrs().data_handle() + listId, + cuvs_index->inds_ptrs().data_handle() + listId, 1, stream); raft_handle.sync_stream(); @@ -508,7 +511,7 @@ void RaftIVFFlat::addEncodedVectorsToList_( raft::update_device(list_indices_ptr, indices, numVecs, stream); } -RaftIVFFlatCodePackerInterleaved::RaftIVFFlatCodePackerInterleaved( +CuvsIVFFlatCodePackerInterleaved::CuvsIVFFlatCodePackerInterleaved( size_t list_size, uint32_t dim, uint32_t chunk_size) { @@ -519,28 +522,28 @@ RaftIVFFlatCodePackerInterleaved::RaftIVFFlatCodePackerInterleaved( nvec = list_size; code_size = dim * 4; block_size = - utils::roundUp(nvec, raft::neighbors::ivf_flat::kIndexGroupSize); + utils::roundUp(nvec, cuvs::neighbors::ivf_flat::kIndexGroupSize); } -void RaftIVFFlatCodePackerInterleaved::pack_1( +void CuvsIVFFlatCodePackerInterleaved::pack_1( const uint8_t* flat_code, size_t offset, uint8_t* block) const { - raft::neighbors::ivf_flat::codepacker::pack_1( - reinterpret_cast(flat_code), - reinterpret_cast(block), + cuvs::neighbors::ivf_flat::helpers::codepacker::pack_1( + reinterpret_cast(flat_code), + reinterpret_cast(block), dim, chunk_size, static_cast(offset)); } -void RaftIVFFlatCodePackerInterleaved::unpack_1( +void CuvsIVFFlatCodePackerInterleaved::unpack_1( const uint8_t* block, size_t offset, uint8_t* flat_code) const { - raft::neighbors::ivf_flat::codepacker::unpack_1( - reinterpret_cast(block), - reinterpret_cast(flat_code), + cuvs::neighbors::ivf_flat::helpers::codepacker::unpack_1( + reinterpret_cast(block), + reinterpret_cast(flat_code), dim, chunk_size, static_cast(offset)); diff --git a/faiss/gpu/impl/RaftIVFFlat.cuh b/faiss/gpu/impl/CuvsIVFFlat.cuh similarity index 89% rename from faiss/gpu/impl/RaftIVFFlat.cuh rename to faiss/gpu/impl/CuvsIVFFlat.cuh index a2dcef6ce5..72764c8446 100644 --- a/faiss/gpu/impl/RaftIVFFlat.cuh +++ b/faiss/gpu/impl/CuvsIVFFlat.cuh @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
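Sketch of the path the code above serves (parameter values are placeholders): cloning a trained CPU IndexIVFFlat onto the GPU with cuVS enabled, which drives copyInvertedListsFrom() and the interleaved code packer.

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

void clone_ivfflat_to_gpu(const float* xt, faiss::idx_t nt,
                          const float* xb, faiss::idx_t nb, int d) {
    // Build and populate a CPU index first.
    faiss::IndexFlatL2 quantizer(d);
    faiss::IndexIVFFlat cpu_index(&quantizer, d, /*nlist=*/1024);
    cpu_index.train(nt, xt);
    cpu_index.add(nb, xb);

    // Copy it into a cuVS-backed GPU index; the inverted lists are repacked
    // into the interleaved layout by CuvsIVFFlatCodePackerInterleaved.
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexIVFFlatConfig config;
    config.use_cuvs = true;

    faiss::gpu::GpuIndexIVFFlat gpu_index(
            &res, d, /*nlist=*/1024, faiss::METRIC_L2, config);
    gpu_index.copyFrom(&cpu_index);
}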
@@ -27,7 +27,7 @@ #include #include -#include +#include #include @@ -35,9 +35,9 @@ namespace faiss { namespace gpu { -class RaftIVFFlat : public IVFFlat { +class CuvsIVFFlat : public IVFFlat { public: - RaftIVFFlat( + CuvsIVFFlat( GpuResources* resources, int dim, int nlist, @@ -50,7 +50,7 @@ class RaftIVFFlat : public IVFFlat { IndicesOptions indicesOptions, MemorySpace space); - ~RaftIVFFlat() override; + ~CuvsIVFFlat() override; /// Reserve GPU memory in our inverted lists for this number of vectors void reserveMemory(idx_t numVecs) override; @@ -86,7 +86,7 @@ class RaftIVFFlat : public IVFFlat { Tensor& vecs, Tensor& indices) override; - /// Clear out the Raft index + /// Clear out the cuVS index void reset() override; /// For debugging purposes, return the list length of a particular @@ -100,15 +100,15 @@ class RaftIVFFlat : public IVFFlat { std::vector getListVectorData(idx_t listId, bool gpuFormat) const override; - /// Update our Raft index with this quantizer instance; may be a CPU + /// Update our cuVS index with this quantizer instance; may be a CPU /// or GPU quantizer void updateQuantizer(Index* quantizer) override; /// Copy all inverted lists from a CPU representation to ourselves void copyInvertedListsFrom(const InvertedLists* ivf) override; - /// Replace the Raft index - void setRaftIndex(raft::neighbors::ivf_flat::index&& idx); + /// Replace the cuVS index + void setCuvsIndex(cuvs::neighbors::ivf_flat::index&& idx); private: /// Adds a set of codes and indices to a list, with the representation @@ -127,12 +127,12 @@ class RaftIVFFlat : public IVFFlat { /// this is the size for an entire IVF list size_t getGpuVectorsEncodingSize_(idx_t numVecs) const override; - std::optional> - raft_knn_index{std::nullopt}; + std::shared_ptr> cuvs_index{ + nullptr}; }; -struct RaftIVFFlatCodePackerInterleaved : CodePacker { - RaftIVFFlatCodePackerInterleaved( +struct CuvsIVFFlatCodePackerInterleaved : CodePacker { + CuvsIVFFlatCodePackerInterleaved( size_t list_size, uint32_t dim, uint32_t chuk_size); diff --git a/faiss/gpu/impl/RaftIVFPQ.cu b/faiss/gpu/impl/CuvsIVFPQ.cu similarity index 77% rename from faiss/gpu/impl/RaftIVFPQ.cu rename to faiss/gpu/impl/CuvsIVFPQ.cu index 15f49bbffd..2fc94de0f0 100644 --- a/faiss/gpu/impl/RaftIVFPQ.cu +++ b/faiss/gpu/impl/CuvsIVFPQ.cu @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,13 +22,15 @@ */ #include -#include +#include +#include +#include #include -#include #include -#include -#include +#include +#include +#include #include #include @@ -36,7 +38,7 @@ namespace faiss { namespace gpu { -RaftIVFPQ::RaftIVFPQ( +CuvsIVFPQ::CuvsIVFPQ( GpuResources* resources, int dim, idx_t nlist, @@ -60,44 +62,44 @@ RaftIVFPQ::RaftIVFPQ( useFloat16LookupTables, useMMCodeDistance, interleavedLayout, - // skip ptr allocations in base class (handled by RAFT + // skip ptr allocations in base class (handled by cuVS // internally) false, pqCentroidData, indicesOptions, space) { FAISS_THROW_IF_NOT_MSG( indicesOptions == INDICES_64_BIT, - "only INDICES_64_BIT is supported for RAFT index"); + "only INDICES_64_BIT is supported for cuVS index"); } -RaftIVFPQ::~RaftIVFPQ() {} +CuvsIVFPQ::~CuvsIVFPQ() {} -void RaftIVFPQ::reserveMemory(idx_t numVecs) { +void CuvsIVFPQ::reserveMemory(idx_t numVecs) { fprintf(stderr, - "WARN: reserveMemory is NOP. Pre-allocation of IVF lists is not supported with RAFT enabled.\n"); + "WARN: reserveMemory is NOP. Pre-allocation of IVF lists is not supported with cuVS enabled.\n"); } -void RaftIVFPQ::reset() { - raft_knn_index.reset(); +void CuvsIVFPQ::reset() { + cuvs_index.reset(); } -size_t RaftIVFPQ::reclaimMemory() { +size_t CuvsIVFPQ::reclaimMemory() { fprintf(stderr, - "WARN: reclaimMemory is NOP. reclaimMemory is not supported with RAFT enabled.\n"); + "WARN: reclaimMemory is NOP. reclaimMemory is not supported with cuVS enabled.\n"); return 0; } -void RaftIVFPQ::setPrecomputedCodes(Index* quantizer, bool enable) {} +void CuvsIVFPQ::setPrecomputedCodes(Index* quantizer, bool enable) {} -idx_t RaftIVFPQ::getListLength(idx_t listId) const { - FAISS_ASSERT(raft_knn_index.has_value()); +idx_t CuvsIVFPQ::getListLength(idx_t listId) const { + FAISS_ASSERT(cuvs_index); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); uint32_t size; raft::update_host( &size, - raft_knn_index.value().list_sizes().data_handle() + listId, + cuvs_index->list_sizes().data_handle() + listId, 1, raft_handle.get_stream()); raft_handle.sync_stream(); @@ -105,7 +107,7 @@ idx_t RaftIVFPQ::getListLength(idx_t listId) const { return static_cast(size); } -void RaftIVFPQ::updateQuantizer(Index* quantizer) { +void CuvsIVFPQ::updateQuantizer(Index* quantizer) { FAISS_THROW_IF_NOT(quantizer->is_trained); // Must match our basic IVF parameters @@ -116,18 +118,19 @@ void RaftIVFPQ::updateQuantizer(Index* quantizer) { const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_pq::index_params pams; - pams.metric = metricFaissToRaft(metric_, false); - pams.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE; + cuvs::neighbors::ivf_pq::index_params pams; + pams.metric = metricFaissToCuvs(metric_, false); + pams.codebook_kind = cuvs::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE; pams.n_lists = numLists_; pams.pq_bits = bitsPerSubQuantizer_; pams.pq_dim = numSubQuantizers_; - raft_knn_index.emplace(raft_handle, pams, static_cast(dim_)); + cuvs_index = std::make_shared>( + raft_handle, pams, static_cast(dim_)); - raft::neighbors::ivf_pq::helpers::reset_index( - raft_handle, &raft_knn_index.value()); - raft::neighbors::ivf_pq::helpers::make_rotation_matrix( - raft_handle, &(raft_knn_index.value()), false); + cuvs::neighbors::ivf_pq::helpers::reset_index( + raft_handle, cuvs_index.get()); + cuvs::neighbors::ivf_pq::helpers::make_rotation_matrix( + raft_handle, cuvs_index.get(), false); // If the index 
instance is a GpuIndexFlat, then we can use direct access to // the centroids within. @@ -146,9 +149,9 @@ void RaftIVFPQ::updateQuantizer(Index* quantizer) { // as float32 and store locally gpuData->reconstruct(0, gpuData->getSize(), centroids); - raft::neighbors::ivf_pq::helpers::set_centers( + cuvs::neighbors::ivf_pq::helpers::set_centers( raft_handle, - &(raft_knn_index.value()), + cuvs_index.get(), raft::make_device_matrix_view( centroids.data(), numLists_, dim_)); } else { @@ -157,9 +160,9 @@ void RaftIVFPQ::updateQuantizer(Index* quantizer) { // reference it auto centroids = gpuData->getVectorsFloat32Ref(); - raft::neighbors::ivf_pq::helpers::set_centers( + cuvs::neighbors::ivf_pq::helpers::set_centers( raft_handle, - &(raft_knn_index.value()), + cuvs_index.get(), raft::make_device_matrix_view( centroids.data(), numLists_, dim_)); } @@ -177,9 +180,9 @@ void RaftIVFPQ::updateQuantizer(Index* quantizer) { centroids.copyFrom(vecs, stream); - raft::neighbors::ivf_pq::helpers::set_centers( + cuvs::neighbors::ivf_pq::helpers::set_centers( raft_handle, - &(raft_knn_index.value()), + cuvs_index.get(), raft::make_device_matrix_view( centroids.data(), numLists_, dim_)); } @@ -188,8 +191,8 @@ void RaftIVFPQ::updateQuantizer(Index* quantizer) { } /// Return the list indices of a particular list back to the CPU -std::vector RaftIVFPQ::getListIndices(idx_t listId) const { - FAISS_ASSERT(raft_knn_index.has_value()); +std::vector CuvsIVFPQ::getListIndices(idx_t listId) const { + FAISS_ASSERT(cuvs_index); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); auto stream = raft_handle.get_stream(); @@ -203,9 +206,7 @@ std::vector RaftIVFPQ::getListIndices(idx_t listId) const { raft::update_host( &list_indices_ptr, - const_cast( - raft_knn_index.value().inds_ptrs().data_handle()) + - listId, + const_cast(cuvs_index->inds_ptrs().data_handle()) + listId, 1, stream); raft_handle.sync_stream(); @@ -218,7 +219,7 @@ std::vector RaftIVFPQ::getListIndices(idx_t listId) const { /// Performs search when we are already given the IVF cells to look at /// (GpuIndexIVF::search_preassigned implementation) -void RaftIVFPQ::searchPreassigned( +void CuvsIVFPQ::searchPreassigned( Index* coarseQuantizer, Tensor& vecs, Tensor& ivfDistances, @@ -230,19 +231,18 @@ void RaftIVFPQ::searchPreassigned( // TODO: Fill this in! } -size_t RaftIVFPQ::getGpuListEncodingSize_(idx_t listId) { - return static_cast( - raft_knn_index.value().get_list_size_in_bytes(listId)); +size_t CuvsIVFPQ::getGpuListEncodingSize_(idx_t listId) { + return static_cast(cuvs_index->get_list_size_in_bytes(listId)); } /// Return the encoded vectors of a particular list back to the CPU -std::vector RaftIVFPQ::getListVectorData(idx_t listId, bool gpuFormat) +std::vector CuvsIVFPQ::getListVectorData(idx_t listId, bool gpuFormat) const { if (gpuFormat) { FAISS_THROW_MSG( - "gpuFormat should be false for RAFT indices. Unpacked codes are flat."); + "gpuFormat should be false for cuVS indices. 
Unpacked codes are flat."); } - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); @@ -265,13 +265,14 @@ std::vector RaftIVFPQ::getListVectorData(idx_t listId, bool gpuFormat) auto codes_d = raft::make_device_vector( raft_handle, static_cast(bufferSize)); - raft::neighbors::ivf_pq::helpers::unpack_contiguous_list_data( - raft_handle, - raft_knn_index.value(), - codes_d.data_handle(), - batchSize, - listId, - offset_b); + cuvs::neighbors::ivf_pq::helpers::codepacker:: + unpack_contiguous_list_data( + raft_handle, + *cuvs_index, + codes_d.data_handle(), + batchSize, + listId, + offset_b); // Copy the flat PQ codes to host raft::update_host( @@ -287,7 +288,7 @@ std::vector RaftIVFPQ::getListVectorData(idx_t listId, bool gpuFormat) /// Find the approximate k nearest neighbors for `queries` against /// our database -void RaftIVFPQ::search( +void CuvsIVFPQ::search( Index* coarseQuantizer, Tensor& queries, int nprobe, @@ -296,17 +297,17 @@ void RaftIVFPQ::search( Tensor& outIndices) { uint32_t numQueries = queries.getSize(0); uint32_t cols = queries.getSize(1); - idx_t k_ = std::min(static_cast(k), raft_knn_index.value().size()); + idx_t k_ = std::min(static_cast(k), cuvs_index->size()); // Device is already set in GpuIndex::search - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index); FAISS_ASSERT(numQueries > 0); FAISS_ASSERT(cols == dim_); FAISS_THROW_IF_NOT(nprobe > 0 && nprobe <= numLists_); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_pq::search_params pams; + cuvs::neighbors::ivf_pq::search_params pams; pams.n_probes = nprobe; pams.lut_dtype = useFloat16LookupTables_ ? CUDA_R_16F : CUDA_R_32F; @@ -317,10 +318,10 @@ void RaftIVFPQ::search( auto out_dists_view = raft::make_device_matrix_view( outDistances.data(), (idx_t)numQueries, (idx_t)k_); - raft::neighbors::ivf_pq::search( + cuvs::neighbors::ivf_pq::search( raft_handle, pams, - raft_knn_index.value(), + *cuvs_index, queries_view, out_inds_view, out_dists_view); @@ -358,15 +359,15 @@ void RaftIVFPQ::search( raft_handle.sync_stream(); } -idx_t RaftIVFPQ::addVectors( +idx_t CuvsIVFPQ::addVectors( Index* coarseQuantizer, Tensor& vecs, Tensor& indices) { /// NB: The coarse quantizer is ignored here. The user is assumed to have - /// called updateQuantizer() to update the RAFT index if the quantizer was + /// called updateQuantizer() to update the cuVS index if the quantizer was /// modified externally - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); @@ -374,19 +375,18 @@ idx_t RaftIVFPQ::addVectors( /// Remove rows containing NaNs idx_t n_rows_valid = inplaceGatherFilteredRows(resources_, vecs, indices); - raft_knn_index.emplace(raft::neighbors::ivf_pq::extend( + cuvs::neighbors::ivf_pq::extend( raft_handle, raft::make_device_matrix_view( vecs.data(), n_rows_valid, dim_), - std::make_optional>( - raft::make_device_vector_view( - indices.data(), n_rows_valid)), - raft_knn_index.value())); + raft::make_device_vector_view( + indices.data(), n_rows_valid), + cuvs_index.get()); return n_rows_valid; } -void RaftIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { +void CuvsIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { size_t nlist = ivf ? ivf->nlist : 0; size_t ntotal = ivf ? 
ivf->compute_ntotal() : 0; @@ -397,12 +397,12 @@ void RaftIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { std::vector indices_(ntotal); // the index must already exist - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index); - auto& raft_lists = raft_knn_index.value().lists(); + auto& cuvs_index_lists = cuvs_index->lists(); // conservative memory alloc for cloning cpu inverted lists - raft::neighbors::ivf_pq::list_spec raft_list_spec{ + cuvs::neighbors::ivf_pq::list_spec ivf_list_spec{ static_cast(bitsPerSubQuantizer_), static_cast(numSubQuantizers_), true}; @@ -421,26 +421,26 @@ void RaftIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { // store the list size list_sizes_[i] = static_cast(listSize); - // This RAFT list must currently be empty + // This cuVS list must currently be empty FAISS_ASSERT(getListLength(i) == 0); - raft::neighbors::ivf::resize_list( + cuvs::neighbors::ivf::resize_list( raft_handle, - raft_lists[i], - raft_list_spec, + cuvs_index_lists[i], + ivf_list_spec, static_cast(listSize), static_cast(0)); } raft::update_device( - raft_knn_index.value().list_sizes().data_handle(), + cuvs_index->list_sizes().data_handle(), list_sizes_.data(), nlist, raft_handle.get_stream()); // Update the pointers and the sizes - raft::neighbors::ivf_pq::helpers::recompute_internal_state( - raft_handle, &(raft_knn_index.value())); + cuvs::neighbors::ivf_pq::helpers::recompute_internal_state( + raft_handle, cuvs_index.get()); for (size_t i = 0; i < nlist; ++i) { size_t listSize = ivf->list_size(i); @@ -449,12 +449,13 @@ void RaftIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { } } -void RaftIVFPQ::setRaftIndex(raft::neighbors::ivf_pq::index&& idx) { - raft_knn_index.emplace(std::move(idx)); +void CuvsIVFPQ::setCuvsIndex(cuvs::neighbors::ivf_pq::index&& idx) { + cuvs_index = std::make_shared>( + std::move(idx)); setBasePQCentroids_(); } -void RaftIVFPQ::addEncodedVectorsToList_( +void CuvsIVFPQ::addEncodedVectorsToList_( idx_t listId, const void* codes, const idx_t* indices, @@ -490,9 +491,9 @@ void RaftIVFPQ::addEncodedVectorsToList_( bufferSize, stream); - raft::neighbors::ivf_pq::helpers::pack_contiguous_list_data( + cuvs::neighbors::ivf_pq::helpers::codepacker::pack_contiguous_list_data( raft_handle, - &(raft_knn_index.value()), + cuvs_index.get(), codes_d.data_handle(), batchSize, listId, @@ -505,7 +506,7 @@ void RaftIVFPQ::addEncodedVectorsToList_( // fetch the list indices ptr on host raft::update_host( &list_indices_ptr, - raft_knn_index.value().inds_ptrs().data_handle() + listId, + cuvs_index->inds_ptrs().data_handle() + listId, 1, stream); raft_handle.sync_stream(); @@ -513,23 +514,23 @@ void RaftIVFPQ::addEncodedVectorsToList_( raft::update_device(list_indices_ptr, indices, numVecs, stream); } -void RaftIVFPQ::setPQCentroids_() { +void CuvsIVFPQ::setPQCentroids_() { auto stream = resources_->getDefaultStreamCurrentDevice(); raft::copy( - raft_knn_index.value().pq_centers().data_handle(), + cuvs_index->pq_centers().data_handle(), pqCentroidsInnermostCode_.data(), pqCentroidsInnermostCode_.numElements(), stream); } -void RaftIVFPQ::setBasePQCentroids_() { +void CuvsIVFPQ::setBasePQCentroids_() { auto stream = resources_->getDefaultStreamCurrentDevice(); raft::copy( pqCentroidsInnermostCode_.data(), - raft_knn_index.value().pq_centers().data_handle(), - raft_knn_index.value().pq_centers().size(), + cuvs_index->pq_centers().data_handle(), + cuvs_index->pq_centers().size(), stream); DeviceTensor pqCentroidsMiddleCode( diff --git 
a/faiss/gpu/impl/RaftIVFPQ.cuh b/faiss/gpu/impl/CuvsIVFPQ.cuh
similarity index 88%
rename from faiss/gpu/impl/RaftIVFPQ.cuh
rename to faiss/gpu/impl/CuvsIVFPQ.cuh
index 9a54dda79b..e6a3e1edc4 100644
--- a/faiss/gpu/impl/RaftIVFPQ.cuh
+++ b/faiss/gpu/impl/CuvsIVFPQ.cuh
@@ -6,7 +6,7 @@
 * LICENSE file in the root directory of this source tree.
 */
/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -26,17 +26,18 @@
#include
#include
-#include
+#include
+#include
#include

#pragma GCC visibility push(default)

namespace faiss {
namespace gpu {

/// Implementing class for IVFPQ on the GPU
-class RaftIVFPQ : public IVFPQ {
+class CuvsIVFPQ : public IVFPQ {
   public:
-    RaftIVFPQ(
+    CuvsIVFPQ(
            GpuResources* resources,
            int dim,
            idx_t nlist,
@@ -51,12 +52,12 @@ class RaftIVFPQ : public IVFPQ {
            IndicesOptions indicesOptions,
            MemorySpace space);

-    ~RaftIVFPQ() override;
+    ~CuvsIVFPQ() override;

    /// Reserve GPU memory in our inverted lists for this number of vectors
    void reserveMemory(idx_t numVecs) override;

-    /// Clear out the RAFT index
+    /// Clear out the cuVS index
    void reset() override;

    /// After adding vectors, one can call this to reclaim device memory
@@ -93,15 +94,15 @@ class RaftIVFPQ : public IVFPQ {
    std::vector getListVectorData(idx_t listId, bool gpuFormat)
            const override;

-    /// Update our Raft index with this quantizer instance; may be a CPU
+    /// Update our cuVS index with this quantizer instance; may be a CPU
    /// or GPU quantizer
    void updateQuantizer(Index* quantizer) override;

    /// Copy all inverted lists from a CPU representation to ourselves
    void copyInvertedListsFrom(const InvertedLists* ivf) override;

-    /// Replace the Raft index
-    void setRaftIndex(raft::neighbors::ivf_pq::index&& idx);
+    /// Replace the cuVS index
+    void setCuvsIndex(cuvs::neighbors::ivf_pq::index&& idx);

    /// Classify and encode/add vectors to our IVF lists.
    /// The input data must be on our current device.
@@ -133,17 +134,16 @@ class RaftIVFPQ : public IVFPQ {
    /// Returns the encoding size for a PQ-encoded IVF list
    size_t getGpuListEncodingSize_(idx_t listId);

-    /// Copy the PQ centroids to the Raft index. The data is already in the
+    /// Copy the PQ centroids to the cuVS index. The data is already in the
    /// preferred format with the transpose performed by the IVFPQ class helper.
    void setPQCentroids_();

    /// Update the product quantizer centroids buffer held in the IVFPQ class.
-    /// Used when the RAFT index was updated externally.
+    /// Used when the cuVS index was updated externally.
    void setBasePQCentroids_();

-    /// optional around the Raft IVF-PQ index
-    std::optional> raft_knn_index{
-            std::nullopt};
+    /// cuVS IVF-PQ index
+    std::shared_ptr> cuvs_index{nullptr};
};

} // namespace gpu
diff --git a/faiss/gpu/test/CMakeLists.txt b/faiss/gpu/test/CMakeLists.txt
index 8c44e5360e..baf7480b51 100644
--- a/faiss/gpu/test/CMakeLists.txt
+++ b/faiss/gpu/test/CMakeLists.txt
@@ -25,16 +25,15 @@ if(FAISS_ENABLE_ROCM)
  target_link_libraries(faiss_gpu_test_helper PUBLIC faiss gtest hip::host)
else()
  find_package(CUDAToolkit REQUIRED)
-  target_link_libraries(faiss_gpu_test_helper PUBLIC
-  faiss gtest CUDA::cudart
-  $<$:raft::raft>
-  $<$:raft::compiled>)
+  target_link_libraries(faiss_gpu_test_helper PUBLIC faiss gtest CUDA::cudart $<$:cuvs::cuvs> $<$:OpenMP::OpenMP_CXX>)
endif()

macro(faiss_gpu_test file)
  get_filename_component(test_name ${file} NAME_WE)
  add_executable(${test_name} ${file})
  target_link_libraries(${test_name} PRIVATE faiss_gpu_test_helper)
+  target_compile_options(${test_name} PRIVATE
+          $<$:$<$:-Xcompiler=${OpenMP_CXX_FLAGS}>>)
  gtest_discover_tests(${test_name})
endmacro()

@@ -48,8 +47,7 @@ faiss_gpu_test(TestGpuIndexIVFPQ.cpp)
faiss_gpu_test(TestGpuIndexIVFScalarQuantizer.cpp)
faiss_gpu_test(TestGpuDistance.${GPU_EXT_PREFIX})
faiss_gpu_test(TestGpuSelect.${GPU_EXT_PREFIX})
-
-if(FAISS_ENABLE_RAFT)
+if(FAISS_ENABLE_CUVS)
  faiss_gpu_test(TestGpuIndexCagra.cu)
endif()
diff --git a/faiss/gpu/test/TestGpuDistance.cu b/faiss/gpu/test/TestGpuDistance.cu
index 11ed085edd..3915055480 100644
--- a/faiss/gpu/test/TestGpuDistance.cu
+++ b/faiss/gpu/test/TestGpuDistance.cu
@@ -49,7 +49,7 @@ void evaluate_bfknn(
    bfKnn(res, args);

    std::stringstream str;
-    str << "using raft " << args.use_raft << "metric " << metric
+    str << "using cuVS " << args.use_cuvs << "metric " << metric
        << " colMajorVecs " << colMajorVecs << " colMajorQueries "
        << colMajorQueries;
@@ -73,7 +73,7 @@ void testTransposition(
        bool colMajorVecs,
        bool colMajorQueries,
        faiss::MetricType metric,
-        bool use_raft = false,
+        bool use_cuvs = false,
        float metricArg = 0) {
    using namespace faiss::gpu;
@@ -169,12 +169,12 @@ void testTransposition(
    args.outIndices = gpuIndices.data();
    args.device = device;

-#if defined USE_NVIDIA_RAFT
-    args.use_raft = use_raft;
+#if defined USE_NVIDIA_CUVS
+    args.use_cuvs = use_cuvs;
#else
    FAISS_THROW_IF_NOT_MSG(
-            !use_raft,
-            "RAFT has not been compiled into the current version so it cannot be used.");
+            !use_cuvs,
+            "cuVS has not been compiled into the current version so it cannot be used.");
#endif

    evaluate_bfknn(
@@ -197,8 +197,8 @@ TEST(TestGpuDistance, Transposition_RR) {
    testTransposition(false, false, faiss::MetricType::METRIC_INNER_PRODUCT);
}

-#if defined USE_NVIDIA_RAFT
-TEST(TestRaftGpuDistance, Transposition_RR) {
+#if defined USE_NVIDIA_CUVS
+TEST(TestCuvsGpuDistance, Transposition_RR) {
    testTransposition(false, false, faiss::MetricType::METRIC_L2, true);
    testTransposition(
            false, false, faiss::MetricType::METRIC_INNER_PRODUCT, true);
@@ -209,8 +209,8 @@ TEST(TestGpuDistance, Transposition_RC) {
    testTransposition(false, true, faiss::MetricType::METRIC_L2);
}

-#if defined USE_NVIDIA_RAFT
-TEST(TestRaftGpuDistance, Transposition_RC) {
+#if defined USE_NVIDIA_CUVS
+TEST(TestCuvsGpuDistance, Transposition_RC) {
    testTransposition(false, true, faiss::MetricType::METRIC_L2, true);
}
#endif

@@ -219,8 +219,8 @@ TEST(TestGpuDistance, Transposition_CR) {
    testTransposition(true, false, faiss::MetricType::METRIC_L2);
}

-#if defined USE_NVIDIA_RAFT
-TEST(TestRaftGpuDistance,
Transposition_CR) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, Transposition_CR) { testTransposition(true, false, faiss::MetricType::METRIC_L2, true); } #endif @@ -229,8 +229,8 @@ TEST(TestGpuDistance, Transposition_CC) { testTransposition(true, true, faiss::MetricType::METRIC_L2); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, Transposition_CC) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, Transposition_CC) { testTransposition(true, true, faiss::MetricType::METRIC_L2, true); } #endif @@ -239,8 +239,8 @@ TEST(TestGpuDistance, L1) { testTransposition(false, false, faiss::MetricType::METRIC_L1); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, L1) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, L1) { testTransposition(false, false, faiss::MetricType::METRIC_L1, true); } #endif @@ -250,9 +250,9 @@ TEST(TestGpuDistance, L1_RC) { testTransposition(false, true, faiss::MetricType::METRIC_L1); } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS // Test other transpositions with the general distance kernel -TEST(TestRaftGpuDistance, L1_RC) { +TEST(TestCuvsGpuDistance, L1_RC) { testTransposition(false, true, faiss::MetricType::METRIC_L1, true); } #endif @@ -261,8 +261,8 @@ TEST(TestGpuDistance, L1_CR) { testTransposition(true, false, faiss::MetricType::METRIC_L1); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, L1_CR) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, L1_CR) { testTransposition(true, false, faiss::MetricType::METRIC_L1, true); } #endif @@ -271,8 +271,8 @@ TEST(TestGpuDistance, L1_CC) { testTransposition(true, true, faiss::MetricType::METRIC_L1); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, L1_CC) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, L1_CC) { testTransposition(true, true, faiss::MetricType::METRIC_L1, true); } #endif @@ -282,9 +282,9 @@ TEST(TestGpuDistance, Linf) { testTransposition(false, false, faiss::MetricType::METRIC_Linf); } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS // Test remainder of metric types -TEST(TestRaftGpuDistance, Linf) { +TEST(TestCuvsGpuDistance, Linf) { testTransposition(false, false, faiss::MetricType::METRIC_Linf, true); } #endif @@ -293,8 +293,8 @@ TEST(TestGpuDistance, Lp) { testTransposition(false, false, faiss::MetricType::METRIC_Lp, false, 3); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, Lp) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, Lp) { testTransposition(false, false, faiss::MetricType::METRIC_Lp, true, 3); } #endif @@ -303,8 +303,8 @@ TEST(TestGpuDistance, Canberra) { testTransposition(false, false, faiss::MetricType::METRIC_Canberra); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, Canberra) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, Canberra) { testTransposition(false, false, faiss::MetricType::METRIC_Canberra, true); } #endif @@ -317,8 +317,8 @@ TEST(TestGpuDistance, JensenShannon) { testTransposition(false, false, faiss::MetricType::METRIC_JensenShannon); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, JensenShannon) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, JensenShannon) { testTransposition( false, false, faiss::MetricType::METRIC_JensenShannon, true); } diff --git a/faiss/gpu/test/TestGpuIndexCagra.cu b/faiss/gpu/test/TestGpuIndexCagra.cu index 7f2ba6821a..16b94703c4 100644 --- a/faiss/gpu/test/TestGpuIndexCagra.cu +++ b/faiss/gpu/test/TestGpuIndexCagra.cu @@ -183,7 +183,7 @@ void queryTest(faiss::MetricType metric, double expected_recall) 
{ recall_score.view(), test_dis_mds_opt, ref_dis_mds_opt); - ASSERT_GT(*recall_score.data_handle(), expected_recall); + ASSERT_TRUE(*recall_score.data_handle() > expected_recall); } } @@ -331,7 +331,7 @@ void copyToTest( recall_score.view(), copy_ref_dis_mds_opt, ref_dis_mds_opt); - ASSERT_GT(*recall_score.data_handle(), expected_recall); + ASSERT_TRUE(*recall_score.data_handle() > expected_recall); } } @@ -453,7 +453,7 @@ void copyFromTest(faiss::MetricType metric, double expected_recall) { recall_score.view(), copy_test_dis_mds_opt, test_dis_mds_opt); - ASSERT_GT(*recall_score.data_handle(), expected_recall); + ASSERT_TRUE(*recall_score.data_handle() > expected_recall); } } diff --git a/faiss/gpu/test/TestGpuIndexFlat.cpp b/faiss/gpu/test/TestGpuIndexFlat.cpp index 0398606bb6..3146e786c0 100644 --- a/faiss/gpu/test/TestGpuIndexFlat.cpp +++ b/faiss/gpu/test/TestGpuIndexFlat.cpp @@ -29,7 +29,7 @@ struct TestFlatOptions { numQueriesOverride(-1), kOverride(-1), dimOverride(-1), - use_raft(false) {} + use_cuvs(false) {} faiss::MetricType metric; float metricArg; @@ -39,7 +39,7 @@ struct TestFlatOptions { int numQueriesOverride; int kOverride; int dimOverride; - bool use_raft; + bool use_cuvs; }; void testFlat(const TestFlatOptions& opt) { @@ -75,7 +75,7 @@ void testFlat(const TestFlatOptions& opt) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; config.useFloat16 = opt.useFloat16; - config.use_raft = opt.use_raft; + config.use_cuvs = opt.use_cuvs; faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config); gpuIndex.metric_arg = opt.metricArg; @@ -114,8 +114,8 @@ TEST(TestGpuIndexFlat, IP_Float32) { testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -128,8 +128,8 @@ TEST(TestGpuIndexFlat, L1_Float32) { testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -141,8 +141,8 @@ TEST(TestGpuIndexFlat, Lp_Float32) { opt.useFloat16 = false; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -155,8 +155,8 @@ TEST(TestGpuIndexFlat, L2_Float32) { opt.useFloat16 = false; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -173,8 +173,8 @@ TEST(TestGpuIndexFlat, L2_k_2048) { opt.numVecsOverride = 10000; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -189,8 +189,8 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) { opt.kOverride = 1; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -203,8 +203,8 @@ TEST(TestGpuIndexFlat, IP_Float16) { opt.useFloat16 = true; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -217,8 +217,8 @@ TEST(TestGpuIndexFlat, L2_Float16) { opt.useFloat16 = true; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -233,8 +233,8 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) { opt.kOverride = 1; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } 
@@ -254,8 +254,8 @@ TEST(TestGpuIndexFlat, L2_Tiling) { opt.kOverride = 64; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -268,7 +268,7 @@ TEST(TestGpuIndexFlat, QueryEmpty) { faiss::gpu::GpuIndexFlatConfig config; config.device = 0; config.useFloat16 = false; - config.use_raft = false; + config.use_cuvs = false; int dim = 128; faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config); @@ -292,7 +292,7 @@ TEST(TestGpuIndexFlat, QueryEmpty) { } } -void testCopyFrom(bool use_raft) { +void testCopyFrom(bool use_cuvs) { int numVecs = faiss::gpu::randVal(100, 200); int dim = faiss::gpu::randVal(1, 1000); @@ -310,7 +310,7 @@ void testCopyFrom(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; config.useFloat16 = useFloat16; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; // Fill with garbage values faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config); @@ -343,13 +343,13 @@ TEST(TestGpuIndexFlat, CopyFrom) { testCopyFrom(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, CopyFrom) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, CopyFrom) { testCopyFrom(true); } #endif -void testCopyTo(bool use_raft) { +void testCopyTo(bool use_cuvs) { faiss::gpu::StandardGpuResources res; res.noTempMemory(); @@ -363,7 +363,7 @@ void testCopyTo(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; config.useFloat16 = useFloat16; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config); gpuIndex.add(numVecs, vecs.data()); @@ -394,13 +394,13 @@ TEST(TestGpuIndexFlat, CopyTo) { testCopyTo(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, CopyTo) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, CopyTo) { testCopyTo(true); } #endif -void testUnifiedMemory(bool use_raft) { +void testUnifiedMemory(bool use_cuvs) { // Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); @@ -426,7 +426,7 @@ void testUnifiedMemory(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; config.memorySpace = faiss::gpu::MemorySpace::Unified; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config); @@ -452,13 +452,13 @@ TEST(TestGpuIndexFlat, UnifiedMemory) { testUnifiedMemory(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, UnifiedMemory) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, UnifiedMemory) { testUnifiedMemory(true); } #endif -void testLargeIndex(bool use_raft) { +void testLargeIndex(bool use_cuvs) { // Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); @@ -489,7 +489,7 @@ void testLargeIndex(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config); cpuIndexL2.add(nb, xb.data()); @@ -513,13 +513,13 @@ TEST(TestGpuIndexFlat, LargeIndex) { testLargeIndex(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, LargeIndex) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, LargeIndex) { testLargeIndex(true); } #endif -void testResidual(bool use_raft) { +void testResidual(bool use_cuvs) { // 
Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); @@ -529,7 +529,7 @@ void testResidual(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; int dim = 32; faiss::IndexFlat cpuIndex(dim, faiss::MetricType::METRIC_L2); @@ -566,13 +566,13 @@ TEST(TestGpuIndexFlat, Residual) { testResidual(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, Residual) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, Residual) { testResidual(true); } #endif -void testReconstruct(bool use_raft) { +void testReconstruct(bool use_cuvs) { // Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); @@ -589,7 +589,7 @@ void testReconstruct(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; config.useFloat16 = useFloat16; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; faiss::gpu::GpuIndexFlat gpuIndex( &res, dim, faiss::MetricType::METRIC_L2, config); @@ -657,13 +657,13 @@ void testReconstruct(bool use_raft) { TEST(TestGpuIndexFlat, Reconstruct) { testReconstruct(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, Reconstruct) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, Reconstruct) { testReconstruct(true); } #endif -void testSearchAndReconstruct(bool use_raft) { +void testSearchAndReconstruct(bool use_cuvs) { // Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); @@ -683,7 +683,7 @@ void testSearchAndReconstruct(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config); cpuIndex.add(nb, xb.data()); @@ -754,8 +754,8 @@ TEST(TestGpuIndexFlat, SearchAndReconstruct) { testSearchAndReconstruct(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, SearchAndReconstruct) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, SearchAndReconstruct) { testSearchAndReconstruct(true); } #endif diff --git a/faiss/gpu/test/TestGpuIndexIVFFlat.cpp b/faiss/gpu/test/TestGpuIndexIVFFlat.cpp index df71e9e3fe..a7d0007b5b 100644 --- a/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +++ b/faiss/gpu/test/TestGpuIndexIVFFlat.cpp @@ -58,7 +58,7 @@ struct Options { device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); - useRaft = false; + useCuvs = false; } std::string toString() const { @@ -66,7 +66,7 @@ struct Options { str << "IVFFlat device " << device << " numVecs " << numAdd << " dim " << dim << " numCentroids " << numCentroids << " nprobe " << nprobe << " numQuery " << numQuery << " k " << k << " indicesOpt " - << indicesOpt << " useRaft " << useRaft; + << indicesOpt << " useCuvs " << useCuvs; return str.str(); } @@ -80,7 +80,7 @@ struct Options { int k; int device; faiss::gpu::IndicesOptions indicesOpt; - bool useRaft; + bool useCuvs; }; void queryTest( @@ -111,7 +111,7 @@ void queryTest( config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config); @@ -138,9 +138,10 @@ void 
queryTest( void addTest( faiss::MetricType metricType, bool useFloat16CoarseQuantizer, - bool useRaft) { + bool useCuvs) { for (int tries = 0; tries < 2; ++tries) { Options opt; + opt.useCuvs = useCuvs; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); @@ -163,9 +164,9 @@ void addTest( faiss::gpu::GpuIndexIVFFlatConfig config; config.device = opt.device; config.indicesOptions = - useRaft ? faiss::gpu::INDICES_64_BIT : opt.indicesOpt; + opt.useCuvs ? faiss::gpu::INDICES_64_BIT : opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.use_raft = useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config); @@ -189,7 +190,7 @@ void addTest( } } -void copyToTest(bool useFloat16CoarseQuantizer, bool useRaft) { +void copyToTest(bool useFloat16CoarseQuantizer, bool useCuvs) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); @@ -200,9 +201,9 @@ void copyToTest(bool useFloat16CoarseQuantizer, bool useRaft) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = opt.device; config.indicesOptions = - useRaft ? faiss::gpu::INDICES_64_BIT : opt.indicesOpt; + useCuvs ? faiss::gpu::INDICES_64_BIT : opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.use_raft = useRaft; + config.use_cuvs = useCuvs; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); @@ -242,7 +243,7 @@ void copyToTest(bool useFloat16CoarseQuantizer, bool useRaft) { compFloat16 ? 0.30f : 0.015f); } -void copyFromTest(bool useFloat16CoarseQuantizer, bool useRaft) { +void copyFromTest(bool useFloat16CoarseQuantizer, bool useCuvs) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); @@ -261,9 +262,9 @@ void copyFromTest(bool useFloat16CoarseQuantizer, bool useRaft) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = opt.device; config.indicesOptions = - useRaft ? faiss::gpu::INDICES_64_BIT : opt.indicesOpt; + useCuvs ? 
faiss::gpu::INDICES_64_BIT : opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.use_raft = useRaft; + config.use_cuvs = useCuvs; faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, 1, 1, faiss::METRIC_L2, config); gpuIndex.nprobe = 1; @@ -297,7 +298,7 @@ void copyFromTest(bool useFloat16CoarseQuantizer, bool useRaft) { TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) { addTest(faiss::METRIC_L2, false, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS addTest(faiss::METRIC_L2, false, true); #endif } @@ -305,7 +306,7 @@ TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) { TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) { addTest(faiss::METRIC_INNER_PRODUCT, false, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS addTest(faiss::METRIC_INNER_PRODUCT, false, true); #endif } @@ -313,7 +314,7 @@ TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) { TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) { addTest(faiss::METRIC_L2, true, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS addTest(faiss::METRIC_L2, true, true); #endif } @@ -321,7 +322,7 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) { TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) { addTest(faiss::METRIC_INNER_PRODUCT, true, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS addTest(faiss::METRIC_INNER_PRODUCT, true, true); #endif } @@ -334,8 +335,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_L2) { Options opt; queryTest(opt, faiss::METRIC_L2, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_L2, false); #endif @@ -345,8 +346,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_IP) { Options opt; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); #endif @@ -358,8 +359,8 @@ TEST(TestGpuIndexIVFFlat, LargeBatch) { opt.numQuery = 100000; queryTest(opt, faiss::METRIC_L2, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_L2, false); #endif @@ -371,8 +372,8 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) { Options opt; queryTest(opt, faiss::METRIC_L2, true); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_L2, true); #endif @@ -382,8 +383,8 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) { Options opt; queryTest(opt, faiss::METRIC_INNER_PRODUCT, true); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_INNER_PRODUCT, true); #endif @@ -399,8 +400,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) { opt.dim = 64; queryTest(opt, faiss::METRIC_L2, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_L2, false); #endif @@ -411,8 +412,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) { opt.dim = 64; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; 
opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); #endif @@ -423,8 +424,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) { opt.dim = 128; queryTest(opt, faiss::METRIC_L2, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_L2, false); #endif @@ -435,8 +436,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) { opt.dim = 128; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); #endif @@ -449,7 +450,7 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) { TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) { copyToTest(false, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS copyToTest(false, true); #endif } @@ -457,7 +458,7 @@ TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) { TEST(TestGpuIndexIVFFlat, Float32_32_CopyFrom) { copyFromTest(false, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS copyFromTest(false, true); #endif } @@ -500,7 +501,7 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = opt.device; config.indicesOptions = opt.indicesOpt; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config); @@ -523,19 +524,19 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) { compFloat16 ? 0.99f : 0.1f, compFloat16 ? 0.65f : 0.015f); -#if defined USE_NVIDIA_RAFT - config.use_raft = true; +#if defined USE_NVIDIA_CUVS + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - faiss::gpu::GpuIndexIVFFlat raftGpuIndex( + faiss::gpu::GpuIndexIVFFlat cuvsGpuIndex( &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config); - raftGpuIndex.copyFrom(&cpuIndex); - raftGpuIndex.nprobe = opt.nprobe; + cuvsGpuIndex.copyFrom(&cpuIndex); + cuvsGpuIndex.nprobe = opt.nprobe; faiss::gpu::compareIndices( queryVecs, cpuIndex, - raftGpuIndex, + cuvsGpuIndex, opt.numQuery, opt.dim, opt.k, @@ -573,7 +574,7 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) { config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = faiss::gpu::randBool(); - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); @@ -594,19 +595,19 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) { } } -#if defined USE_NVIDIA_RAFT - config.use_raft = true; +#if defined USE_NVIDIA_CUVS + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; std::fill(distances.begin(), distances.end(), 0); std::fill(indices.begin(), indices.end(), 0); - faiss::gpu::GpuIndexIVFFlat raftGpuIndex( + faiss::gpu::GpuIndexIVFFlat cuvsGpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); - raftGpuIndex.nprobe = opt.nprobe; + cuvsGpuIndex.nprobe = opt.nprobe; - raftGpuIndex.train(opt.numTrain, trainVecs.data()); - raftGpuIndex.add(opt.numAdd, addVecs.data()); + cuvsGpuIndex.train(opt.numTrain, trainVecs.data()); + cuvsGpuIndex.add(opt.numAdd, addVecs.data()); - raftGpuIndex.search( + cuvsGpuIndex.search( numQuery, nans.data(), opt.k, distances.data(), indices.data()); for (int q = 0; q < numQuery; ++q) { @@ -642,7 +643,7 @@ TEST(TestGpuIndexIVFFlat, AddNaN) { 
config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = faiss::gpu::randBool(); - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); gpuIndex.nprobe = opt.nprobe; @@ -664,20 +665,20 @@ TEST(TestGpuIndexIVFFlat, AddNaN) { distance.data(), indices.data()); -#if defined USE_NVIDIA_RAFT - config.use_raft = true; +#if defined USE_NVIDIA_CUVS + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - faiss::gpu::GpuIndexIVFFlat raftGpuIndex( + faiss::gpu::GpuIndexIVFFlat cuvsGpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); - raftGpuIndex.nprobe = opt.nprobe; - raftGpuIndex.train(opt.numTrain, trainVecs.data()); + cuvsGpuIndex.nprobe = opt.nprobe; + cuvsGpuIndex.train(opt.numTrain, trainVecs.data()); // should not crash - EXPECT_EQ(raftGpuIndex.ntotal, 0); - raftGpuIndex.add(numNans, nans.data()); + EXPECT_EQ(cuvsGpuIndex.ntotal, 0); + cuvsGpuIndex.add(numNans, nans.data()); // should not crash - raftGpuIndex.search( + cuvsGpuIndex.search( opt.numQuery, queryVecs.data(), opt.k, @@ -724,7 +725,7 @@ TEST(TestGpuIndexIVFFlat, UnifiedMemory) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = device; config.memorySpace = faiss::gpu::MemorySpace::Unified; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, dim, numCentroids, faiss::METRIC_L2, config); @@ -742,17 +743,17 @@ TEST(TestGpuIndexIVFFlat, UnifiedMemory) { 0.1f, 0.015f); -#if defined USE_NVIDIA_RAFT - config.use_raft = true; +#if defined USE_NVIDIA_CUVS + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - faiss::gpu::GpuIndexIVFFlat raftGpuIndex( + faiss::gpu::GpuIndexIVFFlat cuvsGpuIndex( &res, dim, numCentroids, faiss::METRIC_L2, config); - raftGpuIndex.copyFrom(&cpuIndex); - raftGpuIndex.nprobe = nprobe; + cuvsGpuIndex.copyFrom(&cpuIndex); + cuvsGpuIndex.nprobe = nprobe; faiss::gpu::compareIndices( cpuIndex, - raftGpuIndex, + cuvsGpuIndex, numQuery, dim, k, @@ -802,7 +803,7 @@ TEST(TestGpuIndexIVFFlat, LongIVFList) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = device; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, dim, numCentroids, faiss::METRIC_L2, config); @@ -821,18 +822,18 @@ TEST(TestGpuIndexIVFFlat, LongIVFList) { 0.1f, 0.015f); -#if defined USE_NVIDIA_RAFT - config.use_raft = true; +#if defined USE_NVIDIA_CUVS + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - faiss::gpu::GpuIndexIVFFlat raftGpuIndex( + faiss::gpu::GpuIndexIVFFlat cuvsGpuIndex( &res, dim, numCentroids, faiss::METRIC_L2, config); - raftGpuIndex.train(numTrain, trainVecs.data()); - raftGpuIndex.add(numAdd, addVecs.data()); - raftGpuIndex.nprobe = 1; + cuvsGpuIndex.train(numTrain, trainVecs.data()); + cuvsGpuIndex.add(numAdd, addVecs.data()); + cuvsGpuIndex.nprobe = 1; faiss::gpu::compareIndices( cpuIndex, - raftGpuIndex, + cuvsGpuIndex, numQuery, dim, k, @@ -862,7 +863,7 @@ TEST(TestGpuIndexIVFFlat, Reconstruct_n) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = opt.device; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); diff --git a/faiss/gpu/test/TestGpuIndexIVFPQ.cpp b/faiss/gpu/test/TestGpuIndexIVFPQ.cpp index 134a4b449d..cf1af7f0a2 100644 --- 
a/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +++ b/faiss/gpu/test/TestGpuIndexIVFPQ.cpp @@ -35,7 +35,7 @@ void pickEncoding(int& codes, int& dim) { } } -void pickRaftEncoding(int& codes, int& dim, int bitsPerCode) { +void pickCuvsEncoding(int& codes, int& dim, int bitsPerCode) { // Above 32 doesn't work with no precomputed codes std::vector dimSizes{4, 8, 10, 12, 16, 20, 24, 28, 32}; @@ -85,7 +85,7 @@ struct Options { device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); interleavedLayout = false; - useRaft = false; + useCuvs = false; } std::string toString() const { @@ -126,7 +126,7 @@ struct Options { bool useFloat16; int device; bool interleavedLayout; - bool useRaft; + bool useCuvs; }; void queryTest(Options opt, faiss::MetricType metricType) { @@ -156,7 +156,7 @@ void queryTest(Options opt, faiss::MetricType metricType) { config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; config.interleavedLayout = opt.interleavedLayout; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.nprobe = opt.nprobe; @@ -235,7 +235,7 @@ void testMMCodeDistance(faiss::MetricType mt) { config.usePrecomputedTables = false; config.useMMCodeDistance = true; config.indicesOptions = opt.indicesOpt; - config.use_raft = false; + config.use_cuvs = false; // Make sure that the float16 version works as well config.useFloat16LookupTables = (tries % 2 == 0); @@ -286,7 +286,7 @@ void testMMCodeDistance(faiss::MetricType mt) { config.device = opt.device; config.usePrecomputedTables = false; config.indicesOptions = opt.indicesOpt; - config.use_raft = false; + config.use_cuvs = false; // Make sure that the float16 version works as well config.useFloat16LookupTables = (dimPerSubQ == 7); @@ -340,7 +340,7 @@ TEST(TestGpuIndexIVFPQ, Float16Coarse) { config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.nprobe = opt.nprobe; @@ -386,7 +386,7 @@ void addTest(Options opt, faiss::MetricType metricType) { config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; config.interleavedLayout = opt.interleavedLayout; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.nprobe = opt.nprobe; @@ -436,7 +436,7 @@ void copyToTest(Options opt) { config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; config.interleavedLayout = opt.interleavedLayout; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFPQ gpuIndex( &res, @@ -513,7 +513,7 @@ void copyFromTest(Options opt) { config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; config.interleavedLayout = opt.interleavedLayout; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; // Use garbage values to see if we overwrite them faiss::gpu::GpuIndexIVFPQ gpuIndex( @@ -567,8 +567,8 @@ void queryNaNTest(Options opt) { config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; - config.use_raft = opt.useRaft; - config.interleavedLayout = opt.useRaft ? true : opt.interleavedLayout; + config.use_cuvs = opt.useCuvs; + config.interleavedLayout = opt.useCuvs ? 
true : opt.interleavedLayout; faiss::gpu::GpuIndexIVFPQ gpuIndex( &res, @@ -606,7 +606,7 @@ void queryNaNTest(Options opt) { TEST(TestGpuIndexIVFPQ, QueryNaN) { Options opt; - opt.useRaft = false; + opt.useCuvs = false; queryNaNTest(opt); } @@ -620,7 +620,7 @@ void addNaNTest(Options opt) { config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; config.interleavedLayout = opt.interleavedLayout; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFPQ gpuIndex( &res, @@ -664,46 +664,46 @@ void addNaNTest(Options opt) { TEST(TestGpuIndexIVFPQ, AddNaN) { Options opt; - opt.useRaft = false; + opt.useCuvs = false; addNaNTest(opt); } -#if defined USE_NVIDIA_RAFT -TEST(TestGpuIndexIVFPQ, Query_L2_Raft) { +#if defined USE_NVIDIA_CUVS +TEST(TestGpuIndexIVFPQ, Query_L2_Cuvs) { for (int tries = 0; tries < 2; ++tries) { Options opt; opt.bitsPerCode = faiss::gpu::randVal(4, 8); - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); queryTest(opt, faiss::MetricType::METRIC_L2); } } -TEST(TestGpuIndexIVFPQ, Query_IP_Raft) { +TEST(TestGpuIndexIVFPQ, Query_IP_Cuvs) { for (int tries = 0; tries < 2; ++tries) { Options opt; opt.bitsPerCode = faiss::gpu::randVal(4, 8); - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); queryTest(opt, faiss::MetricType::METRIC_INNER_PRODUCT); } } // Large batch sizes (>= 65536) should also work -TEST(TestGpuIndexIVFPQ, LargeBatch_Raft) { +TEST(TestGpuIndexIVFPQ, LargeBatch_Cuvs) { Options opt; // override for large sizes opt.dim = 4; opt.numQuery = 100000; opt.codes = 2; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.usePrecomputed = false; opt.useFloat16 = false; @@ -713,73 +713,73 @@ TEST(TestGpuIndexIVFPQ, LargeBatch_Raft) { queryTest(opt, faiss::MetricType::METRIC_L2); } -TEST(TestGpuIndexIVFPQ, CopyFrom_Raft) { +TEST(TestGpuIndexIVFPQ, CopyFrom_Cuvs) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); copyFromTest(opt); } -TEST(TestGpuIndexIVFPQ, Add_L2_Raft) { +TEST(TestGpuIndexIVFPQ, Add_L2_Cuvs) { for (int tries = 0; tries < 2; ++tries) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); addTest(opt, faiss::METRIC_L2); } } -TEST(TestGpuIndexIVFPQ, Add_IP_Raft) { +TEST(TestGpuIndexIVFPQ, Add_IP_Cuvs) { for (int tries = 0; tries < 2; ++tries) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, 
opt.bitsPerCode); addTest(opt, faiss::METRIC_INNER_PRODUCT); } } -TEST(TestGpuIndexIVFPQ, QueryNaN_Raft) { +TEST(TestGpuIndexIVFPQ, QueryNaN_Cuvs) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); queryNaNTest(opt); } -TEST(TestGpuIndexIVFPQ, AddNaN_Raft) { +TEST(TestGpuIndexIVFPQ, AddNaN_Cuvs) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); addNaNTest(opt); } -TEST(TestGpuIndexIVFPQ, CopyTo_Raft) { +TEST(TestGpuIndexIVFPQ, CopyTo_Cuvs) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); copyToTest(opt); } #endif @@ -824,7 +824,7 @@ TEST(TestGpuIndexIVFPQ, UnifiedMemory) { faiss::gpu::GpuIndexIVFPQConfig config; config.device = device; config.memorySpace = faiss::gpu::MemorySpace::Unified; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFPQ gpuIndex( &res, @@ -848,12 +848,12 @@ TEST(TestGpuIndexIVFPQ, UnifiedMemory) { 0.1f, 0.015f); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS config.interleavedLayout = true; - config.use_raft = true; + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - faiss::gpu::GpuIndexIVFPQ raftGpuIndex( + faiss::gpu::GpuIndexIVFPQ cuvsGpuIndex( &res, dim, numCentroids, @@ -861,12 +861,12 @@ TEST(TestGpuIndexIVFPQ, UnifiedMemory) { bitsPerCode, faiss::METRIC_L2, config); - raftGpuIndex.copyFrom(&cpuIndex); - raftGpuIndex.nprobe = nprobe; + cuvsGpuIndex.copyFrom(&cpuIndex); + cuvsGpuIndex.nprobe = nprobe; faiss::gpu::compareIndices( cpuIndex, - raftGpuIndex, + cuvsGpuIndex, numQuery, dim, k, diff --git a/faiss/gpu/test/TestGpuMemoryException.cpp b/faiss/gpu/test/TestGpuMemoryException.cpp index b70b9cc538..d0f23acde4 100644 --- a/faiss/gpu/test/TestGpuMemoryException.cpp +++ b/faiss/gpu/test/TestGpuMemoryException.cpp @@ -31,7 +31,7 @@ TEST(TestGpuMemoryException, AddException) { faiss::gpu::GpuIndexFlatConfig config; config.device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexFlatL2 gpuIndexL2Broken( &res, (int)brokenAddDims, config); diff --git a/faiss/gpu/test/test_cagra.py b/faiss/gpu/test/test_cagra.py index 5324379fc5..20eb8ece16 100644 --- a/faiss/gpu/test/test_cagra.py +++ b/faiss/gpu/test/test_cagra.py @@ -11,8 +11,8 @@ @unittest.skipIf( - "RAFT" not in faiss.get_compile_options(), - "only if RAFT is compiled in") + "CUVS" not in faiss.get_compile_options(), + "only if cuVS is compiled in") class TestComputeGT(unittest.TestCase): def do_compute_GT(self, metric): @@ -36,8 +36,8 @@ def test_compute_GT_IP(self): self.do_compute_GT(faiss.METRIC_INNER_PRODUCT) @unittest.skipIf( - "RAFT" not in faiss.get_compile_options(), - "only if RAFT is compiled in") + "CUVS" not in faiss.get_compile_options(), + "only if cuVS 
is compiled in") class TestInterop(unittest.TestCase): def do_interop(self, metric): diff --git a/faiss/gpu/test/test_raft.py b/faiss/gpu/test/test_cuvs.py similarity index 82% rename from faiss/gpu/test/test_raft.py rename to faiss/gpu/test/test_cuvs.py index d08faaa57b..538e265209 100644 --- a/faiss/gpu/test/test_raft.py +++ b/faiss/gpu/test/test_cuvs.py @@ -11,8 +11,8 @@ @unittest.skipIf( - "RAFT" not in faiss.get_compile_options(), - "only if RAFT is compiled in") + "CUVS" not in faiss.get_compile_options(), + "only if CUVS is compiled in") class TestBfKnn(unittest.TestCase): def test_bfKnn(self): @@ -25,14 +25,14 @@ def test_bfKnn(self): # Faiss internal implementation Dnew, Inew = faiss.knn_gpu( - res, ds.get_queries(), ds.get_database(), 12, use_raft=False) - np.testing.assert_allclose(Dref, Dnew, atol=1e-5) + res, ds.get_queries(), ds.get_database(), 12, use_cuvs=False) + np.testing.assert_allclose(Dref, Dnew, atol=1e-4) np.testing.assert_array_equal(Iref, Inew) - # RAFT version + # cuVS version Dnew, Inew = faiss.knn_gpu( - res, ds.get_queries(), ds.get_database(), 12, use_raft=True) - np.testing.assert_allclose(Dref, Dnew, atol=1e-5) + res, ds.get_queries(), ds.get_database(), 12, use_cuvs=True) + np.testing.assert_allclose(Dref, Dnew, atol=1e-4) np.testing.assert_array_equal(Iref, Inew) def test_IndexFlat(self): @@ -46,7 +46,7 @@ def test_IndexFlat(self): res = faiss.StandardGpuResources() co = faiss.GpuClonerOptions() - co.use_raft = True + co.use_cuvs = True index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) Dnew, Inew = index_gpu.search(ds.get_queries(), 13) np.testing.assert_allclose(Dref, Dnew, atol=1e-5) diff --git a/faiss/gpu/test/test_gpu_index.py b/faiss/gpu/test/test_gpu_index.py index f85f6ff913..05d4ed02c6 100755 --- a/faiss/gpu/test/test_gpu_index.py +++ b/faiss/gpu/test/test_gpu_index.py @@ -25,7 +25,7 @@ def test_ivfflat_search_preassigned(self): k = 50 config = faiss.GpuIndexIVFFlatConfig() - config.use_raft = False + config.use_cuvs = False idx_gpu = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2, config) idx_gpu.nprobe = nprobe @@ -59,7 +59,7 @@ def test_ivfpq_search_preassigned(self): k = 50 config = faiss.GpuIndexIVFPQConfig() - config.use_raft = False + config.use_cuvs = False idx_gpu = faiss.GpuIndexIVFPQ(res, d, nlist, 4, 8, faiss.METRIC_L2, config) idx_gpu.nprobe = nprobe @@ -141,7 +141,7 @@ def test_ivfflat_cpu_coarse(self): # construct a GPU index using the same trained coarse quantizer # from the CPU index config = faiss.GpuIndexIVFFlatConfig() - config.use_raft = False + config.use_cuvs = False idx_gpu = faiss.GpuIndexIVFFlat(res, q, d, nlist, faiss.METRIC_L2, config) assert(idx_gpu.is_trained) idx_gpu.add(xb) @@ -233,7 +233,7 @@ def test_ivfpq_cpu_coarse(self): # construct a GPU index using the same trained coarse quantizer # from the CPU index config = faiss.GpuIndexIVFPQConfig() - config.use_raft = False + config.use_cuvs = False idx_gpu = faiss.GpuIndexIVFPQ( res, idx_coarse_cpu, d, nlist_lvl_2, 4, 8, faiss.METRIC_L2, config) assert(not idx_gpu.is_trained) @@ -414,7 +414,7 @@ def test_indices_ivfflat(self): # Store values using 32-bit indices instead config.indicesOptions = faiss.INDICES_32_BIT - config.use_raft = False + config.use_cuvs = False idx = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2, config) idx.train(xb) idx.add_with_ids(xb, xb_indices) @@ -439,7 +439,7 @@ def test_indices_ivfpq(self): xb_indices = (xb_indices_base + 4294967296).astype('int64') config = faiss.GpuIndexIVFPQConfig() - config.use_raft = False + 
config.use_cuvs = False
         idx = faiss.GpuIndexIVFPQ(res, d, nlist, M, nbits, faiss.METRIC_L2, config)
         idx.train(xb)
@@ -501,7 +501,7 @@ def test_sq_cpu_to_gpu(self):
         index = faiss.index_factory(32, "SQfp16")
         index.add(np.random.rand(1000, 32).astype(np.float32))
         config = faiss.GpuClonerOptions()
-        config.use_raft = False
+        config.use_cuvs = False
         gpu_index = faiss.index_cpu_to_gpu(res, 0, index, config)
         self.assertIsInstance(gpu_index, faiss.GpuIndexFlat)
diff --git a/faiss/gpu/test/test_gpu_index_ivfflat.py b/faiss/gpu/test/test_gpu_index_ivfflat.py
index 1dbf9b694d..2d4b70f764 100644
--- a/faiss/gpu/test/test_gpu_index_ivfflat.py
+++ b/faiss/gpu/test/test_gpu_index_ivfflat.py
@@ -18,7 +18,7 @@ def test_reconstruct_n(self):
         res = faiss.StandardGpuResources()
         res.noTempMemory()
         config = faiss.GpuIndexIVFFlatConfig()
-        config.use_raft = False
+        config.use_cuvs = False
         index2 = faiss.GpuIndexIVFFlat(res, index, config)
         recons = index2.reconstruct_n(0, 10)
diff --git a/faiss/gpu/test/test_gpu_index_ivfsq.py b/faiss/gpu/test/test_gpu_index_ivfsq.py
index 6600d46661..99239152d3 100755
--- a/faiss/gpu/test/test_gpu_index_ivfsq.py
+++ b/faiss/gpu/test/test_gpu_index_ivfsq.py
@@ -28,7 +28,7 @@ def make_indices_copy_from_cpu(nlist, d, qtype, by_residual, metric, clamp):
     res = faiss.StandardGpuResources()
     res.noTempMemory()
     config = faiss.GpuIndexIVFScalarQuantizerConfig()
-    config.use_raft = False
+    config.use_cuvs = False
     idx_gpu = faiss.GpuIndexIVFScalarQuantizer(res, idx_cpu, config)
     return idx_cpu, idx_gpu
@@ -40,7 +40,7 @@ def make_indices_copy_from_gpu(nlist, d, qtype, by_residual, metric, clamp):
     res = faiss.StandardGpuResources()
     res.noTempMemory()
     config = faiss.GpuIndexIVFScalarQuantizerConfig()
-    config.use_raft = False
+    config.use_cuvs = False
     idx_gpu = faiss.GpuIndexIVFScalarQuantizer(res, d, nlist, qtype, metric, by_residual, config)
     idx_gpu.train(to_train)
@@ -68,7 +68,7 @@ def make_indices_train(nlist, d, qtype, by_residual, metric, clamp):
     res = faiss.StandardGpuResources()
     res.noTempMemory()
     config = faiss.GpuIndexIVFScalarQuantizerConfig()
-    config.use_raft = False
+    config.use_cuvs = False
     idx_gpu = faiss.GpuIndexIVFScalarQuantizer(res, d, nlist, qtype, metric, by_residual, config)
     assert(by_residual == idx_gpu.by_residual)
diff --git a/faiss/gpu/test/test_gpu_index_serialize.py b/faiss/gpu/test/test_gpu_index_serialize.py
index fcd62f573c..3dafcfdce3 100644
--- a/faiss/gpu/test/test_gpu_index_serialize.py
+++ b/faiss/gpu/test/test_gpu_index_serialize.py
@@ -35,7 +35,7 @@ def test_serialize(self):
         # IVFSQ
         config = faiss.GpuIndexIVFScalarQuantizerConfig()
-        config.use_raft = False
+        config.use_cuvs = False
         indexes.append(faiss.GpuIndexIVFScalarQuantizer(res, d, nlist, faiss.ScalarQuantizer.QT_fp16, faiss.METRIC_L2, True, config))
         # IVFPQ
@@ -52,7 +52,7 @@ def test_serialize(self):
             gpu_cloner_options = faiss.GpuClonerOptions()
             if isinstance(index, faiss.GpuIndexIVFScalarQuantizer):
-                gpu_cloner_options.use_raft = False
+                gpu_cloner_options.use_cuvs = False
             gpu_index_restore = faiss.index_cpu_to_gpu(res, 0, cpu_index, gpu_cloner_options)
             restore_d, restore_i = gpu_index_restore.search(query, k)
diff --git a/faiss/gpu/test/test_index_cpu_to_gpu.py b/faiss/gpu/test/test_index_cpu_to_gpu.py
index 82a9fdbf05..7b043b31cf 100644
--- a/faiss/gpu/test/test_index_cpu_to_gpu.py
+++ b/faiss/gpu/test/test_index_cpu_to_gpu.py
@@ -28,13 +28,13 @@ def create_index(self, factory_string):
     def create_and_clone(self,
                          factory_string,
                          allowCpuCoarseQuantizer=None,
-                         use_raft=None):
+                         use_cuvs=None):
         idx = self.create_index(factory_string)
         config = faiss.GpuClonerOptions()
         if allowCpuCoarseQuantizer is not None:
            config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer
-        if use_raft is not None:
-            config.use_raft = use_raft
+        if use_cuvs is not None:
+            config.use_cuvs = use_cuvs
         faiss.index_cpu_to_gpu(self.res, 0, idx, config)
     def verify_throws_not_implemented_exception(self, factory_string):
@@ -47,12 +47,12 @@ def verify_throws_not_implemented_exception(self, factory_string):
     def verify_clones_successfully(self,
                                    factory_string,
                                    allowCpuCoarseQuantizer=None,
-                                   use_raft=None):
+                                   use_cuvs=None):
         try:
             self.create_and_clone(
                 factory_string,
                 allowCpuCoarseQuantizer=allowCpuCoarseQuantizer,
-                use_raft=use_raft)
+                use_cuvs=use_cuvs)
         except Exception as e:
             self.fail("Unexpected exception thrown factory_string: "
                       "%s; error message: %s." % (factory_string, str(e)))
@@ -74,10 +74,10 @@ def test_implemented_indices(self):
         self.verify_clones_successfully("PCA32,IVF32,PQ8")
         self.verify_clones_successfully("PCA32,IVF32,PQ8np")
-        # set use_raft to false, these index types are not supported on RAFT
-        self.verify_clones_successfully("IVF32,SQ8", use_raft=False)
+        # set use_cuvs to false, these index types are not supported on cuVS
+        self.verify_clones_successfully("IVF32,SQ8", use_cuvs=False)
         self.verify_clones_successfully(
-            "PCA32,IVF32,SQ8", use_raft=False)
+            "PCA32,IVF32,SQ8", use_cuvs=False)
     def test_with_flag(self):
         self.verify_clones_successfully("IVF32_HNSW,Flat",
diff --git a/faiss/gpu/test/test_multi_gpu.py b/faiss/gpu/test/test_multi_gpu.py
index aca0e988d6..6ec2079ff0 100644
--- a/faiss/gpu/test/test_multi_gpu.py
+++ b/faiss/gpu/test/test_multi_gpu.py
@@ -29,7 +29,7 @@ def test_sharded(self):
         co = faiss.GpuMultipleClonerOptions()
         co.shard = True
-        co.use_raft = False
+        co.use_cuvs = False
         index = faiss.index_cpu_to_all_gpus(index_cpu, co, ngpu=2)
         index.add(xb)
@@ -72,7 +72,7 @@ def do_test_sharded_ivf(self, index_key):
         co = faiss.GpuMultipleClonerOptions()
         co.shard = True
         co.common_ivf_quantizer = True
-        co.use_raft = False
+        co.use_cuvs = False
         index = faiss.index_cpu_to_all_gpus(index, co, ngpu=2)
         index.quantizer  # make sure there is indeed a quantizer
@@ -113,7 +113,7 @@ def test_binary_clone(self, ngpu=1, shard=False):
         co = faiss.GpuMultipleClonerOptions()
         co.shard = shard
-        co.use_raft = False
+        co.use_cuvs = False
         # index2 = faiss.index_cpu_to_all_gpus(index, ngpu=ngpu)
         res = faiss.StandardGpuResources()
@@ -192,7 +192,7 @@ def do_cpu_to_gpu(self, index_key):
         res = faiss.StandardGpuResources()
         co = faiss.GpuClonerOptions()
-        co.use_raft = False
+        co.use_cuvs = False
         gpu_index = faiss.index_cpu_to_gpu(res, 0, index, co)
         ts.append(time.time())
@@ -222,7 +222,7 @@ def do_cpu_to_gpu(self, index_key):
             res = [faiss.StandardGpuResources() for i in range(2)]
             co = faiss.GpuMultipleClonerOptions()
             co.shard = shard
-            co.use_raft = False
+            co.use_cuvs = False
             gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)
diff --git a/faiss/gpu/test/torch_test_contrib_gpu.py b/faiss/gpu/test/torch_test_contrib_gpu.py
index 8bb9d58120..e24e511401 100644
--- a/faiss/gpu/test/torch_test_contrib_gpu.py
+++ b/faiss/gpu/test/torch_test_contrib_gpu.py
@@ -80,7 +80,11 @@ def test_train_add_with_ids(self):
         res = faiss.StandardGpuResources()
         res.noTempMemory()
-        index = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2)
+        config = faiss.GpuIndexIVFFlatConfig()
+        # FIXME: triage failure when use_cuvs is set to True (issue #3968)
+        config.use_cuvs = False
+
+        index = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2, config)
         xb = torch.rand(1000, d, device=torch.device('cuda', 0), dtype=torch.float32)
         index.train(xb)
@@ -167,7 +171,7 @@ def test_ivfflat_reconstruct(self):
         res = faiss.StandardGpuResources()
         res.noTempMemory()
         config = faiss.GpuIndexIVFFlatConfig()
-        config.use_raft = False
+        config.use_cuvs = False
         index = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2, config)
@@ -253,7 +257,7 @@ def test_sa_encode_decode(self):
         return
 class TestTorchUtilsKnnGpu(unittest.TestCase):
-    def test_knn_gpu(self, use_raft=False):
+    def test_knn_gpu(self, use_cuvs=False):
         torch.manual_seed(10)
         d = 32
         nb = 1024
@@ -290,7 +294,7 @@ def test_knn_gpu(self, use_raft=False):
             else:
                 xb_c = xb_np
-            D, I = faiss.knn_gpu(res, xq_c, xb_c, k, use_raft=use_raft)
+            D, I = faiss.knn_gpu(res, xq_c, xb_c, k, use_cuvs=use_cuvs)
             self.assertTrue(torch.equal(torch.from_numpy(I), gt_I))
             self.assertLess((torch.from_numpy(D) - gt_D).abs().max(), 1e-4)
@@ -316,7 +320,7 @@ def test_knn_gpu(self, use_raft=False):
             xb_c = to_column_major_torch(xb)
             assert not xb_c.is_contiguous()
-            D, I = faiss.knn_gpu(res, xq_c, xb_c, k, use_raft=use_raft)
+            D, I = faiss.knn_gpu(res, xq_c, xb_c, k, use_cuvs=use_cuvs)
             self.assertTrue(torch.equal(I.cpu(), gt_I))
             self.assertLess((D.cpu() - gt_D).abs().max(), 1e-4)
@@ -324,7 +328,7 @@ def test_knn_gpu(self, use_raft=False):
         # test on subset
         try:
             # This internally uses the current pytorch stream
-            D, I = faiss.knn_gpu(res, xq_c[6:8], xb_c, k, use_raft=use_raft)
+            D, I = faiss.knn_gpu(res, xq_c[6:8], xb_c, k, use_cuvs=use_cuvs)
         except TypeError:
             if not xq_row_major:
                 # then it is expected
@@ -336,12 +340,12 @@ def test_knn_gpu(self, use_raft=False):
             self.assertLess((D.cpu() - gt_D[6:8]).abs().max(), 1e-4)
     @unittest.skipUnless(
-        "RAFT" in faiss.get_compile_options(),
-        "only if RAFT is compiled in")
-    def test_knn_gpu_raft(self):
-        self.test_knn_gpu(use_raft=True)
+        "CUVS" in faiss.get_compile_options(),
+        "only if CUVS is compiled in")
+    def test_knn_gpu_cuvs(self):
+        self.test_knn_gpu(use_cuvs=True)
-    def test_knn_gpu_datatypes(self, use_raft=False):
+    def test_knn_gpu_datatypes(self, use_cuvs=False):
         torch.manual_seed(10)
         d = 10
         nb = 1024
@@ -364,7 +368,7 @@ def test_knn_gpu_datatypes(self, use_raft=False):
         D = torch.zeros(nq, k, device=xb_c.device, dtype=torch.float32)
         I = torch.zeros(nq, k, device=xb_c.device, dtype=torch.int32)
-        faiss.knn_gpu(res, xq_c, xb_c, k, D, I, use_raft=use_raft)
+        faiss.knn_gpu(res, xq_c, xb_c, k, D, I, use_cuvs=use_cuvs)
         self.assertTrue(torch.equal(I.long().cpu(), gt_I))
         self.assertLess((D.float().cpu() - gt_D).abs().max(), 1.5e-3)
@@ -376,7 +380,7 @@ def test_knn_gpu_datatypes(self, use_raft=False):
         xb_c = xb.half().numpy()
         xq_c = xq.half().numpy()
-        faiss.knn_gpu(res, xq_c, xb_c, k, D, I, use_raft=use_raft)
+        faiss.knn_gpu(res, xq_c, xb_c, k, D, I, use_cuvs=use_cuvs)
         self.assertTrue(torch.equal(torch.from_numpy(I).long(), gt_I))
         self.assertLess((torch.from_numpy(D) - gt_D).abs().max(), 1.5e-3)
diff --git a/faiss/gpu/utils/RaftUtils.cu b/faiss/gpu/utils/CuvsUtils.cu
similarity index 97%
rename from faiss/gpu/utils/RaftUtils.cu
rename to faiss/gpu/utils/CuvsUtils.cu
index a759336eb9..1ec32179c6 100644
--- a/faiss/gpu/utils/RaftUtils.cu
+++ b/faiss/gpu/utils/CuvsUtils.cu
@@ -6,7 +6,7 @@
  *
  * LICENSE file in the root directory of this source tree.
  */
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
  */
 #include
-#include
+#include
 #include
 #include
 #include
diff --git a/faiss/gpu/utils/RaftUtils.h b/faiss/gpu/utils/CuvsUtils.h
similarity index 76%
rename from faiss/gpu/utils/RaftUtils.h
rename to faiss/gpu/utils/CuvsUtils.h
index 59ee1442f0..e44e5f12d5 100644
--- a/faiss/gpu/utils/RaftUtils.h
+++ b/faiss/gpu/utils/CuvsUtils.h
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -27,32 +27,32 @@
 #include
 #include
-#include
+#include
 #pragma GCC visibility push(default)
 namespace faiss {
 namespace gpu {
-inline raft::distance::DistanceType metricFaissToRaft(
+inline cuvsDistanceType metricFaissToCuvs(
         MetricType metric,
         bool exactDistance) {
     switch (metric) {
         case MetricType::METRIC_INNER_PRODUCT:
-            return raft::distance::DistanceType::InnerProduct;
+            return cuvsDistanceType::InnerProduct;
        case MetricType::METRIC_L2:
-            return raft::distance::DistanceType::L2Expanded;
+            return cuvsDistanceType::L2Expanded;
        case MetricType::METRIC_L1:
-            return raft::distance::DistanceType::L1;
+            return cuvsDistanceType::L1;
        case MetricType::METRIC_Linf:
-            return raft::distance::DistanceType::Linf;
+            return cuvsDistanceType::Linf;
        case MetricType::METRIC_Lp:
-            return raft::distance::DistanceType::LpUnexpanded;
+            return cuvsDistanceType::LpUnexpanded;
        case MetricType::METRIC_Canberra:
-            return raft::distance::DistanceType::Canberra;
+            return cuvsDistanceType::Canberra;
        case MetricType::METRIC_BrayCurtis:
-            return raft::distance::DistanceType::BrayCurtis;
+            return cuvsDistanceType::BrayCurtis;
        case MetricType::METRIC_JensenShannon:
-            return raft::distance::DistanceType::JensenShannon;
+            return cuvsDistanceType::JensenShannon;
        default:
            RAFT_FAIL("Distance type not supported");
    }
diff --git a/faiss/python/CMakeLists.txt b/faiss/python/CMakeLists.txt
index 4813176bb7..41e08eb348 100644
--- a/faiss/python/CMakeLists.txt
+++ b/faiss/python/CMakeLists.txt
@@ -42,9 +42,9 @@ macro(configure_swigfaiss source)
       COMPILE_DEFINITIONS FAISS_ENABLE_ROCM
     )
   endif()
-  if (FAISS_ENABLE_RAFT)
+  if (FAISS_ENABLE_CUVS)
     set_property(SOURCE ${source} APPEND PROPERTY
-      COMPILE_DEFINITIONS FAISS_ENABLE_RAFT
+      COMPILE_DEFINITIONS FAISS_ENABLE_CUVS
     )
   endif()
 endif()
@@ -191,21 +191,13 @@ if(FAISS_ENABLE_GPU)
     target_link_libraries(faiss_example_external_module PRIVATE hip::host)
   else()
     find_package(CUDAToolkit REQUIRED)
-    if(FAISS_ENABLE_RAFT)
-      find_package(raft COMPONENTS compiled distributed)
+    if(FAISS_ENABLE_CUVS)
+      find_package(cuvs)
     endif()
-    target_link_libraries(swigfaiss PRIVATE CUDA::cudart
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-    target_link_libraries(swigfaiss_avx2 PRIVATE CUDA::cudart
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-    target_link_libraries(swigfaiss_avx512 PRIVATE CUDA::cudart
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-    target_link_libraries(swigfaiss_sve PRIVATE CUDA::cudart
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
+    target_link_libraries(swigfaiss PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
+    target_link_libraries(swigfaiss_avx2 PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
+    target_link_libraries(swigfaiss_avx512 PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
+    target_link_libraries(swigfaiss_sve PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
   endif()
 endif()
diff --git a/faiss/python/gpu_wrappers.py b/faiss/python/gpu_wrappers.py
index 6e788511d2..4945722f6c 100644
--- a/faiss/python/gpu_wrappers.py
+++ b/faiss/python/gpu_wrappers.py
@@ -56,7 +56,7 @@ def index_cpu_to_gpus_list(index, co=None, gpus=None, ngpu=-1):
 # allows numpy ndarray usage with bfKnn
-def knn_gpu(res, xq, xb, k, D=None, I=None, metric=METRIC_L2, device=-1, use_raft=False, vectorsMemoryLimit=0, queriesMemoryLimit=0):
+def knn_gpu(res, xq, xb, k, D=None, I=None, metric=METRIC_L2, device=-1, use_cuvs=False, vectorsMemoryLimit=0, queriesMemoryLimit=0):
     """
     Compute the k nearest neighbors of a vector on one GPU without constructing an index
@@ -178,7 +178,7 @@ def knn_gpu(res, xq, xb, k, D=None, I=None, metric=METRIC_L2, device=-1, use_raf
     args.outIndices = I_ptr
     args.outIndicesType = I_type
     args.device = device
-    args.use_raft = use_raft
+    args.use_cuvs = use_cuvs
     # no stream synchronization needed, inputs and outputs are guaranteed to
     # be on the CPU (numpy arrays)
diff --git a/faiss/python/swigfaiss.swig b/faiss/python/swigfaiss.swig
index b13e23963d..493e42ef0e 100644
--- a/faiss/python/swigfaiss.swig
+++ b/faiss/python/swigfaiss.swig
@@ -680,7 +680,7 @@ struct faiss::simd16uint16 {};
 %include
 %include
 %include
-#ifdef FAISS_ENABLE_RAFT
+#ifdef FAISS_ENABLE_CUVS
 %include
 #endif
 %include
@@ -797,7 +797,7 @@ struct faiss::simd16uint16 {};
 DOWNCAST ( IndexRowwiseMinMax )
 DOWNCAST ( IndexRowwiseMinMaxFP16 )
 #ifdef GPU_WRAPPER
-#ifdef FAISS_ENABLE_RAFT
+#ifdef FAISS_ENABLE_CUVS
 DOWNCAST_GPU ( GpuIndexCagra )
 #endif
 DOWNCAST_GPU ( GpuIndexIVFPQ )
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 6202a2cbf9..63c1729d16 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -81,7 +81,7 @@ find_package(GTest CONFIG REQUIRED)
 target_link_libraries(faiss_test PRIVATE
   OpenMP::OpenMP_CXX
   GTest::gtest_main
-  $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
+  $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>
   $<$<BOOL:${FAISS_ENABLE_ROCM}>:hip::host>
 )
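Usage note (illustration only, not part of the patch): after this rename the Python-facing switch is use_cuvs, both on the cloner options and as the knn_gpu keyword, mirroring the test changes above. A minimal sketch assuming a GPU-enabled faiss build; the dimensions and the IVF32,SQ8 factory string are arbitrary example choices:

    import faiss
    import numpy as np

    d = 32
    xb = np.random.rand(10000, d).astype(np.float32)   # database vectors
    xq = np.random.rand(5, d).astype(np.float32)       # query vectors

    res = faiss.StandardGpuResources()

    # Brute-force k-NN on one GPU; this keyword was use_raft before the rename.
    D, I = faiss.knn_gpu(res, xq, xb, 10, use_cuvs=False)

    # Cloning a CPU index to the GPU goes through the same renamed flag.
    index = faiss.index_factory(d, "IVF32,SQ8")
    index.train(xb)
    co = faiss.GpuClonerOptions()
    co.use_cuvs = False   # SQ8 is not supported by the cuVS backend (see test above)
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index, co)
    gpu_index.add(xb)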