diff --git a/.github/actions/build_cmake/action.yml b/.github/actions/build_cmake/action.yml index 1ec1186484..3fadbc8eb5 100644 --- a/.github/actions/build_cmake/action.yml +++ b/.github/actions/build_cmake/action.yml @@ -8,8 +8,8 @@ inputs: description: 'Enable GPU support.' required: false default: OFF - raft: - description: 'Enable RAFT support.' + cuvs: + description: 'Enable cuVS support.' required: false default: OFF rocm: @@ -50,11 +50,11 @@ runs: if [ "${{ inputs.rocm }}" = "ON" ]; then : # regular CUDA for GPU builds - elif [ "${{ inputs.gpu }}" = "ON" ] && [ "${{ inputs.raft }}" = "OFF" ]; then + elif [ "${{ inputs.gpu }}" = "ON" ] && [ "${{ inputs.cuvs }}" = "OFF" ]; then conda install -y -q cuda-toolkit -c "nvidia/label/cuda-12.4.0" - # and CUDA from RAFT channel for RAFT builds - elif [ "${{ inputs.raft }}" = "ON" ]; then - conda install -y -q libraft=24.06 cuda-version=12.4 cuda-toolkit -c rapidsai -c "nvidia/label/cuda-12.4.0" -c conda-forge + # and CUDA from cuVS channel for cuVS builds + elif [ "${{ inputs.cuvs }}" = "ON" ]; then + conda install -y -q libcuvs=24.08 cuda-version=12.4 cuda-toolkit -c rapidsai -c conda-forge -c "nvidia/label/cuda-12.4.0" fi # install test packages @@ -102,7 +102,7 @@ runs: sudo apt-get -qq clean >/dev/null sudo rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* - name: Symblink system dependencies - if: inputs.raft == 'ON' || inputs.rocm == 'ON' + if: inputs.rocm == 'ON' shell: bash run: | # symblink system libraries for HIP compiler @@ -119,7 +119,7 @@ runs: -DBUILD_TESTING=ON \ -DBUILD_SHARED_LIBS=ON \ -DFAISS_ENABLE_GPU=${{ inputs.gpu }} \ - -DFAISS_ENABLE_RAFT=${{ inputs.raft }} \ + -DFAISS_ENABLE_CUVS=${{ inputs.cuvs }} \ -DFAISS_ENABLE_ROCM=${{ inputs.rocm }} \ -DFAISS_OPT_LEVEL=${{ inputs.opt_level }} \ -DFAISS_ENABLE_C_API=ON \ @@ -174,5 +174,5 @@ runs: if: always() uses: actions/upload-artifact@v4 with: - name: test-results-arch=${{ runner.arch }}-opt=${{ inputs.opt_level }}-gpu=${{ inputs.gpu }}-raft=${{ inputs.raft }}-rocm=${{ inputs.rocm }} + name: test-results-arch=${{ runner.arch }}-opt=${{ inputs.opt_level }}-gpu=${{ inputs.gpu }}-cuvs=${{ inputs.cuvs }}-rocm=${{ inputs.rocm }} path: test-results diff --git a/.github/actions/build_conda/action.yml b/.github/actions/build_conda/action.yml index d644e3e120..71153231b9 100644 --- a/.github/actions/build_conda/action.yml +++ b/.github/actions/build_conda/action.yml @@ -9,8 +9,8 @@ inputs: description: "CUDA toolkit version to use." default: "" required: false - raft: - description: "Enable RAFT support." + cuvs: + description: "Enable cuVS support." 
default: "" required: false runs: @@ -59,14 +59,14 @@ runs: run: | conda build faiss --user pytorch --label ${{ inputs.label }} -c pytorch - name: Conda build (GPU) - if: inputs.label == '' && inputs.cuda != '' && inputs.raft == '' + if: inputs.label == '' && inputs.cuda != '' && inputs.cuvs == '' shell: ${{ steps.choose_shell.outputs.shell }} working-directory: conda run: | conda build faiss-gpu --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia - name: Conda build (GPU) w/ anaconda upload - if: inputs.label != '' && inputs.cuda != '' && inputs.raft == '' + if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs == '' shell: ${{ steps.choose_shell.outputs.shell }} working-directory: conda env: @@ -74,19 +74,19 @@ runs: run: | conda build faiss-gpu --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ --user pytorch --label ${{ inputs.label }} -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia - - name: Conda build (GPU w/ RAFT) - if: inputs.label == '' && inputs.cuda != '' && inputs.raft != '' + - name: Conda build (GPU w/ cuVS) + if: inputs.label == '' && inputs.cuda != '' && inputs.cuvs != '' shell: ${{ steps.choose_shell.outputs.shell }} working-directory: conda run: | - conda build faiss-gpu-raft --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ + conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge - - name: Conda build (GPU w/ RAFT) w/ anaconda upload - if: inputs.label != '' && inputs.cuda != '' && inputs.raft != '' + - name: Conda build (GPU w/ cuVS) w/ anaconda upload + if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != '' shell: ${{ steps.choose_shell.outputs.shell }} working-directory: conda env: PACKAGE_TYPE: ${{ inputs.label }} run: | - conda build faiss-gpu-raft --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ + conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \ --user pytorch --label ${{ inputs.label }} -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index edba3b1c83..ac1387c438 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -78,8 +78,8 @@ jobs: uses: ./.github/actions/build_cmake with: gpu: ON - linux-x86_64-GPU-w-RAFT-cmake: - name: Linux x86_64 GPU w/ RAFT (cmake) + linux-x86_64-GPU-w-CUVS-cmake: + name: Linux x86_64 GPU w/ cuVS (cmake) needs: linux-x86_64-cmake runs-on: 4-core-ubuntu-gpu-t4 steps: @@ -89,7 +89,7 @@ jobs: uses: ./.github/actions/build_cmake with: gpu: ON - raft: ON + cuvs: ON linux-x86_64-GPU-w-ROCm-cmake: name: Linux x86_64 GPU w/ ROCm (cmake) needs: linux-x86_64-cmake @@ -199,8 +199,8 @@ jobs: with: label: main cuda: "11.4.4" - linux-x86_64-GPU-RAFT-packages-CUDA11-8-0: - name: Linux x86_64 GPU w/ RAFT packages (CUDA 11.8.0) + linux-x86_64-GPU-CUVS-packages-CUDA11-8-0: + name: Linux x86_64 GPU w/ cuVS packages (CUDA 11.8.0) if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') runs-on: 4-core-ubuntu-gpu-t4 env: @@ -217,7 +217,7 @@ jobs: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} with: label: main - raft: "ON" + cuvs: "ON" cuda: "11.8.0" linux-x86_64-GPU-packages-CUDA-12-1-1: name: Linux x86_64 GPU packages (CUDA 12.1.1) @@ -238,8 +238,8 @@ jobs: with: label: main cuda: "12.1.1" - 
linux-x86_64-GPU-RAFT-packages-CUDA12-1-1: - name: Linux x86_64 GPU w/ RAFT packages (CUDA 12.1.1) + linux-x86_64-GPU-CUVS-packages-CUDA12-1-1: + name: Linux x86_64 GPU w/ cuVS packages (CUDA 12.1.1) if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') runs-on: 4-core-ubuntu-gpu-t4 env: @@ -256,7 +256,7 @@ jobs: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} with: label: main - raft: "ON" + cuvs: "ON" cuda: "12.1.1" windows-x86_64-packages: name: Windows x86_64 packages diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index cdca7d873a..c3929683b8 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -38,8 +38,8 @@ jobs: with: label: nightly cuda: "11.4.4" - linux-x86_64-GPU-RAFT-CUDA11-8-0-nightly: - name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 11.8.0) + linux-x86_64-GPU-CUVS-CUDA11-8-0-nightly: + name: Linux x86_64 GPU w/ cuVS nightlies (CUDA 11.8.0) runs-on: 4-core-ubuntu-gpu-t4 env: CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" @@ -54,7 +54,7 @@ jobs: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} with: label: nightly - raft: "ON" + cuvs: "ON" cuda: "11.8.0" linux-x86_64-GPU-CUDA-12-1-1-nightly: name: Linux x86_64 GPU nightlies (CUDA 12.1.1) @@ -73,8 +73,8 @@ jobs: with: label: nightly cuda: "12.1.1" - linux-x86_64-GPU-RAFT-CUDA12-1-1-nightly: - name: Linux x86_64 GPU w/ RAFT nightlies (CUDA 12.1.1) + linux-x86_64-GPU-CUVS-CUDA12-1-1-nightly: + name: Linux x86_64 GPU w/ cuVS nightlies (CUDA 12.1.1) runs-on: 4-core-ubuntu-gpu-t4 env: CUDA_ARCHS: "70-real;72-real;75-real;80;86-real" @@ -89,7 +89,7 @@ jobs: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_API_TOKEN }} with: label: nightly - raft: "ON" + cuvs: "ON" cuda: "12.1.1" windows-x86_64-nightly: name: Windows x86_64 nightlies diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dab5900aa..ec990a8734 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,7 +33,7 @@ if(FAISS_ENABLE_GPU) endif() endif() -if(FAISS_ENABLE_RAFT) +if(FAISS_ENABLE_CUVS) include(cmake/thirdparty/fetch_rapids.cmake) include(rapids-cmake) include(rapids-cpm) @@ -60,7 +60,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") # Valid values are "generic", "avx2", "avx512", "sve". option(FAISS_OPT_LEVEL "" "generic") option(FAISS_ENABLE_GPU "Enable support for GPU indexes." ON) -option(FAISS_ENABLE_RAFT "Enable RAFT for GPU indexes." OFF) +option(FAISS_ENABLE_CUVS "Enable cuVS for GPU indexes." OFF) option(FAISS_ENABLE_ROCM "Enable ROCm for GPU indexes." OFF) option(FAISS_ENABLE_PYTHON "Build Python extension." ON) option(FAISS_ENABLE_C_API "Build C API." 
OFF) @@ -81,9 +81,9 @@ if(FAISS_ENABLE_GPU) endif() endif() -if(FAISS_ENABLE_RAFT AND NOT TARGET raft::raft) - find_package(raft COMPONENTS compiled distributed) -endif() +if(FAISS_ENABLE_CUVS AND NOT TARGET cuvs::cuvs) + find_package(cuvs) + endif() add_subdirectory(faiss) diff --git a/INSTALL.md b/INSTALL.md index e6d3f33fb8..e16de484fe 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -118,7 +118,7 @@ Several options can be passed to CMake, among which: values are `ON` and `OFF`), - `-DFAISS_ENABLE_PYTHON=OFF` in order to disable building python bindings (possible values are `ON` and `OFF`), - - `-DFAISS_ENABLE_RAFT=ON` in order to enable building the RAFT implementations + - `-DFAISS_ENABLE_CUVS=ON` in order to enable building the cuVS implementations of the IVF-Flat and IVF-PQ GPU-accelerated indices (default is `OFF`, possible values are `ON` and `OFF`) - `-DBUILD_TESTING=OFF` in order to disable building C++ tests, diff --git a/benchs/bench_ivfflat_raft.py b/benchs/bench_ivfflat_cuvs.py similarity index 81% rename from benchs/bench_ivfflat_raft.py rename to benchs/bench_ivfflat_cuvs.py index d8e299165c..3628ec7422 100644 --- a/benchs/bench_ivfflat_raft.py +++ b/benchs/bench_ivfflat_cuvs.py @@ -4,7 +4,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -44,8 +44,8 @@ def aa(*args, **kwargs): help='whether to benchmark add operation on GPU index') aa('--bm_search', default=True, help='whether to benchmark search operation on GPU index') -aa('--raft_only', default=False, action='store_true', - help='whether to only produce RAFT enabled benchmarks') +aa('--cuvs_only', default=False, action='store_true', + help='whether to only produce cuVS enabled benchmarks') group = parser.add_argument_group('IVF options') @@ -70,9 +70,9 @@ def aa(*args, **kwargs): mr = rmm.mr.PoolMemoryResource(rmm.mr.CudaMemoryResource()) rmm.mr.set_current_device_resource(mr) -def bench_train_milliseconds(index, trainVecs, use_raft): +def bench_train_milliseconds(index, trainVecs, use_cuvs): co = faiss.GpuMultipleClonerOptions() - co.use_raft = use_raft + co.use_cuvs = use_cuvs index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) t0 = time.time() index_gpu.train(trainVecs) @@ -89,21 +89,21 @@ def bench_train_milliseconds(index, trainVecs, use_raft): for n_cols in dataset_dims: index = faiss.index_factory(n_cols, "IVF{},Flat".format(args.n_centroids)) trainVecs = rs.rand(n_rows, n_cols).astype('float32') - raft_gpu_train_time = bench_train_milliseconds( + cuvs_gpu_train_time = bench_train_milliseconds( index, trainVecs, True) - if args.raft_only: - print("Method: IVFFlat, Operation: TRAIN, dim: %d, n_centroids %d, numTrain: %d, RAFT enabled GPU train time: %.3f milliseconds" % ( - n_cols, args.n_centroids, n_rows, raft_gpu_train_time)) + if args.cuvs_only: + print("Method: IVFFlat, Operation: TRAIN, dim: %d, n_centroids %d, numTrain: %d, cuVS enabled GPU train time: %.3f milliseconds" % ( + n_cols, args.n_centroids, n_rows, cuvs_gpu_train_time)) else: classical_gpu_train_time = bench_train_milliseconds( index, trainVecs, False) - print("Method: IVFFlat, Operation: TRAIN, dim: %d, n_centroids %d, numTrain: %d, classical GPU train time: %.3f milliseconds, RAFT enabled GPU train time: %.3f milliseconds" % ( - n_cols, 
args.n_centroids, n_rows, classical_gpu_train_time, raft_gpu_train_time)) + print("Method: IVFFlat, Operation: TRAIN, dim: %d, n_centroids %d, numTrain: %d, classical GPU train time: %.3f milliseconds, cuVS enabled GPU train time: %.3f milliseconds" % ( + n_cols, args.n_centroids, n_rows, classical_gpu_train_time, cuvs_gpu_train_time)) -def bench_add_milliseconds(index, addVecs, use_raft): +def bench_add_milliseconds(index, addVecs, use_cuvs): co = faiss.GpuMultipleClonerOptions() - co.use_raft = use_raft + co.use_cuvs = use_cuvs index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) index_gpu.copyFrom(index) t0 = time.time() @@ -125,20 +125,20 @@ def bench_add_milliseconds(index, addVecs, use_raft): for n_rows in addset_sizes: for n_cols in dataset_dims: addVecs = rs.rand(n_rows, n_cols).astype('float32') - raft_gpu_add_time = bench_add_milliseconds(index, addVecs, True) - if args.raft_only: - print("Method: IVFFlat, Operation: ADD, dim: %d, n_centroids %d, numAdd: %d, RAFT enabled GPU add time: %.3f milliseconds" % ( - n_train, n_rows, n_cols, args.n_centroids, raft_gpu_add_time)) + cuvs_gpu_add_time = bench_add_milliseconds(index, addVecs, True) + if args.cuvs_only: + print("Method: IVFFlat, Operation: ADD, dim: %d, n_centroids %d, numAdd: %d, cuVS enabled GPU add time: %.3f milliseconds" % ( + n_train, n_rows, n_cols, args.n_centroids, cuvs_gpu_add_time)) else: classical_gpu_add_time = bench_add_milliseconds( index, addVecs, False) - print("Method: IVFFlat, Operation: ADD, dim: %d, n_centroids %d, numAdd: %d, classical GPU add time: %.3f milliseconds, RAFT enabled GPU add time: %.3f milliseconds" % ( - n_train, n_rows, n_cols, args.n_centroids, classical_gpu_add_time, raft_gpu_add_time)) + print("Method: IVFFlat, Operation: ADD, dim: %d, n_centroids %d, numAdd: %d, classical GPU add time: %.3f milliseconds, cuVS enabled GPU add time: %.3f milliseconds" % ( + n_train, n_rows, n_cols, args.n_centroids, classical_gpu_add_time, cuvs_gpu_add_time)) -def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_raft): +def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_cuvs): co = faiss.GpuMultipleClonerOptions() - co.use_raft = use_raft + co.use_cuvs = use_cuvs index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) index_gpu.copyFrom(index) index_gpu.add(addVecs) @@ -163,19 +163,19 @@ def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_raft): addVecs = rs.rand(n_add, n_cols).astype('float32') for n_rows in queryset_sizes: queryVecs = rs.rand(n_rows, n_cols).astype('float32') - raft_gpu_search_time = bench_search_milliseconds( + cuvs_gpu_search_time = bench_search_milliseconds( index, addVecs, queryVecs, args.nprobe, args.k, True) - if args.raft_only: - print("Method: IVFFlat, Operation: SEARCH, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, RAFT enabled GPU search time: %.3f milliseconds" % ( - n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, raft_gpu_search_time)) + if args.cuvs_only: + print("Method: IVFFlat, Operation: SEARCH, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, cuVS enabled GPU search time: %.3f milliseconds" % ( + n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, cuvs_gpu_search_time)) else: classical_gpu_search_time = bench_search_milliseconds( index, addVecs, queryVecs, args.nprobe, args.k, False) - print("Method: IVFFlat, Operation: SEARCH, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f 
milliseconds, RAFT enabled GPU search time: %.3f milliseconds" % ( - n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, classical_gpu_search_time, raft_gpu_search_time)) + print("Method: IVFFlat, Operation: SEARCH, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f milliseconds, cuVS enabled GPU search time: %.3f milliseconds" % ( + n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, classical_gpu_search_time, cuvs_gpu_search_time)) print("=" * 40) - print("Large RAFT Enabled Benchmarks") + print("Large cuVS Enabled Benchmarks") print("=" * 40) # Avoid classical GPU Benchmarks for large datasets because of OOM for more than 500000 queries and/or large dims as well as for large k queryset_sizes = [100000, 500000, 1000000] @@ -188,7 +188,7 @@ def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_raft): addVecs = rs.rand(n_add, n_cols).astype('float32') for n_rows in queryset_sizes: queryVecs = rs.rand(n_rows, n_cols).astype('float32') - raft_gpu_search_time = bench_search_milliseconds( + cuvs_gpu_search_time = bench_search_milliseconds( index, addVecs, queryVecs, args.nprobe, args.k, True) - print("Method: IVFFlat, Operation: SEARCH, numTrain: %d, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, RAFT enabled GPU search time: %.3f milliseconds" % ( - n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, raft_gpu_search_time)) + print("Method: IVFFlat, Operation: SEARCH, numTrain: %d, dim: %d, n_centroids: %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, cuVS enabled GPU search time: %.3f milliseconds" % ( + n_cols, args.n_centroids, n_add, n_rows, args.nprobe, args.k, cuvs_gpu_search_time)) diff --git a/benchs/bench_ivfpq_raft.py b/benchs/bench_ivfpq_cuvs.py similarity index 78% rename from benchs/bench_ivfpq_raft.py rename to benchs/bench_ivfpq_cuvs.py index 96a9ab1512..7668afffea 100644 --- a/benchs/bench_ivfpq_raft.py +++ b/benchs/bench_ivfpq_cuvs.py @@ -1,10 +1,9 @@ -# @lint-ignore-every LICENSELINT -# Copyright (c) Meta Platforms, Inc. and its affiliates. +# Copyright (c) Facebook, Inc. and its affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -42,13 +41,13 @@ def aa(*args, **kwargs): group = parser.add_argument_group('benchmarking options') -aa('--raft_only', default=False, action='store_true', - help='whether to only produce RAFT enabled benchmarks') +aa('--cuvs_only', default=False, action='store_true', + help='whether to only produce cuVS enabled benchmarks') group = parser.add_argument_group('IVF options') -aa('--bits_per_code', default=8, type=int, help='bits per code. Note that < 8 is only supported when RAFT is enabled') +aa('--bits_per_code', default=8, type=int, help='bits per code. 
Note that < 8 is only supported when cuVS is enabled') aa('--pq_len', default=2, type=int, help='number of vector elements represented by one PQ code') -aa('--use_precomputed', default=True, type=bool, help='use precomputed codes (not with RAFT enabled)') +aa('--use_precomputed', default=True, type=bool, help='use precomputed codes (not with cuVS enabled)') group = parser.add_argument_group('searching') aa('--k', default=10, type=int, help='nb of nearest neighbors') @@ -74,11 +73,11 @@ def compute_nlist(numVecs): return int(nlist) -def bench_train_milliseconds(index, trainVecs, use_raft): +def bench_train_milliseconds(index, trainVecs, use_cuvs): co = faiss.GpuMultipleClonerOptions() # use float 16 lookup tables to save space co.useFloat16LookupTables = True - co.use_raft = use_raft + co.use_cuvs = use_cuvs index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) t0 = time.time() index_gpu.train(trainVecs) @@ -93,22 +92,22 @@ def bench_train_milliseconds(index, trainVecs, use_raft): print("=" * 40) print("GPU Train Benchmarks") print("=" * 40) -raft_gpu_train_time = bench_train_milliseconds(index, xt, True) -if args.raft_only: - print("Method: IVFPQ, Operation: TRAIN, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numTrain: %d, RAFT enabled GPU train time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_train, raft_gpu_train_time)) +cuvs_gpu_train_time = bench_train_milliseconds(index, xt, True) +if args.cuvs_only: + print("Method: IVFPQ, Operation: TRAIN, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numTrain: %d, cuVS enabled GPU train time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_train, cuvs_gpu_train_time)) else: classical_gpu_train_time = bench_train_milliseconds( index, xt, False) - print("Method: IVFPQ, Operation: TRAIN, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numTrain: %d, classical GPU train time: %.3f milliseconds, RAFT enabled GPU train time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_train, classical_gpu_train_time, raft_gpu_train_time)) + print("Method: IVFPQ, Operation: TRAIN, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numTrain: %d, classical GPU train time: %.3f milliseconds, cuVS enabled GPU train time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_train, classical_gpu_train_time, cuvs_gpu_train_time)) -def bench_add_milliseconds(index, addVecs, use_raft): +def bench_add_milliseconds(index, addVecs, use_cuvs): co = faiss.GpuMultipleClonerOptions() # use float 16 lookup tables to save space co.useFloat16LookupTables = True - co.use_raft = use_raft + co.use_cuvs = use_cuvs index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) index_gpu.copyFrom(index) t0 = time.time() @@ -119,20 +118,20 @@ def bench_add_milliseconds(index, addVecs, use_raft): print("GPU Add Benchmarks") print("=" * 40) index.train(xt) -raft_gpu_add_time = bench_add_milliseconds(index, xb, True) -if args.raft_only: - print("Method: IVFPQ, Operation: ADD, dim: %d, n_centroids %d numSubQuantizers %d, bitsPerCode %d, numAdd %d, RAFT enabled GPU add time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_rows, raft_gpu_add_time)) +cuvs_gpu_add_time = bench_add_milliseconds(index, xb, True) +if args.cuvs_only: + print("Method: IVFPQ, Operation: ADD, dim: %d, n_centroids %d numSubQuantizers %d, bitsPerCode %d, numAdd %d, cuVS enabled GPU add time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_rows, cuvs_gpu_add_time)) else: 
classical_gpu_add_time = bench_add_milliseconds( index, xb, False) - print("Method: IVFFPQ, Operation: ADD, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numAdd %d, classical GPU add time: %.3f milliseconds, RAFT enabled GPU add time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_rows, classical_gpu_add_time, raft_gpu_add_time)) + print("Method: IVFFPQ, Operation: ADD, dim: %d, n_centroids %d, numSubQuantizers %d, bitsPerCode %d, numAdd %d, classical GPU add time: %.3f milliseconds, cuVS enabled GPU add time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_rows, classical_gpu_add_time, cuvs_gpu_add_time)) -def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_raft): +def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_cuvs): co = faiss.GpuMultipleClonerOptions() - co.use_raft = use_raft + co.use_cuvs = use_cuvs co.useFloat16LookupTables = True index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) index_gpu.copyFrom(index) @@ -157,13 +156,13 @@ def bench_search_milliseconds(index, addVecs, queryVecs, nprobe, k, use_raft): index.train(xt) for n_rows in queryset_sizes: queryVecs = xq[np.random.choice(xq.shape[0], n_rows, replace=False)] - raft_gpu_search_time = bench_search_milliseconds( + cuvs_gpu_search_time = bench_search_milliseconds( index, xb, queryVecs, args.nprobe, args.k, True) - if args.raft_only: - print("Method: IVFPQ, Operation: SEARCH, dim: %d, n_centroids: %d, numSubQuantizers %d, bitsPerCode %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, RAFT enabled GPU search time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_add, n_rows, args.nprobe, args.k, raft_gpu_search_time)) + if args.cuvs_only: + print("Method: IVFPQ, Operation: SEARCH, dim: %d, n_centroids: %d, numSubQuantizers %d, bitsPerCode %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, cuVS enabled GPU search time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_add, n_rows, args.nprobe, args.k, cuvs_gpu_search_time)) else: classical_gpu_search_time = bench_search_milliseconds( index, xb, queryVecs, args.nprobe, args.k, False) - print("Method: IVFPQ, Operation: SEARCH, dim: %d, n_centroids: %d, numSubQuantizers %d, bitsPerCode %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f milliseconds, RAFT enabled GPU search time: %.3f milliseconds" % ( - n_cols, nlist, M, args.bits_per_code, n_add, n_rows, args.nprobe, args.k, classical_gpu_search_time, raft_gpu_search_time)) + print("Method: IVFPQ, Operation: SEARCH, dim: %d, n_centroids: %d, numSubQuantizers %d, bitsPerCode %d, numVecs: %d, numQuery: %d, nprobe: %d, k: %d, classical GPU search time: %.3f milliseconds, cuVS enabled GPU search time: %.3f milliseconds" % ( + n_cols, nlist, M, args.bits_per_code, n_add, n_rows, args.nprobe, args.k, classical_gpu_search_time, cuvs_gpu_search_time)) \ No newline at end of file diff --git a/c_api/gpu/CMakeLists.txt b/c_api/gpu/CMakeLists.txt index 88550e6750..5fdfc34dfd 100644 --- a/c_api/gpu/CMakeLists.txt +++ b/c_api/gpu/CMakeLists.txt @@ -19,9 +19,7 @@ if (FAISS_ENABLE_ROCM) target_link_libraries(faiss_c PUBLIC hip::host roc::hipblas) else() find_package(CUDAToolkit REQUIRED) - target_link_libraries(faiss_c PUBLIC CUDA::cudart CUDA::cublas - $<$:raft::raft> - $<$:nvidia::cutlass::cutlass>) + target_link_libraries(faiss_c PUBLIC CUDA::cudart CUDA::cublas $<$:cuvs::cuvs>) endif() add_executable(example_gpu_c EXCLUDE_FROM_ALL example_gpu_c.c) diff --git 
a/cmake/thirdparty/fetch_rapids.cmake b/cmake/thirdparty/fetch_rapids.cmake index 5e40865586..8d99161071 100644 --- a/cmake/thirdparty/fetch_rapids.cmake +++ b/cmake/thirdparty/fetch_rapids.cmake @@ -3,7 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -15,7 +15,7 @@ # or implied. See the License for the specific language governing permissions and limitations under # the License. # ============================================================================= -set(RAPIDS_VERSION "24.06") +set(RAPIDS_VERSION "24.08") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/FAISS_RAPIDS.cmake) file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake diff --git a/conda/faiss-gpu-raft/build-lib.sh b/conda/faiss-gpu-cuvs/build-lib.sh similarity index 95% rename from conda/faiss-gpu-raft/build-lib.sh rename to conda/faiss-gpu-cuvs/build-lib.sh index 78a7f87eae..37f0381809 100644 --- a/conda/faiss-gpu-raft/build-lib.sh +++ b/conda/faiss-gpu-cuvs/build-lib.sh @@ -13,7 +13,7 @@ cmake -B _build \ -DBUILD_TESTING=OFF \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=ON \ - -DFAISS_ENABLE_RAFT=ON \ + -DFAISS_ENABLE_CUVS=ON \ -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}" \ -DFAISS_ENABLE_PYTHON=OFF \ -DBLA_VENDOR=Intel10_64lp \ diff --git a/conda/faiss-gpu-raft/build-pkg.sh b/conda/faiss-gpu-cuvs/build-pkg.sh similarity index 95% rename from conda/faiss-gpu-raft/build-pkg.sh rename to conda/faiss-gpu-cuvs/build-pkg.sh index 66a91bd006..09d6e6b7af 100644 --- a/conda/faiss-gpu-raft/build-pkg.sh +++ b/conda/faiss-gpu-cuvs/build-pkg.sh @@ -12,7 +12,7 @@ cmake -B _build_python_${PY_VER} \ -Dfaiss_ROOT=_libfaiss_stage/ \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=ON \ - -DFAISS_ENABLE_RAFT=ON \ + -DFAISS_ENABLE_CUVS=ON \ -DCMAKE_BUILD_TYPE=Release \ -DPython_EXECUTABLE=$PYTHON \ faiss/python diff --git a/conda/faiss-gpu-raft/meta.yaml b/conda/faiss-gpu-cuvs/meta.yaml similarity index 97% rename from conda/faiss-gpu-raft/meta.yaml rename to conda/faiss-gpu-cuvs/meta.yaml index 1dde8e9868..6c43d19a7c 100644 --- a/conda/faiss-gpu-raft/meta.yaml +++ b/conda/faiss-gpu-cuvs/meta.yaml @@ -38,7 +38,7 @@ outputs: script: build-lib-arm64.sh # [not x86_64] script: build-lib.bat # [win] build: - string: "h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}_raft{{ suffix }}" + string: "h{{ PKG_HASH }}_{{ number }}_cuda{{ cudatoolkit }}_cuvs{{ suffix }}" run_exports: - {{ pin_compatible('libfaiss', exact=True) }} script_env: @@ -58,7 +58,7 @@ outputs: - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] - mkl =2023 # [x86_64] - openblas # [not x86_64] - - libraft =24.06 + - libcuvs =24.08 - cuda-version {{ cuda_constraints }} run: - _openmp_mutex =4.5=2_kmp_llvm # [x86_64] @@ -66,7 +66,7 @@ outputs: - openblas # [not x86_64] - cuda-cudart {{ cuda_constraints }} - libcublas {{ libcublas_constraints }} - - libraft =24.06 + - libcuvs =24.08 - cuda-version {{ cuda_constraints }} test: requires: @@ -77,7 +77,7 @@ outputs: - conda inspect linkages -p $PREFIX $PKG_NAME # [not win] - conda inspect objects -p $PREFIX $PKG_NAME # [osx] - - name: faiss-gpu-raft + - name: faiss-gpu-cuvs 
script: build-pkg.sh # [x86_64 and not win and not osx] script: build-pkg-osx.sh # [x86_64 and osx] script: build-pkg-arm64.sh # [not x86_64] diff --git a/conda/faiss-gpu-raft/test_cpu_dispatch.sh b/conda/faiss-gpu-cuvs/test_cpu_dispatch.sh similarity index 100% rename from conda/faiss-gpu-raft/test_cpu_dispatch.sh rename to conda/faiss-gpu-cuvs/test_cpu_dispatch.sh diff --git a/conda/faiss-gpu/build-lib.sh b/conda/faiss-gpu/build-lib.sh index 9cb3ad468b..befad80547 100755 --- a/conda/faiss-gpu/build-lib.sh +++ b/conda/faiss-gpu/build-lib.sh @@ -19,7 +19,7 @@ cmake -B _build \ -DBUILD_TESTING=OFF \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=ON \ - -DFAISS_ENABLE_RAFT=OFF \ + -DFAISS_ENABLE_CUVS=OFF \ -DCMAKE_CUDA_ARCHITECTURES="${CUDA_ARCHS}" \ -DFAISS_ENABLE_PYTHON=OFF \ -DBLA_VENDOR=Intel10_64lp \ diff --git a/conda/faiss-gpu/build-pkg.sh b/conda/faiss-gpu/build-pkg.sh index f90ff7d38f..64afe6c13f 100755 --- a/conda/faiss-gpu/build-pkg.sh +++ b/conda/faiss-gpu/build-pkg.sh @@ -12,7 +12,7 @@ cmake -B _build_python_${PY_VER} \ -Dfaiss_ROOT=_libfaiss_stage/ \ -DFAISS_OPT_LEVEL=avx512 \ -DFAISS_ENABLE_GPU=ON \ - -DFAISS_ENABLE_RAFT=OFF \ + -DFAISS_ENABLE_CUVS=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DPython_EXECUTABLE=$PYTHON \ faiss/python diff --git a/contrib/torch_utils.py b/contrib/torch_utils.py index 6db271ec48..5568901d84 100644 --- a/contrib/torch_utils.py +++ b/contrib/torch_utils.py @@ -586,7 +586,7 @@ def torch_replacement_knn(xq, xb, k, metric=faiss.METRIC_L2, metric_arg=0): # allows torch tensor usage with bfKnn -def torch_replacement_knn_gpu(res, xq, xb, k, D=None, I=None, metric=faiss.METRIC_L2, device=-1, use_raft=False): +def torch_replacement_knn_gpu(res, xq, xb, k, D=None, I=None, metric=faiss.METRIC_L2, device=-1, use_cuvs=False): if type(xb) is np.ndarray: # Forward to faiss __init__.py base method return faiss.knn_gpu_numpy(res, xq, xb, k, D, I, metric, device) @@ -667,7 +667,7 @@ def torch_replacement_knn_gpu(res, xq, xb, k, D=None, I=None, metric=faiss.METRI args.outIndices = I_ptr args.outIndicesType = I_type args.device = device - args.use_raft = use_raft + args.use_cuvs = use_cuvs with using_stream(res): faiss.bfKnn(res, args) diff --git a/faiss/gpu/CMakeLists.txt b/faiss/gpu/CMakeLists.txt index 3517827750..84cb222145 100644 --- a/faiss/gpu/CMakeLists.txt +++ b/faiss/gpu/CMakeLists.txt @@ -5,7 +5,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -241,21 +241,21 @@ endfunction() generate_ivf_interleaved_code() -if(FAISS_ENABLE_RAFT) +if(FAISS_ENABLE_CUVS) list(APPEND FAISS_GPU_HEADERS GpuIndexCagra.h - impl/RaftCagra.cuh - impl/RaftFlatIndex.cuh - impl/RaftIVFFlat.cuh - impl/RaftIVFPQ.cuh - utils/RaftUtils.h) + impl/CuvsCagra.cuh + impl/CuvsFlatIndex.cuh + impl/CuvsIVFFlat.cuh + impl/CuvsIVFPQ.cuh + utils/CuvsUtils.h) list(APPEND FAISS_GPU_SRC GpuIndexCagra.cu - impl/RaftCagra.cu - impl/RaftFlatIndex.cu - impl/RaftIVFFlat.cu - impl/RaftIVFPQ.cu - utils/RaftUtils.cu) + impl/CuvsCagra.cu + impl/CuvsFlatIndex.cu + impl/CuvsIVFFlat.cu + impl/CuvsIVFPQ.cu + utils/CuvsUtils.cu) endif() add_library(faiss_gpu STATIC ${FAISS_GPU_SRC}) @@ -266,10 +266,10 @@ set_target_properties(faiss_gpu PROPERTIES target_include_directories(faiss_gpu PUBLIC $) -if(FAISS_ENABLE_RAFT) - target_compile_definitions(faiss PUBLIC USE_NVIDIA_RAFT=1) - target_compile_definitions(faiss_avx2 PUBLIC USE_NVIDIA_RAFT=1) - target_compile_definitions(faiss_avx512 PUBLIC USE_NVIDIA_RAFT=1) +if(FAISS_ENABLE_CUVS) + target_compile_definitions(faiss PUBLIC USE_NVIDIA_CUVS=1) + target_compile_definitions(faiss_avx2 PUBLIC USE_NVIDIA_CUVS=1) + target_compile_definitions(faiss_avx512 PUBLIC USE_NVIDIA_CUVS=1) # Mark all functions as hidden so that we don't generate # global 'public' functions that also exist in libraft.so @@ -286,13 +286,13 @@ if(FAISS_ENABLE_RAFT) set_source_files_properties( GpuDistance.cu StandardGpuResources.cpp - impl/RaftFlatIndex.cu - impl/RaftIVFFlat.cu - impl/RaftIVFPQ.cu - utils/RaftUtils.cu + impl/CuvsFlatIndex.cu + impl/CuvsIVFFlat.cu + impl/CuvsIVFPQ.cu + utils/CuvsUtils.cu TARGET_DIRECTORY faiss PROPERTIES COMPILE_OPTIONS "-fvisibility=hidden") - target_compile_definitions(faiss_gpu PUBLIC USE_NVIDIA_RAFT=1) + target_compile_definitions(faiss_gpu PUBLIC USE_NVIDIA_CUVS=1) endif() if (FAISS_ENABLE_ROCM) @@ -333,14 +333,11 @@ else() ) target_link_options(faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") + find_package(CUDAToolkit REQUIRED) - target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas - $<$:raft::raft> - $<$:raft::compiled> - $<$:nvidia::cutlass::cutlass> - $<$:OpenMP::OpenMP_CXX>) + target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$:cuvs::cuvs> $<$:OpenMP::OpenMP_CXX>) target_compile_options(faiss_gpu PRIVATE $<$:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr - $<$:-Xcompiler=${OpenMP_CXX_FLAGS}>>) + $<$:-Xcompiler=${OpenMP_CXX_FLAGS}>>) endif() diff --git a/faiss/gpu/GpuCloner.cpp b/faiss/gpu/GpuCloner.cpp index a5a57da770..575ee2e0a5 100644 --- a/faiss/gpu/GpuCloner.cpp +++ b/faiss/gpu/GpuCloner.cpp @@ -14,7 +14,7 @@ #include #include -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS #include #endif #include @@ -27,7 +27,7 @@ #include #include #include -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS #include #endif #include @@ -92,7 +92,7 @@ Index* ToCPUCloner::clone_Index(const Index* index) { // (inverse op of ToGpuClonerMultiple) } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS else if (auto icg = dynamic_cast(index)) { IndexHNSWCagra* res = new IndexHNSWCagra(); icg->copyTo(res); @@ -138,7 +138,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) { GpuIndexFlatConfig config; config.device = device; config.useFloat16 = useFloat16; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; return new GpuIndexFlat(provider, ifl, config); } else if ( dynamic_cast(index) && @@ -148,7 +148,7 @@ 
Index* ToGpuCloner::clone_Index(const Index* index) { config.device = device; config.useFloat16 = true; FAISS_THROW_IF_NOT_MSG( - !use_raft, "this type of index is not implemented for RAFT"); + !use_cuvs, "this type of index is not implemented for cuVS"); GpuIndexFlat* gif = new GpuIndexFlat( provider, index->d, index->metric_type, config); // transfer data by blocks @@ -166,7 +166,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) { config.device = device; config.indicesOptions = indicesOptions; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer; GpuIndexIVFFlat* res = new GpuIndexIVFFlat( @@ -185,7 +185,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) { config.indicesOptions = indicesOptions; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; FAISS_THROW_IF_NOT_MSG( - !use_raft, "this type of index is not implemented for RAFT"); + !use_cuvs, "this type of index is not implemented for cuVS"); GpuIndexIVFScalarQuantizer* res = new GpuIndexIVFScalarQuantizer( provider, @@ -218,8 +218,8 @@ Index* ToGpuCloner::clone_Index(const Index* index) { config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; config.useFloat16LookupTables = useFloat16; config.usePrecomputedTables = usePrecomputed; - config.use_raft = use_raft; - config.interleavedLayout = use_raft; + config.use_cuvs = use_cuvs; + config.interleavedLayout = use_cuvs; config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer; GpuIndexIVFPQ* res = new GpuIndexIVFPQ(provider, ipq, config); @@ -230,7 +230,7 @@ Index* ToGpuCloner::clone_Index(const Index* index) { return res; } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS else if (auto icg = dynamic_cast(index)) { GpuIndexCagraConfig config; config.device = device; @@ -541,7 +541,7 @@ faiss::IndexBinary* index_binary_cpu_to_gpu( GpuIndexBinaryFlatConfig config; config.device = device; if (options) { - config.use_raft = options->use_raft; + config.use_cuvs = options->use_cuvs; } return new GpuIndexBinaryFlat(provider, ii, config); } else { diff --git a/faiss/gpu/GpuClonerOptions.h b/faiss/gpu/GpuClonerOptions.h index f3788fd170..e524ca6829 100644 --- a/faiss/gpu/GpuClonerOptions.h +++ b/faiss/gpu/GpuClonerOptions.h @@ -37,11 +37,11 @@ struct GpuClonerOptions { /// Set verbose options on the index bool verbose = false; - /// use the RAFT implementation -#if defined USE_NVIDIA_RAFT - bool use_raft = true; + /// use the cuVS implementation +#if defined USE_NVIDIA_CUVS + bool use_cuvs = true; #else - bool use_raft = false; + bool use_cuvs = false; #endif /// This flag controls the CPU fallback logic for coarse quantizer diff --git a/faiss/gpu/GpuDistance.cu b/faiss/gpu/GpuDistance.cu index d5082bbee7..f515067889 100644 --- a/faiss/gpu/GpuDistance.cu +++ b/faiss/gpu/GpuDistance.cu @@ -30,9 +30,11 @@ #include #include #include +#include -#if defined USE_NVIDIA_RAFT -#include +#if defined USE_NVIDIA_CUVS +#include +#include #include #include #include @@ -41,32 +43,19 @@ #include #include #include -#define RAFT_NAME "raft" #endif namespace faiss { namespace gpu { -#if defined USE_NVIDIA_RAFT -using namespace raft::distance; -using namespace raft::neighbors; -#endif - -/// Caches device major version -int device_major_version = -1; - -bool should_use_raft(GpuDistanceParams args) { - if (device_major_version < 0) { - cudaDeviceProp prop; - int dev = args.device >= 0 ? 
args.device : getCurrentDevice(); - cudaGetDeviceProperties(&prop, dev); - device_major_version = prop.major; - } +bool should_use_cuvs(GpuDistanceParams args) { + int dev = args.device >= 0 ? args.device : getCurrentDevice(); + auto prop = getDeviceProperties(dev); - if (device_major_version < 7) + if (prop.major < 7) return false; - return args.use_raft; + return args.use_cuvs; } template @@ -244,13 +233,33 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) { "limitation: both vectorType and queryType must currently " "be the same (F32 or F16"); -#if defined USE_NVIDIA_RAFT - // Note: For now, RAFT bfknn requires queries and vectors to be same layout - if (should_use_raft(args) && args.queriesRowMajor == args.vectorsRowMajor) { - DistanceType distance = metricFaissToRaft(args.metric, false); +#if defined USE_NVIDIA_CUVS + // Note: For now, cuVS bfknn requires queries and vectors to be same layout + if (should_use_cuvs(args) && args.queriesRowMajor == args.vectorsRowMajor && + args.outIndicesType == IndicesDataType::I64 && + args.vectorType == DistanceDataType::F32 && args.k > 0) { + cuvsDistanceType distance = metricFaissToCuvs(args.metric, false); auto resImpl = prov->getResources(); auto res = resImpl.get(); + // If the user specified a device, then ensure that it is currently set + int device = -1; + if (args.device == -1) { + // Original behavior if no device is specified, use the current CUDA + // thread local device + device = getCurrentDevice(); + } else { + // Otherwise, use the device specified in `args` + device = args.device; + + FAISS_THROW_IF_NOT_FMT( + device >= 0 && device < getNumDevices(), + "bfKnn: device specified must be -1 (current CUDA thread local device) " + "or within the range [0, %d)", + getNumDevices()); + } + + DeviceScope scope(device); raft::device_resources& handle = res->getRaftHandleCurrentDevice(); auto stream = res->getDefaultStreamCurrentDevice(); @@ -306,10 +315,16 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) { raft::vector_extent(num_queries)); norms_view = norms->view(); } - raft::neighbors::brute_force::index idx( + + cuvs::neighbors::brute_force::index idx( handle, index.view(), norms_view, distance, metric_arg); - raft::neighbors::brute_force::search( - handle, idx, search.view(), inds.view(), dists.view()); + cuvs::neighbors::brute_force::search( + handle, + idx, + search.view(), + inds.view(), + dists.view(), + std::nullopt); } else { auto index = raft::make_readonly_temporary_device_buffer< const float, @@ -329,20 +344,31 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) { reinterpret_cast(args.queries)), raft::matrix_extent(num_queries, dims)); - std::vector> - index_vec = {index.view()}; + raft::vector_extent>> + norms; + std::optional> + norms_view; + if (args.vectorNorms) { + norms = raft::make_readonly_temporary_device_buffer< + const float, + int64_t>( + handle, + args.vectorNorms, + raft::vector_extent(num_queries)); + norms_view = norms->view(); + } - brute_force::knn( + cuvs::neighbors::brute_force::index idx( + handle, index.view(), norms_view, distance, metric_arg); + cuvs::neighbors::brute_force::search( handle, - index_vec, + idx, search.view(), inds.view(), dists.view(), - distance, - metric_arg); + std::nullopt); } if (args.metric == MetricType::METRIC_Lp) { @@ -364,10 +390,10 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) { handle.sync_stream(); } else #else - if (should_use_raft(args)) { + if (should_use_cuvs(args)) { 
FAISS_THROW_IF_NOT_MSG( - !should_use_raft(args), - "RAFT has not been compiled into the current version so it cannot be used."); + !should_use_cuvs(args), + "cuVS has not been compiled into the current version so it cannot be used."); } else #endif if (args.vectorType == DistanceDataType::F32) { diff --git a/faiss/gpu/GpuDistance.h b/faiss/gpu/GpuDistance.h index 54777058a6..7052fc68b0 100644 --- a/faiss/gpu/GpuDistance.h +++ b/faiss/gpu/GpuDistance.h @@ -106,14 +106,17 @@ struct GpuDistanceParams { /// execution int device = -1; - /// Should the index dispatch down to RAFT? - /// TODO: change default to true if RAFT is enabled - bool use_raft = false; + /// Should the index dispatch down to cuVS? +#if defined USE_NVIDIA_CUVS + bool use_cuvs = true; +#else + bool use_cuvs = false; +#endif }; -/// A function that determines whether RAFT should be used based on various +/// A function that determines whether cuVS should be used based on various /// conditions (such as unsupported architecture) -bool should_use_raft(GpuDistanceParams args); +bool should_use_cuvs(GpuDistanceParams args); /// A wrapper for gpu/impl/Distance.cuh to expose direct brute-force k-nearest /// neighbor searches on an externally-provided region of memory (e.g., from a diff --git a/faiss/gpu/GpuIndex.cu b/faiss/gpu/GpuIndex.cu index 0740ad4dab..033cb9aaa9 100644 --- a/faiss/gpu/GpuIndex.cu +++ b/faiss/gpu/GpuIndex.cu @@ -42,20 +42,13 @@ constexpr idx_t kAddVecSize = (idx_t)512 * 1024; // FIXME: parameterize based on algorithm need constexpr idx_t kSearchVecSize = (idx_t)32 * 1024; -/// Caches device major version -extern int device_major_version; - -bool should_use_raft(GpuIndexConfig config_) { - if (device_major_version < 0) { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, config_.device); - device_major_version = prop.major; - } +bool should_use_cuvs(GpuIndexConfig config_) { + auto prop = getDeviceProperties(config_.device); - if (device_major_version < 7) + if (prop.major < 7) return false; - return config_.use_raft; + return config_.use_cuvs; } GpuIndex::GpuIndex( @@ -148,7 +141,7 @@ void GpuIndex::addPaged_(idx_t n, const float* x, const idx_t* ids) { if (n > 0) { idx_t totalSize = n * this->d * sizeof(float); - if (!should_use_raft(config_) && + if (!should_use_cuvs(config_) && (totalSize > kAddPageSize || n > kAddVecSize)) { // How many vectors fit into kAddPageSize? idx_t maxNumVecsForPageSize = @@ -540,8 +533,8 @@ extern std::string gpu_compile_options; struct InitGpuCompileOptions { InitGpuCompileOptions() { gpu_compile_options = "GPU "; -#ifdef USE_NVIDIA_RAFT - gpu_compile_options += "NVIDIA_RAFT "; +#ifdef USE_NVIDIA_CUVS + gpu_compile_options += "NVIDIA_CUVS "; #endif #ifdef USE_AMD_ROCM diff --git a/faiss/gpu/GpuIndex.h b/faiss/gpu/GpuIndex.h index 4098e6a101..48bb7a17a3 100644 --- a/faiss/gpu/GpuIndex.h +++ b/faiss/gpu/GpuIndex.h @@ -38,17 +38,17 @@ struct GpuIndexConfig { /// more memory than is available on the GPU. MemorySpace memorySpace = MemorySpace::Device; - /// Should the index dispatch down to RAFT? -#if defined USE_NVIDIA_RAFT - bool use_raft = true; + /// Should the index dispatch down to cuVS? 
+#if defined USE_NVIDIA_CUVS + bool use_cuvs = true; #else - bool use_raft = false; + bool use_cuvs = false; #endif }; -/// A centralized function that determines whether RAFT should +/// A centralized function that determines whether cuVS should /// be used based on various conditions (such as unsupported architecture) -bool should_use_raft(GpuIndexConfig config_); +bool should_use_cuvs(GpuIndexConfig config_); class GpuIndex : public faiss::Index { public: diff --git a/faiss/gpu/GpuIndexCagra.cu b/faiss/gpu/GpuIndexCagra.cu index fb90758f78..fe0c82b8aa 100644 --- a/faiss/gpu/GpuIndexCagra.cu +++ b/faiss/gpu/GpuIndexCagra.cu @@ -23,8 +23,9 @@ #include #include +#include #include -#include +#include #include namespace faiss { @@ -41,6 +42,7 @@ GpuIndexCagra::GpuIndexCagra( } void GpuIndexCagra::train(idx_t n, const float* x) { + DeviceScope scope(config_.device); if (this->is_trained) { FAISS_ASSERT(index_); return; @@ -48,13 +50,13 @@ void GpuIndexCagra::train(idx_t n, const float* x) { FAISS_ASSERT(!index_); - std::optional ivf_pq_params = + std::optional ivf_pq_params = std::nullopt; - std::optional ivf_pq_search_params = + std::optional ivf_pq_search_params = std::nullopt; if (cagraConfig_.ivf_pq_params != nullptr) { ivf_pq_params = - std::make_optional(); + std::make_optional(); ivf_pq_params->n_lists = cagraConfig_.ivf_pq_params->n_lists; ivf_pq_params->kmeans_n_iters = cagraConfig_.ivf_pq_params->kmeans_n_iters; @@ -63,7 +65,7 @@ void GpuIndexCagra::train(idx_t n, const float* x) { ivf_pq_params->pq_bits = cagraConfig_.ivf_pq_params->pq_bits; ivf_pq_params->pq_dim = cagraConfig_.ivf_pq_params->pq_dim; ivf_pq_params->codebook_kind = - static_cast( + static_cast( cagraConfig_.ivf_pq_params->codebook_kind); ivf_pq_params->force_random_rotation = cagraConfig_.ivf_pq_params->force_random_rotation; @@ -72,7 +74,7 @@ void GpuIndexCagra::train(idx_t n, const float* x) { } if (cagraConfig_.ivf_pq_search_params != nullptr) { ivf_pq_search_params = - std::make_optional(); + std::make_optional(); ivf_pq_search_params->n_probes = cagraConfig_.ivf_pq_search_params->n_probes; ivf_pq_search_params->lut_dtype = @@ -80,18 +82,20 @@ void GpuIndexCagra::train(idx_t n, const float* x) { ivf_pq_search_params->preferred_shmem_carveout = cagraConfig_.ivf_pq_search_params->preferred_shmem_carveout; } - index_ = std::make_shared( + index_ = std::make_shared( this->resources_.get(), this->d, cagraConfig_.intermediate_graph_degree, cagraConfig_.graph_degree, static_cast(cagraConfig_.build_algo), cagraConfig_.nn_descent_niter, + cagraConfig_.store_dataset, this->metric_type, this->metric_arg, INDICES_64_BIT, ivf_pq_params, - ivf_pq_search_params); + ivf_pq_search_params, + cagraConfig_.refine_rate); index_->train(n, x); @@ -180,7 +184,7 @@ void GpuIndexCagra::copyFrom(const faiss::IndexHNSWCagra* index) { } } - index_ = std::make_shared( + index_ = std::make_shared( this->resources_.get(), this->d, index->ntotal, @@ -226,17 +230,33 @@ void GpuIndexCagra::copyTo(faiss::IndexHNSWCagra* index) const { index->hnsw.set_default_probas(M, 1.0 / log(M)); auto n_train = this->ntotal; - auto train_dataset = index_->get_training_dataset(); + float* train_dataset; + auto dataset = index_->get_training_dataset(); + bool allocation = false; + if (getDeviceForAddress(dataset) >= 0) { + train_dataset = new float[n_train * index->d]; + allocation = true; + raft::copy( + train_dataset, + dataset, + n_train * index->d, + this->resources_->getRaftHandleCurrentDevice().get_stream()); + } else { + train_dataset = 
const_cast(dataset); + } // turn off as level 0 is copied from CAGRA graph index->init_level0 = false; if (!index->base_level_only) { - index->add(n_train, train_dataset.data()); + index->add(n_train, train_dataset); } else { index->hnsw.prepare_level_tab(n_train, false); - index->storage->add(n_train, train_dataset.data()); + index->storage->add(n_train, train_dataset); index->ntotal = n_train; } + if (allocation) { + delete[] train_dataset; + } auto graph = get_knngraph(); diff --git a/faiss/gpu/GpuIndexCagra.h b/faiss/gpu/GpuIndexCagra.h index 9c39e6a95a..d6fae29b58 100644 --- a/faiss/gpu/GpuIndexCagra.h +++ b/faiss/gpu/GpuIndexCagra.h @@ -34,12 +34,12 @@ struct IndexHNSWCagra; namespace faiss { namespace gpu { -class RaftCagra; +class CuvsCagra; enum class graph_build_algo { /// Use IVF-PQ to build all-neighbors knn graph IVF_PQ, - /// Experimental, use NN-Descent to build all-neighbors knn graph + /// Use NN-Descent to build all-neighbors knn graph NN_DESCENT }; @@ -175,6 +175,8 @@ struct GpuIndexCagraConfig : public GpuIndexConfig { IVFPQBuildCagraConfig* ivf_pq_params = nullptr; IVFPQSearchCagraConfig* ivf_pq_search_params = nullptr; + float refine_rate = 2.0f; + bool store_dataset = true; }; enum class search_algo { @@ -276,7 +278,7 @@ struct GpuIndexCagra : public GpuIndex { const GpuIndexCagraConfig cagraConfig_; /// Instance that we own; contains the inverted lists - std::shared_ptr index_; + std::shared_ptr index_; }; } // namespace gpu diff --git a/faiss/gpu/GpuIndexFlat.cu b/faiss/gpu/GpuIndexFlat.cu index baa27d6e85..eb87e082e9 100644 --- a/faiss/gpu/GpuIndexFlat.cu +++ b/faiss/gpu/GpuIndexFlat.cu @@ -18,8 +18,8 @@ #include #include -#if defined USE_NVIDIA_RAFT -#include +#if defined USE_NVIDIA_CUVS +#include #endif namespace faiss { @@ -93,19 +93,19 @@ GpuIndexFlat::GpuIndexFlat( GpuIndexFlat::~GpuIndexFlat() {} void GpuIndexFlat::resetIndex_(int dims) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS - if (should_use_raft(config_)) { - data_.reset(new RaftFlatIndex( + if (should_use_cuvs(config_)) { + data_.reset(new CuvsFlatIndex( resources_.get(), dims, flatConfig_.useFloat16, config_.memorySpace)); } else #else - if (should_use_raft(config_)) { + if (should_use_cuvs(config_)) { FAISS_THROW_MSG( - "RAFT has not been compiled into the current version so it cannot be used."); + "cuVS has not been compiled into the current version so it cannot be used."); } else #endif { diff --git a/faiss/gpu/GpuIndexIVF.cu b/faiss/gpu/GpuIndexIVF.cu index 846bc197e4..a882d8a3d2 100644 --- a/faiss/gpu/GpuIndexIVF.cu +++ b/faiss/gpu/GpuIndexIVF.cu @@ -92,7 +92,7 @@ void GpuIndexIVF::init_() { GpuIndexFlatConfig config = ivfConfig_.flatConfig; // inherit our same device config.device = config_.device; - config.use_raft = config_.use_raft; + config.use_cuvs = config_.use_cuvs; if (metric_type == faiss::METRIC_L2) { quantizer = new GpuIndexFlatL2(resources_, d, config); diff --git a/faiss/gpu/GpuIndexIVFFlat.cu b/faiss/gpu/GpuIndexIVFFlat.cu index 0d8396b193..ceeb2dda76 100644 --- a/faiss/gpu/GpuIndexIVFFlat.cu +++ b/faiss/gpu/GpuIndexIVFFlat.cu @@ -15,10 +15,10 @@ #include #include -#if defined USE_NVIDIA_RAFT -#include -#include -#include +#if defined USE_NVIDIA_CUVS +#include +#include +#include #endif #include @@ -73,8 +73,8 @@ GpuIndexIVFFlat::GpuIndexIVFFlat( ivfFlatConfig_(config), reserveMemoryVecs_(0) { FAISS_THROW_IF_NOT_MSG( - !should_use_raft(config), - "GpuIndexIVFFlat: RAFT does not support separate coarseQuantizer"); + !should_use_cuvs(config), + "GpuIndexIVFFlat: cuVS 
does not support separate coarseQuantizer"); // We could have been passed an already trained coarse quantizer. There is // no other quantizer that we need to train, so this is sufficient if (this->is_trained) { @@ -100,9 +100,9 @@ GpuIndexIVFFlat::~GpuIndexIVFFlat() {} void GpuIndexIVFFlat::reserveMemory(size_t numVecs) { DeviceScope scope(config_.device); - if (should_use_raft(config_)) { + if (should_use_cuvs(config_)) { FAISS_THROW_MSG( - "Pre-allocation of IVF lists is not supported with RAFT enabled."); + "Pre-allocation of IVF lists is not supported with cuVS enabled."); } reserveMemoryVecs_ = numVecs; @@ -120,8 +120,8 @@ void GpuIndexIVFFlat::copyFrom(const faiss::IndexIVFFlat* index) { // Clear out our old data index_.reset(); - // skip base class allocations if RAFT is enabled - if (!should_use_raft(config_)) { + // skip base class allocations if cuVS is enabled + if (!should_use_cuvs(config_)) { baseIndex_.reset(); } @@ -213,12 +213,12 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) { if (this->is_trained) { FAISS_ASSERT(index_); - if (should_use_raft(config_)) { - // if RAFT is enabled, copy the IVF centroids to the RAFT index in - // case it has been reset. This is because reset clears the RAFT - // index and its centroids. + if (should_use_cuvs(config_)) { + // copy the IVF centroids to the cuVS index + // in case it has been reset. This is because `reset` clears the + // cuVS index and its centroids. // TODO: change this once the coarse quantizer is separated from - // RAFT index + // cuVS index updateQuantizer(); }; return; @@ -226,8 +226,8 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) { FAISS_ASSERT(!index_); - if (should_use_raft(config_)) { -#if defined USE_NVIDIA_RAFT + if (should_use_cuvs(config_)) { +#if defined USE_NVIDIA_CUVS setIndex_( resources_.get(), this->d, @@ -242,30 +242,43 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) { const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_flat::index_params raft_idx_params; - raft_idx_params.n_lists = nlist; - raft_idx_params.metric = metricFaissToRaft(metric_type, false); - raft_idx_params.add_data_on_build = false; - raft_idx_params.kmeans_trainset_fraction = + cuvs::neighbors::ivf_flat::index_params cuvs_index_params; + cuvs_index_params.n_lists = nlist; + cuvs_index_params.metric = metricFaissToCuvs(metric_type, false); + cuvs_index_params.add_data_on_build = false; + cuvs_index_params.kmeans_trainset_fraction = static_cast(cp.max_points_per_centroid * nlist) / static_cast(n); - raft_idx_params.kmeans_n_iters = cp.niter; - - auto raftIndex_ = - std::static_pointer_cast(index_); - - raft::neighbors::ivf_flat::index raft_ivfflat_index = - raft::neighbors::ivf_flat::build( - raft_handle, raft_idx_params, x, n, (idx_t)d); + cuvs_index_params.kmeans_n_iters = cp.niter; + + auto cuvsIndex_ = + std::static_pointer_cast(index_); + + std::optional> + cuvs_ivfflat_index; + + if (getDeviceForAddress(x) >= 0) { + auto dataset_d = + raft::make_device_matrix_view(x, n, d); + cuvs_ivfflat_index = cuvs::neighbors::ivf_flat::build( + raft_handle, cuvs_index_params, dataset_d); + } else { + auto dataset_h = + raft::make_host_matrix_view(x, n, d); + cuvs_ivfflat_index = cuvs::neighbors::ivf_flat::build( + raft_handle, cuvs_index_params, dataset_h); + } - quantizer->train(nlist, raft_ivfflat_index.centers().data_handle()); - quantizer->add(nlist, raft_ivfflat_index.centers().data_handle()); + quantizer->train( + nlist, 
cuvs_ivfflat_index.value().centers().data_handle()); + quantizer->add( + nlist, cuvs_ivfflat_index.value().centers().data_handle()); raft_handle.sync_stream(); - raftIndex_->setRaftIndex(std::move(raft_ivfflat_index)); + cuvsIndex_->setCuvsIndex(std::move(*cuvs_ivfflat_index)); #else FAISS_THROW_MSG( - "RAFT has not been compiled into the current version so it cannot be used."); + "cuVS has not been compiled into the current version so it cannot be used."); #endif } else { // FIXME: GPUize more of this @@ -295,9 +308,9 @@ void GpuIndexIVFFlat::train(idx_t n, const float* x) { baseIndex_ = std::static_pointer_cast(index_); if (reserveMemoryVecs_) { - if (should_use_raft(config_)) { + if (should_use_cuvs(config_)) { FAISS_THROW_MSG( - "Pre-allocation of IVF lists is not supported with RAFT enabled."); + "Pre-allocation of IVF lists is not supported with cuVS enabled."); } else index_->reserveMemory(reserveMemoryVecs_); } @@ -317,16 +330,16 @@ void GpuIndexIVFFlat::setIndex_( bool interleavedLayout, IndicesOptions indicesOptions, MemorySpace space) { - if (should_use_raft(config_)) { -#if defined USE_NVIDIA_RAFT + if (should_use_cuvs(config_)) { +#if defined USE_NVIDIA_CUVS FAISS_THROW_IF_NOT_MSG( ivfFlatConfig_.indicesOptions == INDICES_64_BIT, - "RAFT only supports INDICES_64_BIT"); + "cuVS only supports INDICES_64_BIT"); if (!ivfFlatConfig_.interleavedLayout) { fprintf(stderr, - "WARN: interleavedLayout is set to False with RAFT enabled. This will be ignored.\n"); + "WARN: interleavedLayout is set to False with cuVS enabled. This will be ignored.\n"); } - index_.reset(new RaftIVFFlat( + index_.reset(new CuvsIVFFlat( resources, dim, nlist, @@ -339,7 +352,7 @@ void GpuIndexIVFFlat::setIndex_( space)); #else FAISS_THROW_MSG( - "RAFT has not been compiled into the current version so it cannot be used."); + "cuVS has not been compiled into the current version so it cannot be used."); #endif } else { index_.reset(new IVFFlat( diff --git a/faiss/gpu/GpuIndexIVFPQ.cu b/faiss/gpu/GpuIndexIVFPQ.cu index 27cc7ecc9e..da0e5ac8f3 100644 --- a/faiss/gpu/GpuIndexIVFPQ.cu +++ b/faiss/gpu/GpuIndexIVFPQ.cu @@ -15,11 +15,10 @@ #include #include -#if defined USE_NVIDIA_RAFT -#include -#include -#include -#include +#if defined USE_NVIDIA_CUVS +#include +#include +#include #endif #include @@ -95,8 +94,8 @@ GpuIndexIVFPQ::GpuIndexIVFPQ( this->is_trained = false; FAISS_THROW_IF_NOT_MSG( - !config.use_raft, - "GpuIndexIVFPQ: RAFT does not support separate coarseQuantizer"); + !config.use_cuvs, + "GpuIndexIVFPQ: cuVS does not support separate coarseQuantizer"); verifyPQSettings_(); } @@ -112,8 +111,8 @@ void GpuIndexIVFPQ::copyFrom(const faiss::IndexIVFPQ* index) { // Clear out our old data index_.reset(); - // skip base class allocations if RAFT is enabled - if (!should_use_raft(config_)) { + // skip base class allocations if cuVS is enabled + if (!should_use_cuvs(config_)) { baseIndex_.reset(); } @@ -323,7 +322,7 @@ void GpuIndexIVFPQ::trainResidualQuantizer_(idx_t n, const float* x) { try { GpuIndexFlatConfig config; config.device = ivfpqConfig_.device; - config.use_raft = false; + config.use_cuvs = false; GpuIndexFlatL2 pqIndex(resources_, pq.dsub, config); pq.assign_index = &pqIndex; @@ -349,12 +348,12 @@ void GpuIndexIVFPQ::train(idx_t n, const float* x) { if (this->is_trained) { FAISS_ASSERT(index_); - if (should_use_raft(config_)) { - // if RAFT is enabled, copy the IVF centroids to the RAFT index in - // case it has been reset. 
This is because reset clears the RAFT + if (should_use_cuvs(config_)) { + // if cuVS is enabled, copy the IVF centroids to the cuVS index in + // case it has been reset. This is because reset clears the cuVS // index and its centroids. // TODO: change this once the coarse quantizer is separated from - // RAFT index + // cuVS index updateQuantizer(); }; return; @@ -362,13 +361,13 @@ void GpuIndexIVFPQ::train(idx_t n, const float* x) { FAISS_ASSERT(!index_); - // RAFT does not support using an external index for assignment. Fall back + // cuVS does not support using an external index for assignment. Fall back // to the classical GPU impl - if (should_use_raft(config_)) { -#if defined USE_NVIDIA_RAFT + if (should_use_cuvs(config_)) { +#if defined USE_NVIDIA_CUVS if (pq.assign_index) { fprintf(stderr, - "WARN: The Product Quantizer's assign_index will be ignored with RAFT enabled.\n"); + "WARN: The Product Quantizer's assign_index will be ignored with cuVS enabled.\n"); } // first initialize the index. The PQ centroids will be updated // retroactively. @@ -390,44 +389,54 @@ void GpuIndexIVFPQ::train(idx_t n, const float* x) { const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_pq::index_params raft_idx_params; - raft_idx_params.n_lists = nlist; - raft_idx_params.metric = metricFaissToRaft(metric_type, false); - raft_idx_params.kmeans_trainset_fraction = + cuvs::neighbors::ivf_pq::index_params cuvs_index_params; + cuvs_index_params.n_lists = nlist; + cuvs_index_params.metric = metricFaissToCuvs(metric_type, false); + cuvs_index_params.kmeans_trainset_fraction = static_cast(cp.max_points_per_centroid * nlist) / static_cast(n); - raft_idx_params.kmeans_n_iters = cp.niter; - raft_idx_params.pq_bits = bitsPerCode_; - raft_idx_params.pq_dim = subQuantizers_; - raft_idx_params.conservative_memory_allocation = false; - raft_idx_params.add_data_on_build = false; + cuvs_index_params.kmeans_n_iters = cp.niter; + cuvs_index_params.pq_bits = bitsPerCode_; + cuvs_index_params.pq_dim = subQuantizers_; + cuvs_index_params.conservative_memory_allocation = false; + cuvs_index_params.add_data_on_build = false; - auto raftIndex_ = std::static_pointer_cast(index_); + auto cuvsIndex_ = std::static_pointer_cast(index_); - raft::neighbors::ivf_pq::index raft_ivfpq_index = - raft::neighbors::ivf_pq::build( - raft_handle, raft_idx_params, x, n, (idx_t)d); + std::optional> cuvs_ivfpq_index; - auto raft_centers = raft::make_device_matrix( + if (getDeviceForAddress(x) >= 0) { + auto dataset_d = + raft::make_device_matrix_view(x, n, d); + cuvs_ivfpq_index = cuvs::neighbors::ivf_pq::build( + raft_handle, cuvs_index_params, dataset_d); + } else { + auto dataset_h = + raft::make_host_matrix_view(x, n, d); + cuvs_ivfpq_index = cuvs::neighbors::ivf_pq::build( + raft_handle, cuvs_index_params, dataset_h); + } + + auto cluster_centers = raft::make_device_matrix( raft_handle, - raft_ivfpq_index.n_lists(), - raft_ivfpq_index.dim()); - raft::neighbors::ivf_pq::helpers::extract_centers( - raft_handle, raft_ivfpq_index, raft_centers.view()); + cuvs_ivfpq_index.value().n_lists(), + cuvs_ivfpq_index.value().dim()); + cuvs::neighbors::ivf_pq::helpers::extract_centers( + raft_handle, cuvs_ivfpq_index.value(), cluster_centers.view()); - quantizer->train(nlist, raft_centers.data_handle()); - quantizer->add(nlist, raft_centers.data_handle()); + quantizer->train(nlist, cluster_centers.data_handle()); + quantizer->add(nlist, cluster_centers.data_handle()); raft::copy( pq.get_centroids(0, 
0), - raft_ivfpq_index.pq_centers().data_handle(), - raft_ivfpq_index.pq_centers().size(), + cuvs_ivfpq_index.value().pq_centers().data_handle(), + cuvs_ivfpq_index.value().pq_centers().size(), raft_handle.get_stream()); raft_handle.sync_stream(); - raftIndex_->setRaftIndex(std::move(raft_ivfpq_index)); + cuvsIndex_->setCuvsIndex(std::move(*cuvs_ivfpq_index)); #else FAISS_THROW_MSG( - "RAFT has not been compiled into the current version so it cannot be used."); + "cuVS has not been compiled into the current version so it cannot be used."); #endif } else { // FIXME: GPUize more of this @@ -484,9 +493,9 @@ void GpuIndexIVFPQ::setIndex_( float* pqCentroidData, IndicesOptions indicesOptions, MemorySpace space) { - if (should_use_raft(config_)) { -#if defined USE_NVIDIA_RAFT - index_.reset(new RaftIVFPQ( + if (should_use_cuvs(config_)) { +#if defined USE_NVIDIA_CUVS + index_.reset(new CuvsIVFPQ( resources, dim, nlist, @@ -502,7 +511,7 @@ void GpuIndexIVFPQ::setIndex_( space)); #else FAISS_THROW_MSG( - "RAFT has not been compiled into the current version so it cannot be used."); + "cuVS has not been compiled into the current version so it cannot be used."); #endif } else { index_.reset(new IVFPQ( @@ -529,10 +538,10 @@ void GpuIndexIVFPQ::verifyPQSettings_() const { FAISS_THROW_IF_NOT_MSG(nlist > 0, "nlist must be >0"); // up to a single byte per code - if (should_use_raft(config_)) { + if (should_use_cuvs(config_)) { if (!ivfpqConfig_.interleavedLayout) { fprintf(stderr, - "WARN: interleavedLayout is set to False with RAFT enabled. This will be ignored.\n"); + "WARN: interleavedLayout is set to False with cuVS enabled. This will be ignored.\n"); } FAISS_THROW_IF_NOT_FMT( bitsPerCode_ >= 4 && bitsPerCode_ <= 8, @@ -567,7 +576,7 @@ void GpuIndexIVFPQ::verifyPQSettings_() const { "is not supported", subQuantizers_); - if (!should_use_raft(config_)) { + if (!should_use_cuvs(config_)) { // Sub-quantizers must evenly divide dimensions available FAISS_THROW_IF_NOT_FMT( this->d % subQuantizers_ == 0, diff --git a/faiss/gpu/GpuIndexIVFPQ.h b/faiss/gpu/GpuIndexIVFPQ.h index 06b0cad274..072a0d81d5 100644 --- a/faiss/gpu/GpuIndexIVFPQ.h +++ b/faiss/gpu/GpuIndexIVFPQ.h @@ -34,7 +34,7 @@ struct GpuIndexIVFPQConfig : public GpuIndexIVFConfig { /// Use the alternative memory layout for the IVF lists /// WARNING: this is a feature under development, and is only supported with - /// RAFT enabled for the index. Do not use if RAFT is not enabled. + /// cuVS enabled for the index. Do not use if cuVS is not enabled. 
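Illustrative caller-side sketch (not part of the patch): with the flag renamed from use_raft to use_cuvs, opting an IVFPQ index into the cuVS backend looks roughly like the following. It assumes a build configured with -DFAISS_ENABLE_CUVS=ON; the constructor arguments are placeholders.

#include <faiss/gpu/GpuIndexIVFPQ.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>

void build_ivfpq_with_cuvs(const std::vector<float>& xb, int d, faiss::idx_t n) {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexIVFPQConfig config;
    config.use_cuvs = true; // renamed from use_raft in this patch
    // interleavedLayout is implied by the cuVS backend; leaving it false only
    // triggers the warning emitted by verifyPQSettings_() above.

    faiss::gpu::GpuIndexIVFPQ index(
            &res, d, /*nlist=*/1024, /*subQuantizers=*/16, /*bitsPerCode=*/8,
            faiss::METRIC_L2, config);

    // train() routes through the cuvs::neighbors::ivf_pq::build path shown above.
    index.train(n, xb.data());
    index.add(n, xb.data());
}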
bool interleavedLayout = false; /// Use GEMM-backed computation of PQ code distances for the no precomputed diff --git a/faiss/gpu/GpuResources.cpp b/faiss/gpu/GpuResources.cpp index 7cb5905315..1f0f2541f1 100644 --- a/faiss/gpu/GpuResources.cpp +++ b/faiss/gpu/GpuResources.cpp @@ -169,7 +169,7 @@ cudaStream_t GpuResources::getDefaultStreamCurrentDevice() { return getDefaultStream(getCurrentDevice()); } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS raft::device_resources& GpuResources::getRaftHandleCurrentDevice() { return getRaftHandle(getCurrentDevice()); } diff --git a/faiss/gpu/GpuResources.h b/faiss/gpu/GpuResources.h index 8edd8f3d81..3fec634fef 100644 --- a/faiss/gpu/GpuResources.h +++ b/faiss/gpu/GpuResources.h @@ -31,7 +31,7 @@ #include #include -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS #include #include #endif @@ -162,7 +162,7 @@ struct AllocRequest : public AllocInfo { /// The size in bytes of the allocation size_t size = 0; -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS rmm::mr::device_memory_resource* mr = nullptr; #endif }; @@ -212,7 +212,7 @@ class GpuResources { /// given device virtual cudaStream_t getDefaultStream(int device) = 0; -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS /// Returns the raft handle for the given device which can be used to /// make calls to other raft primitives. virtual raft::device_resources& getRaftHandle(int device) = 0; diff --git a/faiss/gpu/StandardGpuResources.cpp b/faiss/gpu/StandardGpuResources.cpp index 4aad6ea1f1..a91c7f693c 100644 --- a/faiss/gpu/StandardGpuResources.cpp +++ b/faiss/gpu/StandardGpuResources.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ * limitations under the License. 
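A minimal sketch (not from the patch) of how code guarded by USE_NVIDIA_CUVS reaches the per-device raft handle declared above; cuVS primitives still consume a raft::device_resources, so the accessor keeps its raft-flavored name.

#if defined USE_NVIDIA_CUVS
#include <faiss/gpu/GpuResources.h>
#include <raft/core/device_resources.hpp>

void sync_current_device(faiss::gpu::GpuResources* res) {
    // One handle is kept per device; it wraps the default Faiss stream.
    const raft::device_resources& handle = res->getRaftHandleCurrentDevice();
    handle.sync_stream(); // wait for work queued on that stream
}
#endif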
*/ -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS #include #include #include @@ -91,7 +91,7 @@ std::string allocsToString(const std::unordered_map& map) { StandardGpuResourcesImpl::StandardGpuResourcesImpl() : -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS mmr_(new rmm::mr::managed_memory_resource), pmr_(new rmm::mr::pinned_memory_resource), #endif @@ -130,6 +130,10 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() { FAISS_ASSERT_MSG( !allocError, "GPU memory allocations not properly cleaned up"); +#if defined USE_NVIDIA_CUVS + raftHandles_.clear(); +#endif + for (auto& entry : defaultStreams_) { DeviceScope scope(entry.first); @@ -159,7 +163,7 @@ StandardGpuResourcesImpl::~StandardGpuResourcesImpl() { } if (pinnedMemAlloc_) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS pmr_->deallocate(pinnedMemAlloc_, pinnedMemAllocSize_); #else auto err = cudaFreeHost(pinnedMemAlloc_); @@ -258,12 +262,12 @@ void StandardGpuResourcesImpl::setDefaultStream( if (prevStream != stream) { streamWait({stream}, {prevStream}); } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS // delete the raft handle for this device, which will be initialized // with the updated stream during any subsequent calls to getRaftHandle auto it2 = raftHandles_.find(device); if (it2 != raftHandles_.end()) { - raftHandles_.erase(it2); + raft::resource::set_cuda_stream(it2->second, stream); } #endif } @@ -283,15 +287,25 @@ void StandardGpuResourcesImpl::revertDefaultStream(int device) { cudaStream_t newStream = defaultStreams_[device]; streamWait({newStream}, {prevStream}); - } -#if defined USE_NVIDIA_RAFT - // delete the raft handle for this device, which will be initialized - // with the updated stream during any subsequent calls to getRaftHandle - auto it2 = raftHandles_.find(device); - if (it2 != raftHandles_.end()) { - raftHandles_.erase(it2); - } + +#if defined USE_NVIDIA_CUVS + // update the stream on the raft handle for this device + auto it2 = raftHandles_.find(device); + if (it2 != raftHandles_.end()) { + raft::resource::set_cuda_stream(it2->second, newStream); + } #endif + } else { +#if defined USE_NVIDIA_CUVS + // delete the raft handle for this device, which will be initialized + // with the updated stream during any subsequent calls to + // getRaftHandle + auto it2 = raftHandles_.find(device); + if (it2 != raftHandles_.end()) { + raftHandles_.erase(it2); + } +#endif + } } userDefaultStreams_.erase(device); @@ -324,7 +338,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) { // If this is the first device that we're initializing, create our // pinned memory allocation if (defaultStreams_.empty() && pinnedMemSize_ > 0) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS // If this is the first device that we're initializing, create our // pinned memory allocation if (defaultStreams_.empty() && pinnedMemSize_ > 0) { @@ -386,7 +400,7 @@ void StandardGpuResourcesImpl::initializeForDevice(int device) { defaultStreams_[device] = defaultStream; -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS raftHandles_.emplace(std::make_pair(device, defaultStream)); #endif @@ -452,7 +466,7 @@ cudaStream_t StandardGpuResourcesImpl::getDefaultStream(int device) { return defaultStreams_[device]; } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS raft::device_resources& StandardGpuResourcesImpl::getRaftHandle(int device) { initializeForDevice(device); @@ -523,7 +537,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) { // Otherwise, 
we can handle this locally p = tempMemory_[adjReq.device]->allocMemory(adjReq.stream, adjReq.size); } else if (adjReq.space == MemorySpace::Device) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS try { rmm::mr::device_memory_resource* current_mr = rmm::mr::get_per_device_resource( @@ -557,7 +571,7 @@ void* StandardGpuResourcesImpl::allocMemory(const AllocRequest& req) { } #endif } else if (adjReq.space == MemorySpace::Unified) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS try { // for now, use our own managed MR to do Unified Memory allocations. // TODO: change this to use the current device resource once RMM has @@ -626,7 +640,7 @@ void StandardGpuResourcesImpl::deallocMemory(int device, void* p) { } else if ( req.space == MemorySpace::Device || req.space == MemorySpace::Unified) { -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS req.mr->deallocate_async(p, req.size, req.stream); #else auto err = cudaFree(p); @@ -720,7 +734,7 @@ cudaStream_t StandardGpuResources::getDefaultStream(int device) { return res_->getDefaultStream(device); } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS raft::device_resources& StandardGpuResources::getRaftHandle(int device) { return res_->getRaftHandle(device); } diff --git a/faiss/gpu/StandardGpuResources.h b/faiss/gpu/StandardGpuResources.h index 7a48948663..322a341a00 100644 --- a/faiss/gpu/StandardGpuResources.h +++ b/faiss/gpu/StandardGpuResources.h @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ #pragma once -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS #include #include #endif @@ -80,7 +80,7 @@ class StandardGpuResourcesImpl : public GpuResources { /// this stream upon exit from an index or other Faiss GPU call. cudaStream_t getDefaultStream(int device) override; -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS /// Returns the raft handle for the given device which can be used to /// make calls to other raft primitives. raft::device_resources& getRaftHandle(int device) override; @@ -152,7 +152,7 @@ class StandardGpuResourcesImpl : public GpuResources { /// cuBLAS handle for each device std::unordered_map blasHandles_; -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS /// raft handle for each device std::unordered_map raftHandles_; @@ -235,7 +235,7 @@ class StandardGpuResources : public GpuResourcesProvider { /// Returns the current default stream cudaStream_t getDefaultStream(int device); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS /// Returns the raft handle for the given device which can be used to /// make calls to other raft primitives. raft::device_resources& getRaftHandle(int device); diff --git a/faiss/gpu/impl/RaftCagra.cu b/faiss/gpu/impl/CuvsCagra.cu similarity index 53% rename from faiss/gpu/impl/RaftCagra.cu rename to faiss/gpu/impl/CuvsCagra.cu index e0f302ae04..82e3007d59 100644 --- a/faiss/gpu/impl/RaftCagra.cu +++ b/faiss/gpu/impl/CuvsCagra.cu @@ -21,74 +21,65 @@ * limitations under the License. 
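Caller-side sketch (illustrative; the surrounding stream handling is an assumption of typical use): with the change above, swapping the default stream now rebinds the existing raft handle's CUDA stream via raft::resource::set_cuda_stream instead of discarding the handle.

#include <faiss/gpu/StandardGpuResources.h>
#include <cuda_runtime.h>

void run_on_external_stream(int device) {
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    faiss::gpu::StandardGpuResources res;
    // In a cuVS build, the per-device raft handle follows this stream.
    res.setDefaultStream(device, stream);

    // ... build / search GPU indexes here; all work is ordered on `stream` ...

    res.revertDefaultStream(device); // restore the stream Faiss originally created
    cudaStreamDestroy(stream);
}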
*/ +#include +#include #include -#include -#include -#include +#include +#include #include #include #include -#include -#include -#include namespace faiss { namespace gpu { -RaftCagra::RaftCagra( +CuvsCagra::CuvsCagra( GpuResources* resources, int dim, idx_t intermediate_graph_degree, idx_t graph_degree, faiss::cagra_build_algo graph_build_algo, size_t nn_descent_niter, + bool store_dataset, faiss::MetricType metric, float metricArg, IndicesOptions indicesOptions, - std::optional ivf_pq_params, - std::optional - ivf_pq_search_params) + std::optional ivf_pq_params, + std::optional + ivf_pq_search_params, + float refine_rate) : resources_(resources), dim_(dim), + graph_build_algo_(graph_build_algo), + nn_descent_niter_(nn_descent_niter), + store_dataset_(store_dataset), metric_(metric), metricArg_(metricArg), index_params_(), ivf_pq_params_(ivf_pq_params), - ivf_pq_search_params_(ivf_pq_search_params) { + ivf_pq_search_params_(ivf_pq_search_params), + refine_rate_(refine_rate) { FAISS_THROW_IF_NOT_MSG( metric == faiss::METRIC_L2 || metric == faiss::METRIC_INNER_PRODUCT, "CAGRA currently only supports L2 or Inner Product metric."); FAISS_THROW_IF_NOT_MSG( indicesOptions == faiss::gpu::INDICES_64_BIT, - "only INDICES_64_BIT is supported for RAFT CAGRA index"); + "only INDICES_64_BIT is supported for cuVS CAGRA index"); index_params_.intermediate_graph_degree = intermediate_graph_degree; index_params_.graph_degree = graph_degree; - index_params_.build_algo = - static_cast( - graph_build_algo); - index_params_.nn_descent_niter = nn_descent_niter; - if (!ivf_pq_params_) { - ivf_pq_params_ = - std::make_optional(); - } if (!ivf_pq_search_params_) { ivf_pq_search_params_ = - std::make_optional(); + std::make_optional(); } - index_params_.metric = metric_ == faiss::METRIC_L2 - ? raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct; - ivf_pq_params_->metric = metric_ == faiss::METRIC_L2 - ? raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct; + index_params_.metric = metricFaissToCuvs(metric_, false); reset(); } -RaftCagra::RaftCagra( +CuvsCagra::CuvsCagra( GpuResources* resources, int dim, idx_t n, @@ -107,19 +98,22 @@ RaftCagra::RaftCagra( "CAGRA currently only supports L2 or Inner Product metric."); FAISS_THROW_IF_NOT_MSG( indicesOptions == faiss::gpu::INDICES_64_BIT, - "only INDICES_64_BIT is supported for RAFT CAGRA index"); + "only INDICES_64_BIT is supported for cuVS CAGRA index"); auto distances_on_gpu = getDeviceForAddress(distances) >= 0; auto knn_graph_on_gpu = getDeviceForAddress(knn_graph) >= 0; FAISS_ASSERT(distances_on_gpu == knn_graph_on_gpu); + storage_ = distances; + n_ = n; + const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); if (distances_on_gpu && knn_graph_on_gpu) { raft_handle.sync_stream(); - // Copying to host so that raft::neighbors::cagra::index + // Copying to host so that cuvs::neighbors::cagra::index // creates an owning copy of the knn graph on device auto knn_graph_copy = raft::make_host_matrix(n, graph_degree); @@ -132,11 +126,10 @@ RaftCagra::RaftCagra( raft::make_device_matrix_view( distances, n, dim); - raft_knn_index = raft::neighbors::cagra::index( + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( raft_handle, - metric_ == faiss::METRIC_L2 - ? 
raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct, + metricFaissToCuvs(metric_, false), distances_mds, raft::make_const_mdspan(knn_graph_copy.view())); } else if (!distances_on_gpu && !knn_graph_on_gpu) { @@ -151,11 +144,10 @@ RaftCagra::RaftCagra( auto distances_mds = raft::make_host_matrix_view( distances, n, dim); - raft_knn_index = raft::neighbors::cagra::index( + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( raft_handle, - metric_ == faiss::METRIC_L2 - ? raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct, + metricFaissToCuvs(metric_, false), distances_mds, raft::make_const_mdspan(knn_graph_copy.view())); } else { @@ -164,86 +156,56 @@ RaftCagra::RaftCagra( } } -void RaftCagra::train(idx_t n, const float* x) { +void CuvsCagra::train(idx_t n, const float* x) { + storage_ = x; + n_ = n; + const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - if (index_params_.build_algo == - raft::neighbors::cagra::graph_build_algo::IVF_PQ) { - std::optional> knn_graph( - raft::make_host_matrix( - n, index_params_.intermediate_graph_degree)); - if (getDeviceForAddress(x) >= 0) { - auto dataset_d = - raft::make_device_matrix_view( - x, n, dim_); - raft::neighbors::cagra::build_knn_graph( - raft_handle, - dataset_d, - knn_graph->view(), - 1.0f, - ivf_pq_params_, - ivf_pq_search_params_); - } else { - auto dataset_h = raft::make_host_matrix_view( - x, n, dim_); - raft::neighbors::cagra::build_knn_graph( - raft_handle, - dataset_h, - knn_graph->view(), - 1.0f, - ivf_pq_params_, - ivf_pq_search_params_); - } - auto cagra_graph = raft::make_host_matrix( - n, index_params_.graph_degree); - - raft::neighbors::cagra::optimize( - raft_handle, knn_graph->view(), cagra_graph.view()); - - // free intermediate graph before trying to create the index - knn_graph.reset(); - - if (getDeviceForAddress(x) >= 0) { - auto dataset_d = - raft::make_device_matrix_view( - x, n, dim_); - raft_knn_index = raft::neighbors::cagra::index( - raft_handle, - metric_ == faiss::METRIC_L2 - ? raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct, - dataset_d, - raft::make_const_mdspan(cagra_graph.view())); - } else { - auto dataset_h = raft::make_host_matrix_view( - x, n, dim_); - raft_knn_index = raft::neighbors::cagra::index( - raft_handle, - metric_ == faiss::METRIC_L2 - ? 
raft::distance::DistanceType::L2Expanded - : raft::distance::DistanceType::InnerProduct, - dataset_h, - raft::make_const_mdspan(cagra_graph.view())); + + if (!ivf_pq_params_) { + ivf_pq_params_ = cuvs::neighbors::ivf_pq::index_params::from_dataset( + raft::make_extents( + static_cast(n_), static_cast(dim_)), + metricFaissToCuvs(metric_, false)); + } + if (graph_build_algo_ == faiss::cagra_build_algo::IVF_PQ) { + cuvs::neighbors::cagra::graph_build_params::ivf_pq_params + graph_build_params; + graph_build_params.build_params = ivf_pq_params_.value(); + graph_build_params.search_params = ivf_pq_search_params_.value(); + graph_build_params.refinement_rate = refine_rate_.value(); + index_params_.graph_build_params = graph_build_params; + if (index_params_.graph_degree == + index_params_.intermediate_graph_degree) { + index_params_.intermediate_graph_degree = + 1.5 * index_params_.graph_degree; } + } else { + cuvs::neighbors::cagra::graph_build_params::nn_descent_params + graph_build_params(index_params_.intermediate_graph_degree); + graph_build_params.max_iterations = nn_descent_niter_; + index_params_.graph_build_params = graph_build_params; + } + if (getDeviceForAddress(x) >= 0) { + auto dataset = + raft::make_device_matrix_view(x, n, dim_); + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + cuvs::neighbors::cagra::build( + raft_handle, index_params_, dataset)); } else { - if (getDeviceForAddress(x) >= 0) { - raft_knn_index = raft::runtime::neighbors::cagra::build( - raft_handle, - index_params_, - raft::make_device_matrix_view( - x, n, dim_)); - } else { - raft_knn_index = raft::runtime::neighbors::cagra::build( - raft_handle, - index_params_, - raft::make_host_matrix_view( - x, n, dim_)); - } + auto dataset = + raft::make_host_matrix_view(x, n, dim_); + cuvs_index = std::make_shared< + cuvs::neighbors::cagra::index>( + cuvs::neighbors::cagra::build( + raft_handle, index_params_, dataset)); } } -void RaftCagra::search( +void CuvsCagra::search( Tensor& queries, int k, Tensor& outDistances, @@ -267,10 +229,22 @@ void RaftCagra::search( idx_t cols = queries.getSize(1); idx_t k_ = k; - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index); FAISS_ASSERT(numQueries > 0); FAISS_ASSERT(cols == dim_); + if (!store_dataset_) { + if (getDeviceForAddress(storage_) >= 0) { + auto dataset = raft::make_device_matrix_view( + storage_, n_, dim_); + cuvs_index->update_dataset(raft_handle, dataset); + } else { + auto dataset = raft::make_host_matrix_view( + storage_, n_, dim_); + cuvs_index->update_dataset(raft_handle, dataset); + } + } + auto queries_view = raft::make_device_matrix_view( queries.data(), numQueries, cols); auto distances_view = raft::make_device_matrix_view( @@ -278,18 +252,18 @@ void RaftCagra::search( auto indices_view = raft::make_device_matrix_view( outIndices.data(), numQueries, k_); - raft::neighbors::cagra::search_params search_pams; + cuvs::neighbors::cagra::search_params search_pams; search_pams.max_queries = max_queries; search_pams.itopk_size = itopk_size; search_pams.max_iterations = max_iterations; search_pams.algo = - static_cast(graph_search_algo); + static_cast(graph_search_algo); search_pams.team_size = team_size; search_pams.search_width = search_width; search_pams.min_iterations = min_iterations; search_pams.thread_block_size = thread_block_size; search_pams.hashmap_mode = - static_cast(hash_mode); + static_cast(hash_mode); search_pams.hashmap_min_bitlen = hashmap_min_bitlen; search_pams.hashmap_max_fill_rate = hashmap_max_fill_rate; 
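Condensed sketch of the cuVS call this file now wraps (header paths and parameter values are assumptions, not taken from the patch): building a CAGRA graph directly from host-resident vectors, mirroring the host_matrix_view branch of CuvsCagra::train above.

#include <cuvs/neighbors/cagra.hpp>
#include <raft/core/device_resources.hpp>
#include <raft/core/host_mdspan.hpp>
#include <cstdint>

void build_cagra_graph(raft::device_resources& handle,
                       const float* host_data, int64_t n, int64_t dim) {
    cuvs::neighbors::cagra::index_params params;
    params.intermediate_graph_degree = 128; // same knobs CuvsCagra forwards
    params.graph_degree = 64;

    auto dataset =
            raft::make_host_matrix_view<const float, int64_t>(host_data, n, dim);
    auto index = cuvs::neighbors::cagra::build(handle, params, dataset);

    handle.sync_stream();
    // index.graph_degree() and index.size() are now queryable, which is what
    // CuvsCagra::get_knngraph_degree() reports back to Faiss.
}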
search_pams.num_random_samplings = num_random_samplings; @@ -298,10 +272,10 @@ void RaftCagra::search( auto indices_copy = raft::make_device_matrix( raft_handle, numQueries, k_); - raft::runtime::neighbors::cagra::search( + cuvs::neighbors::cagra::search( raft_handle, search_pams, - raft_knn_index.value(), + *cuvs_index, queries_view, indices_copy.view(), distances_view); @@ -312,22 +286,22 @@ void RaftCagra::search( indices_view.data_handle()); } -void RaftCagra::reset() { - raft_knn_index.reset(); +void CuvsCagra::reset() { + cuvs_index.reset(); } -idx_t RaftCagra::get_knngraph_degree() const { - FAISS_ASSERT(raft_knn_index.has_value()); - return static_cast(raft_knn_index.value().graph_degree()); +idx_t CuvsCagra::get_knngraph_degree() const { + FAISS_ASSERT(cuvs_index); + return static_cast(cuvs_index->graph_degree()); } -std::vector RaftCagra::get_knngraph() const { - FAISS_ASSERT(raft_knn_index.has_value()); +std::vector CuvsCagra::get_knngraph() const { + FAISS_ASSERT(cuvs_index); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); auto stream = raft_handle.get_stream(); - auto device_graph = raft_knn_index.value().graph(); + auto device_graph = cuvs_index->graph(); std::vector host_graph( device_graph.extent(0) * device_graph.extent(1)); @@ -343,29 +317,8 @@ std::vector RaftCagra::get_knngraph() const { return host_graph; } -std::vector RaftCagra::get_training_dataset() const { - FAISS_ASSERT(raft_knn_index.has_value()); - const raft::device_resources& raft_handle = - resources_->getRaftHandleCurrentDevice(); - auto stream = raft_handle.get_stream(); - - auto device_dataset = raft_knn_index.value().dataset(); - - std::vector host_dataset( - device_dataset.extent(0) * device_dataset.extent(1)); - - RAFT_CUDA_TRY(cudaMemcpy2DAsync( - host_dataset.data(), - sizeof(float) * dim_, - device_dataset.data_handle(), - sizeof(float) * device_dataset.stride(0), - sizeof(float) * dim_, - device_dataset.extent(0), - cudaMemcpyDefault, - raft_handle.get_stream())); - raft_handle.sync_stream(); - - return host_dataset; +const float* CuvsCagra::get_training_dataset() const { + return storage_; } } // namespace gpu diff --git a/faiss/gpu/impl/RaftCagra.cuh b/faiss/gpu/impl/CuvsCagra.cuh similarity index 72% rename from faiss/gpu/impl/RaftCagra.cuh rename to faiss/gpu/impl/CuvsCagra.cuh index 39d7d75765..c466aceec4 100644 --- a/faiss/gpu/impl/RaftCagra.cuh +++ b/faiss/gpu/impl/CuvsCagra.cuh @@ -31,8 +31,8 @@ #include -#include -#include +#include +#include namespace faiss { @@ -45,24 +45,26 @@ enum class cagra_hash_mode { HASH, SMALL, AUTO }; namespace gpu { -class RaftCagra { +class CuvsCagra { public: - RaftCagra( + CuvsCagra( GpuResources* resources, int dim, idx_t intermediate_graph_degree, idx_t graph_degree, faiss::cagra_build_algo graph_build_algo, size_t nn_descent_niter, + bool store_dataset, faiss::MetricType metric, float metricArg, IndicesOptions indicesOptions, - std::optional ivf_pq_params = + std::optional ivf_pq_params = std::nullopt, - std::optional - ivf_pq_search_params = std::nullopt); + std::optional + ivf_pq_search_params = std::nullopt, + float refine_rate = 2.0f); - RaftCagra( + CuvsCagra( GpuResources* resources, int dim, idx_t n, @@ -73,7 +75,7 @@ class RaftCagra { float metricArg, IndicesOptions indicesOptions); - ~RaftCagra() = default; + ~CuvsCagra() = default; void train(idx_t n, const float* x); @@ -102,31 +104,44 @@ class RaftCagra { std::vector get_knngraph() const; - std::vector get_training_dataset() const; + const float* 
get_training_dataset() const; private: /// Collection of GPU resources that we use GpuResources* resources_; + /// Training dataset + const float* storage_; + int n_; + /// Expected dimensionality of the vectors const int dim_; + /// Controls the underlying cuVS index if it should store the dataset in + /// device memory + bool store_dataset_; + /// Metric type of the index faiss::MetricType metric_; /// Metric arg float metricArg_; - /// Parameters to build RAFT CAGRA index - raft::neighbors::cagra::index_params index_params_; + /// Parameters to build cuVS CAGRA index + faiss::cagra_build_algo graph_build_algo_; + cuvs::neighbors::cagra::index_params index_params_; /// Parameters to build CAGRA graph using IVF PQ - std::optional ivf_pq_params_; - std::optional ivf_pq_search_params_; + std::optional ivf_pq_params_; + std::optional ivf_pq_search_params_; + std::optional refine_rate_; + + /// Parameters to build CAGRA graph using NN Descent + size_t nn_descent_niter_ = 20; - /// Instance of trained RAFT CAGRA index - std::optional> - raft_knn_index{std::nullopt}; + /// Instance of trained cuVS CAGRA index + std::shared_ptr> cuvs_index{ + nullptr}; }; } // namespace gpu diff --git a/faiss/gpu/impl/RaftFlatIndex.cu b/faiss/gpu/impl/CuvsFlatIndex.cu similarity index 86% rename from faiss/gpu/impl/RaftFlatIndex.cu rename to faiss/gpu/impl/CuvsFlatIndex.cu index c06ca62cb5..08f63300ef 100644 --- a/faiss/gpu/impl/RaftFlatIndex.cu +++ b/faiss/gpu/impl/CuvsFlatIndex.cu @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,33 +21,33 @@ * limitations under the License. */ -#include -#include +#include +#include #include +#include #include +#include #include #include #include -#include - -#define RAFT_NAME "raft" +#include namespace faiss { namespace gpu { -using namespace raft::distance; -using namespace raft::neighbors; +using namespace cuvs::distance; +using namespace cuvs::neighbors; -RaftFlatIndex::RaftFlatIndex( +CuvsFlatIndex::CuvsFlatIndex( GpuResources* res, int dim, bool useFloat16, MemorySpace space) : FlatIndex(res, dim, useFloat16, space) {} -void RaftFlatIndex::query( +void CuvsFlatIndex::query( Tensor& input, int k, faiss::MetricType metric, @@ -56,7 +56,7 @@ void RaftFlatIndex::query( Tensor& outIndices, bool exactDistance) { /** - * RAFT doesn't yet support half-precision in bfknn. + * cuVS doesn't yet support half-precision in bfknn. 
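Usage sketch (illustrative, not part of the patch): a brute-force GpuIndexFlat with use_cuvs set goes through CuvsFlatIndex::query for float32 storage; as the comment here notes, float16 storage still falls back to the stock FlatIndex path.

#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <vector>

void flat_search_with_cuvs(const float* xb, faiss::idx_t nb,
                           const float* xq, faiss::idx_t nq, int d, int k) {
    faiss::gpu::StandardGpuResources res;

    faiss::gpu::GpuIndexFlatConfig config;
    config.use_cuvs = true; // float32 storage, so the cuVS brute-force path is used

    faiss::gpu::GpuIndexFlatL2 index(&res, d, config);
    index.add(nb, xb);

    std::vector<float> distances(nq * k);
    std::vector<faiss::idx_t> labels(nq * k);
    index.search(nq, xq, k, distances.data(), labels.data());
}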
* Use FlatIndex for float16 for now */ if (useFloat16_) { @@ -92,16 +92,16 @@ void RaftFlatIndex::query( outDistances.getSize(0), outDistances.getSize(1)); - DistanceType distance = metricFaissToRaft(metric, exactDistance); + cuvsDistanceType distance = metricFaissToCuvs(metric, exactDistance); std::optional> norms_view = raft::make_device_vector_view( norms_.data(), norms_.getSize(0)); - raft::neighbors::brute_force::index idx( + cuvs::neighbors::brute_force::index idx( handle, index, norms_view, distance, metricArg); - raft::neighbors::brute_force::search( - handle, idx, search, inds, dists); + cuvs::neighbors::brute_force::search( + handle, idx, search, inds, dists, std::nullopt); if (metric == MetricType::METRIC_Lp) { raft::linalg::unary_op( @@ -121,7 +121,7 @@ void RaftFlatIndex::query( } } -void RaftFlatIndex::query( +void CuvsFlatIndex::query( Tensor& vecs, int k, faiss::MetricType metric, diff --git a/faiss/gpu/impl/RaftFlatIndex.cuh b/faiss/gpu/impl/CuvsFlatIndex.cuh similarity index 95% rename from faiss/gpu/impl/RaftFlatIndex.cuh rename to faiss/gpu/impl/CuvsFlatIndex.cuh index 9d20fade61..b856351cfa 100644 --- a/faiss/gpu/impl/RaftFlatIndex.cuh +++ b/faiss/gpu/impl/CuvsFlatIndex.cuh @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,9 +40,9 @@ class GpuResources; /// the vectors in float32. /// If float16, we store the vectors in both float16 and float32, where float32 /// data is possibly needed for certain residual operations -class RaftFlatIndex : public FlatIndex { +class CuvsFlatIndex : public FlatIndex { public: - RaftFlatIndex( + CuvsFlatIndex( GpuResources* res, int dim, bool useFloat16, diff --git a/faiss/gpu/impl/RaftIVFFlat.cu b/faiss/gpu/impl/CuvsIVFFlat.cu similarity index 76% rename from faiss/gpu/impl/RaftIVFFlat.cu rename to faiss/gpu/impl/CuvsIVFFlat.cu index 181cf94968..0de7100c72 100644 --- a/faiss/gpu/impl/RaftIVFFlat.cu +++ b/faiss/gpu/impl/CuvsIVFFlat.cu @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,15 +25,17 @@ #include #include -#include +#include +#include +#include #include #include -#include #include -#include -#include -#include +#include +#include +#include +#include #include #include @@ -41,7 +43,7 @@ namespace faiss { namespace gpu { -RaftIVFFlat::RaftIVFFlat( +CuvsIVFFlat::CuvsIVFFlat( GpuResources* res, int dim, int nlist, @@ -60,32 +62,34 @@ RaftIVFFlat::RaftIVFFlat( useResidual, scalarQ, interleavedLayout, - // skip ptr allocations in base class (handled by RAFT + // skip ptr allocations in base class (handled by cuVS // internally) indicesOptions, space) { FAISS_THROW_IF_NOT_MSG( indicesOptions == INDICES_64_BIT, - "only INDICES_64_BIT is supported for RAFT index"); + "only INDICES_64_BIT is supported for cuVS index"); } -RaftIVFFlat::~RaftIVFFlat() {} +CuvsIVFFlat::~CuvsIVFFlat() {} -void RaftIVFFlat::reserveMemory(idx_t numVecs) { +void CuvsIVFFlat::reserveMemory(idx_t numVecs) { fprintf(stderr, - "WARN: reserveMemory is NOP. Pre-allocation of IVF lists is not supported with RAFT enabled.\n"); + "WARN: reserveMemory is NOP. 
Pre-allocation of IVF lists is not supported with cuVS enabled.\n"); } -void RaftIVFFlat::reset() { - raft_knn_index.reset(); +void CuvsIVFFlat::reset() { + cuvs_index.reset(); } -void RaftIVFFlat::setRaftIndex( - raft::neighbors::ivf_flat::index&& idx) { - raft_knn_index.emplace(std::move(idx)); +void CuvsIVFFlat::setCuvsIndex( + cuvs::neighbors::ivf_flat::index&& idx) { + cuvs_index = + std::make_shared>( + std::move(idx)); } -void RaftIVFFlat::search( +void CuvsIVFFlat::search( Index* coarseQuantizer, Tensor& queries, int nprobe, @@ -93,7 +97,7 @@ void RaftIVFFlat::search( Tensor& outDistances, Tensor& outIndices) { /// NB: The coarse quantizer is ignored here. The user is assumed to have - /// called updateQuantizer() to modify the RAFT index if the quantizer was + /// called updateQuantizer() to modify the cuVS index if the quantizer was /// modified externally uint32_t numQueries = queries.getSize(0); @@ -101,14 +105,14 @@ void RaftIVFFlat::search( uint32_t k_ = k; // Device is already set in GpuIndex::search - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index != nullptr); FAISS_ASSERT(numQueries > 0); FAISS_ASSERT(cols == dim_); FAISS_THROW_IF_NOT(nprobe > 0 && nprobe <= numLists_); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_flat::search_params pams; + cuvs::neighbors::ivf_flat::search_params pams; pams.n_probes = nprobe; auto queries_view = raft::make_device_matrix_view( @@ -118,10 +122,10 @@ void RaftIVFFlat::search( auto out_dists_view = raft::make_device_matrix_view( outDistances.data(), (idx_t)numQueries, (idx_t)k_); - raft::neighbors::ivf_flat::search( + cuvs::neighbors::ivf_flat::search( raft_handle, pams, - raft_knn_index.value(), + *cuvs_index, queries_view, out_inds_view, out_dists_view); @@ -158,15 +162,15 @@ void RaftIVFFlat::search( }); } -idx_t RaftIVFFlat::addVectors( +idx_t CuvsIVFFlat::addVectors( Index* coarseQuantizer, Tensor& vecs, Tensor& indices) { /// NB: The coarse quantizer is ignored here. 
The user is assumed to have - /// called updateQuantizer() to update the RAFT index if the quantizer was + /// called updateQuantizer() to update the cuVS index if the quantizer was /// modified externally - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index != nullptr); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); @@ -174,27 +178,27 @@ idx_t RaftIVFFlat::addVectors( /// Remove rows containing NaNs idx_t n_rows_valid = inplaceGatherFilteredRows(resources_, vecs, indices); - raft_knn_index.emplace(raft::neighbors::ivf_flat::extend( + cuvs::neighbors::ivf_flat::extend( raft_handle, raft::make_device_matrix_view( vecs.data(), n_rows_valid, dim_), std::make_optional>( raft::make_device_vector_view( indices.data(), n_rows_valid)), - raft_knn_index.value())); + cuvs_index.get()); return n_rows_valid; } -idx_t RaftIVFFlat::getListLength(idx_t listId) const { - FAISS_ASSERT(raft_knn_index.has_value()); +idx_t CuvsIVFFlat::getListLength(idx_t listId) const { + FAISS_ASSERT(cuvs_index != nullptr); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); uint32_t size; raft::update_host( &size, - raft_knn_index.value().list_sizes().data_handle() + listId, + cuvs_index->list_sizes().data_handle() + listId, 1, raft_handle.get_stream()); raft_handle.sync_stream(); @@ -203,8 +207,8 @@ idx_t RaftIVFFlat::getListLength(idx_t listId) const { } /// Return the list indices of a particular list back to the CPU -std::vector RaftIVFFlat::getListIndices(idx_t listId) const { - FAISS_ASSERT(raft_knn_index.has_value()); +std::vector CuvsIVFFlat::getListIndices(idx_t listId) const { + FAISS_ASSERT(cuvs_index != nullptr); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); auto stream = raft_handle.get_stream(); @@ -218,9 +222,7 @@ std::vector RaftIVFFlat::getListIndices(idx_t listId) const { raft::update_host( &list_indices_ptr, - const_cast( - raft_knn_index.value().inds_ptrs().data_handle()) + - listId, + const_cast(cuvs_index->inds_ptrs().data_handle()) + listId, 1, stream); raft_handle.sync_stream(); @@ -232,13 +234,13 @@ std::vector RaftIVFFlat::getListIndices(idx_t listId) const { } /// Return the encoded vectors of a particular list back to the CPU -std::vector RaftIVFFlat::getListVectorData( +std::vector CuvsIVFFlat::getListVectorData( idx_t listId, bool gpuFormat) const { if (gpuFormat) { - FAISS_THROW_MSG("gpuFormat should be false for RAFT indices"); + FAISS_THROW_MSG("gpuFormat should be false for cuVS indices"); } - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index != nullptr); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); @@ -259,7 +261,7 @@ std::vector RaftIVFFlat::getListVectorData( // fetch the list data ptr on host raft::update_host( &list_data_ptr, - raft_knn_index.value().data_ptrs().data_handle() + listId, + cuvs_index->data_ptrs().data_handle() + listId, 1, stream); raft_handle.sync_stream(); @@ -271,15 +273,15 @@ std::vector RaftIVFFlat::getListVectorData( stream); raft_handle.sync_stream(); - RaftIVFFlatCodePackerInterleaved packer( - (size_t)listSize, dim_, raft_knn_index.value().veclen()); + CuvsIVFFlatCodePackerInterleaved packer( + (size_t)listSize, dim_, cuvs_index->veclen()); packer.unpack_all(interleaved_codes.data(), flat_codes.data()); return flat_codes; } /// Performs search when we are already given the IVF cells to look at /// (GpuIndexIVF::search_preassigned implementation) -void 
RaftIVFFlat::searchPreassigned( +void CuvsIVFFlat::searchPreassigned( Index* coarseQuantizer, Tensor& vecs, Tensor& ivfDistances, @@ -291,7 +293,7 @@ void RaftIVFFlat::searchPreassigned( // TODO: Fill this in! } -void RaftIVFFlat::updateQuantizer(Index* quantizer) { +void CuvsIVFFlat::updateQuantizer(Index* quantizer) { FAISS_THROW_IF_NOT(quantizer->is_trained); // Must match our basic IVF parameters @@ -304,14 +306,15 @@ void RaftIVFFlat::updateQuantizer(Index* quantizer) { const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_flat::index_params pams; + cuvs::neighbors::ivf_flat::index_params pams; pams.add_data_on_build = false; - pams.metric = metricFaissToRaft(metric_, false); + pams.metric = metricFaissToCuvs(metric_, false); pams.n_lists = numLists_; - raft_knn_index.emplace(raft_handle, pams, static_cast(dim_)); - - raft::neighbors::ivf_flat::helpers::reset_index( - raft_handle, &raft_knn_index.value()); + cuvs_index = + std::make_shared>( + raft_handle, pams, static_cast(dim_)); + cuvs::neighbors::ivf_flat::helpers::reset_index( + raft_handle, cuvs_index.get()); // If the index instance is a GpuIndexFlat, then we can use direct access to // the centroids within. @@ -330,7 +333,7 @@ void RaftIVFFlat::updateQuantizer(Index* quantizer) { gpuData->reconstruct(0, gpuData->getSize(), centroids); raft::update_device( - raft_knn_index.value().centers().data_handle(), + cuvs_index->centers().data_handle(), centroids.data(), total_elems, stream); @@ -339,7 +342,7 @@ void RaftIVFFlat::updateQuantizer(Index* quantizer) { auto centroids = gpuData->getVectorsFloat32Ref(); raft::update_device( - raft_knn_index.value().centers().data_handle(), + cuvs_index->centers().data_handle(), centroids.data(), total_elems, stream); @@ -352,14 +355,14 @@ void RaftIVFFlat::updateQuantizer(Index* quantizer) { quantizer->reconstruct_n(0, quantizer->ntotal, vecs.data()); raft::update_device( - raft_knn_index.value().centers().data_handle(), + cuvs_index->centers().data_handle(), vecs.data(), total_elems, stream); } } -void RaftIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { +void CuvsIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { size_t nlist = ivf ? ivf->nlist : 0; size_t ntotal = ivf ? 
ivf->compute_ntotal() : 0; @@ -370,12 +373,12 @@ void RaftIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { std::vector indices_(ntotal); // the index must already exist - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index != nullptr); - auto& raft_lists = raft_knn_index.value().lists(); + auto& cuvs_index_lists = cuvs_index->lists(); // conservative memory alloc for cloning cpu inverted lists - raft::neighbors::ivf_flat::list_spec raft_list_spec{ + cuvs::neighbors::ivf_flat::list_spec ivf_list_spec{ static_cast(dim_), true}; for (size_t i = 0; i < nlist; ++i) { @@ -392,20 +395,20 @@ void RaftIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { // store the list size list_sizes_[i] = static_cast(listSize); - // This RAFT list must currently be empty + // This cuVS list must currently be empty FAISS_ASSERT(getListLength(i) == 0); - raft::neighbors::ivf::resize_list( + cuvs::neighbors::ivf::resize_list( raft_handle, - raft_lists[i], - raft_list_spec, + cuvs_index_lists[i], + ivf_list_spec, (uint32_t)listSize, (uint32_t)0); } // Update the pointers and the sizes - raft::neighbors::ivf_flat::helpers::recompute_internal_state( - raft_handle, &(raft_knn_index.value())); + cuvs::neighbors::ivf_flat::helpers::recompute_internal_state( + raft_handle, cuvs_index.get()); for (size_t i = 0; i < nlist; ++i) { size_t listSize = ivf->list_size(i); @@ -414,18 +417,18 @@ void RaftIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { } raft::update_device( - raft_knn_index.value().list_sizes().data_handle(), + cuvs_index->list_sizes().data_handle(), list_sizes_.data(), nlist, raft_handle.get_stream()); // Precompute the centers vector norms for L2Expanded distance if (this->metric_ == faiss::METRIC_L2) { - raft_knn_index.value().allocate_center_norms(raft_handle); + cuvs_index->allocate_center_norms(raft_handle); raft::linalg::rowNorm( - raft_knn_index.value().center_norms().value().data_handle(), - raft_knn_index.value().centers().data_handle(), - raft_knn_index.value().dim(), + cuvs_index->center_norms().value().data_handle(), + cuvs_index->centers().data_handle(), + cuvs_index->dim(), (uint32_t)nlist, raft::linalg::L2Norm, true, @@ -433,7 +436,7 @@ void RaftIVFFlat::copyInvertedListsFrom(const InvertedLists* ivf) { } } -size_t RaftIVFFlat::getGpuVectorsEncodingSize_(idx_t numVecs) const { +size_t CuvsIVFFlat::getGpuVectorsEncodingSize_(idx_t numVecs) const { idx_t bits = 32 /* float */; // bytes to encode a block of 32 vectors (single dimension) @@ -444,13 +447,13 @@ size_t RaftIVFFlat::getGpuVectorsEncodingSize_(idx_t numVecs) const { // number of blocks of 32 vectors we have idx_t numBlocks = - utils::divUp(numVecs, raft::neighbors::ivf_flat::kIndexGroupSize); + utils::divUp(numVecs, cuvs::neighbors::ivf_flat::kIndexGroupSize); // total size to encode numVecs return bytesPerBlock * numBlocks; } -void RaftIVFFlat::addEncodedVectorsToList_( +void CuvsIVFFlat::addEncodedVectorsToList_( idx_t listId, const void* codes, const idx_t* indices, @@ -470,8 +473,8 @@ void RaftIVFFlat::addEncodedVectorsToList_( FAISS_ASSERT(gpuListSizeInBytes <= (size_t)std::numeric_limits::max()); std::vector interleaved_codes(gpuListSizeInBytes); - RaftIVFFlatCodePackerInterleaved packer( - (size_t)numVecs, (uint32_t)dim_, raft_knn_index.value().veclen()); + CuvsIVFFlatCodePackerInterleaved packer( + (size_t)numVecs, (uint32_t)dim_, cuvs_index->veclen()); packer.pack_all( reinterpret_cast(codes), interleaved_codes.data()); @@ -483,7 +486,7 @@ void RaftIVFFlat::addEncodedVectorsToList_( /// 
fetch the list data ptr on host raft::update_host( &list_data_ptr, - raft_knn_index.value().data_ptrs().data_handle() + listId, + cuvs_index->data_ptrs().data_handle() + listId, 1, stream); raft_handle.sync_stream(); @@ -500,7 +503,7 @@ void RaftIVFFlat::addEncodedVectorsToList_( // fetch the list indices ptr on host raft::update_host( &list_indices_ptr, - raft_knn_index.value().inds_ptrs().data_handle() + listId, + cuvs_index->inds_ptrs().data_handle() + listId, 1, stream); raft_handle.sync_stream(); @@ -508,7 +511,7 @@ void RaftIVFFlat::addEncodedVectorsToList_( raft::update_device(list_indices_ptr, indices, numVecs, stream); } -RaftIVFFlatCodePackerInterleaved::RaftIVFFlatCodePackerInterleaved( +CuvsIVFFlatCodePackerInterleaved::CuvsIVFFlatCodePackerInterleaved( size_t list_size, uint32_t dim, uint32_t chunk_size) { @@ -519,28 +522,28 @@ RaftIVFFlatCodePackerInterleaved::RaftIVFFlatCodePackerInterleaved( nvec = list_size; code_size = dim * 4; block_size = - utils::roundUp(nvec, raft::neighbors::ivf_flat::kIndexGroupSize); + utils::roundUp(nvec, cuvs::neighbors::ivf_flat::kIndexGroupSize); } -void RaftIVFFlatCodePackerInterleaved::pack_1( +void CuvsIVFFlatCodePackerInterleaved::pack_1( const uint8_t* flat_code, size_t offset, uint8_t* block) const { - raft::neighbors::ivf_flat::codepacker::pack_1( - reinterpret_cast(flat_code), - reinterpret_cast(block), + cuvs::neighbors::ivf_flat::helpers::codepacker::pack_1( + reinterpret_cast(flat_code), + reinterpret_cast(block), dim, chunk_size, static_cast(offset)); } -void RaftIVFFlatCodePackerInterleaved::unpack_1( +void CuvsIVFFlatCodePackerInterleaved::unpack_1( const uint8_t* block, size_t offset, uint8_t* flat_code) const { - raft::neighbors::ivf_flat::codepacker::unpack_1( - reinterpret_cast(block), - reinterpret_cast(flat_code), + cuvs::neighbors::ivf_flat::helpers::codepacker::unpack_1( + reinterpret_cast(block), + reinterpret_cast(flat_code), dim, chunk_size, static_cast(offset)); diff --git a/faiss/gpu/impl/RaftIVFFlat.cuh b/faiss/gpu/impl/CuvsIVFFlat.cuh similarity index 89% rename from faiss/gpu/impl/RaftIVFFlat.cuh rename to faiss/gpu/impl/CuvsIVFFlat.cuh index a2dcef6ce5..72764c8446 100644 --- a/faiss/gpu/impl/RaftIVFFlat.cuh +++ b/faiss/gpu/impl/CuvsIVFFlat.cuh @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
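Sketch of the path the code above serves (parameter values are placeholders): cloning a trained CPU IndexIVFFlat onto the GPU with cuVS enabled, which drives copyInvertedListsFrom() and the interleaved code packer.

#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>

void clone_ivfflat_to_gpu(const float* xt, faiss::idx_t nt,
                          const float* xb, faiss::idx_t nb, int d) {
    // Build and populate a CPU index first.
    faiss::IndexFlatL2 quantizer(d);
    faiss::IndexIVFFlat cpu_index(&quantizer, d, /*nlist=*/1024);
    cpu_index.train(nt, xt);
    cpu_index.add(nb, xb);

    // Copy it into a cuVS-backed GPU index; the inverted lists are repacked
    // into the interleaved layout by CuvsIVFFlatCodePackerInterleaved.
    faiss::gpu::StandardGpuResources res;
    faiss::gpu::GpuIndexIVFFlatConfig config;
    config.use_cuvs = true;

    faiss::gpu::GpuIndexIVFFlat gpu_index(
            &res, d, /*nlist=*/1024, faiss::METRIC_L2, config);
    gpu_index.copyFrom(&cpu_index);
}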
@@ -27,7 +27,7 @@ #include #include -#include +#include #include @@ -35,9 +35,9 @@ namespace faiss { namespace gpu { -class RaftIVFFlat : public IVFFlat { +class CuvsIVFFlat : public IVFFlat { public: - RaftIVFFlat( + CuvsIVFFlat( GpuResources* resources, int dim, int nlist, @@ -50,7 +50,7 @@ class RaftIVFFlat : public IVFFlat { IndicesOptions indicesOptions, MemorySpace space); - ~RaftIVFFlat() override; + ~CuvsIVFFlat() override; /// Reserve GPU memory in our inverted lists for this number of vectors void reserveMemory(idx_t numVecs) override; @@ -86,7 +86,7 @@ class RaftIVFFlat : public IVFFlat { Tensor& vecs, Tensor& indices) override; - /// Clear out the Raft index + /// Clear out the cuVS index void reset() override; /// For debugging purposes, return the list length of a particular @@ -100,15 +100,15 @@ class RaftIVFFlat : public IVFFlat { std::vector getListVectorData(idx_t listId, bool gpuFormat) const override; - /// Update our Raft index with this quantizer instance; may be a CPU + /// Update our cuVS index with this quantizer instance; may be a CPU /// or GPU quantizer void updateQuantizer(Index* quantizer) override; /// Copy all inverted lists from a CPU representation to ourselves void copyInvertedListsFrom(const InvertedLists* ivf) override; - /// Replace the Raft index - void setRaftIndex(raft::neighbors::ivf_flat::index&& idx); + /// Replace the cuVS index + void setCuvsIndex(cuvs::neighbors::ivf_flat::index&& idx); private: /// Adds a set of codes and indices to a list, with the representation @@ -127,12 +127,12 @@ class RaftIVFFlat : public IVFFlat { /// this is the size for an entire IVF list size_t getGpuVectorsEncodingSize_(idx_t numVecs) const override; - std::optional> - raft_knn_index{std::nullopt}; + std::shared_ptr> cuvs_index{ + nullptr}; }; -struct RaftIVFFlatCodePackerInterleaved : CodePacker { - RaftIVFFlatCodePackerInterleaved( +struct CuvsIVFFlatCodePackerInterleaved : CodePacker { + CuvsIVFFlatCodePackerInterleaved( size_t list_size, uint32_t dim, uint32_t chuk_size); diff --git a/faiss/gpu/impl/RaftIVFPQ.cu b/faiss/gpu/impl/CuvsIVFPQ.cu similarity index 77% rename from faiss/gpu/impl/RaftIVFPQ.cu rename to faiss/gpu/impl/CuvsIVFPQ.cu index 15f49bbffd..2fc94de0f0 100644 --- a/faiss/gpu/impl/RaftIVFPQ.cu +++ b/faiss/gpu/impl/CuvsIVFPQ.cu @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,13 +22,15 @@ */ #include -#include +#include +#include +#include #include -#include #include -#include -#include +#include +#include +#include #include #include @@ -36,7 +38,7 @@ namespace faiss { namespace gpu { -RaftIVFPQ::RaftIVFPQ( +CuvsIVFPQ::CuvsIVFPQ( GpuResources* resources, int dim, idx_t nlist, @@ -60,44 +62,44 @@ RaftIVFPQ::RaftIVFPQ( useFloat16LookupTables, useMMCodeDistance, interleavedLayout, - // skip ptr allocations in base class (handled by RAFT + // skip ptr allocations in base class (handled by cuVS // internally) false, pqCentroidData, indicesOptions, space) { FAISS_THROW_IF_NOT_MSG( indicesOptions == INDICES_64_BIT, - "only INDICES_64_BIT is supported for RAFT index"); + "only INDICES_64_BIT is supported for cuVS index"); } -RaftIVFPQ::~RaftIVFPQ() {} +CuvsIVFPQ::~CuvsIVFPQ() {} -void RaftIVFPQ::reserveMemory(idx_t numVecs) { +void CuvsIVFPQ::reserveMemory(idx_t numVecs) { fprintf(stderr, - "WARN: reserveMemory is NOP. Pre-allocation of IVF lists is not supported with RAFT enabled.\n"); + "WARN: reserveMemory is NOP. Pre-allocation of IVF lists is not supported with cuVS enabled.\n"); } -void RaftIVFPQ::reset() { - raft_knn_index.reset(); +void CuvsIVFPQ::reset() { + cuvs_index.reset(); } -size_t RaftIVFPQ::reclaimMemory() { +size_t CuvsIVFPQ::reclaimMemory() { fprintf(stderr, - "WARN: reclaimMemory is NOP. reclaimMemory is not supported with RAFT enabled.\n"); + "WARN: reclaimMemory is NOP. reclaimMemory is not supported with cuVS enabled.\n"); return 0; } -void RaftIVFPQ::setPrecomputedCodes(Index* quantizer, bool enable) {} +void CuvsIVFPQ::setPrecomputedCodes(Index* quantizer, bool enable) {} -idx_t RaftIVFPQ::getListLength(idx_t listId) const { - FAISS_ASSERT(raft_knn_index.has_value()); +idx_t CuvsIVFPQ::getListLength(idx_t listId) const { + FAISS_ASSERT(cuvs_index); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); uint32_t size; raft::update_host( &size, - raft_knn_index.value().list_sizes().data_handle() + listId, + cuvs_index->list_sizes().data_handle() + listId, 1, raft_handle.get_stream()); raft_handle.sync_stream(); @@ -105,7 +107,7 @@ idx_t RaftIVFPQ::getListLength(idx_t listId) const { return static_cast(size); } -void RaftIVFPQ::updateQuantizer(Index* quantizer) { +void CuvsIVFPQ::updateQuantizer(Index* quantizer) { FAISS_THROW_IF_NOT(quantizer->is_trained); // Must match our basic IVF parameters @@ -116,18 +118,19 @@ void RaftIVFPQ::updateQuantizer(Index* quantizer) { const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_pq::index_params pams; - pams.metric = metricFaissToRaft(metric_, false); - pams.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE; + cuvs::neighbors::ivf_pq::index_params pams; + pams.metric = metricFaissToCuvs(metric_, false); + pams.codebook_kind = cuvs::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE; pams.n_lists = numLists_; pams.pq_bits = bitsPerSubQuantizer_; pams.pq_dim = numSubQuantizers_; - raft_knn_index.emplace(raft_handle, pams, static_cast(dim_)); + cuvs_index = std::make_shared>( + raft_handle, pams, static_cast(dim_)); - raft::neighbors::ivf_pq::helpers::reset_index( - raft_handle, &raft_knn_index.value()); - raft::neighbors::ivf_pq::helpers::make_rotation_matrix( - raft_handle, &(raft_knn_index.value()), false); + cuvs::neighbors::ivf_pq::helpers::reset_index( + raft_handle, cuvs_index.get()); + cuvs::neighbors::ivf_pq::helpers::make_rotation_matrix( + raft_handle, cuvs_index.get(), false); // If the index 
instance is a GpuIndexFlat, then we can use direct access to // the centroids within. @@ -146,9 +149,9 @@ void RaftIVFPQ::updateQuantizer(Index* quantizer) { // as float32 and store locally gpuData->reconstruct(0, gpuData->getSize(), centroids); - raft::neighbors::ivf_pq::helpers::set_centers( + cuvs::neighbors::ivf_pq::helpers::set_centers( raft_handle, - &(raft_knn_index.value()), + cuvs_index.get(), raft::make_device_matrix_view( centroids.data(), numLists_, dim_)); } else { @@ -157,9 +160,9 @@ void RaftIVFPQ::updateQuantizer(Index* quantizer) { // reference it auto centroids = gpuData->getVectorsFloat32Ref(); - raft::neighbors::ivf_pq::helpers::set_centers( + cuvs::neighbors::ivf_pq::helpers::set_centers( raft_handle, - &(raft_knn_index.value()), + cuvs_index.get(), raft::make_device_matrix_view( centroids.data(), numLists_, dim_)); } @@ -177,9 +180,9 @@ void RaftIVFPQ::updateQuantizer(Index* quantizer) { centroids.copyFrom(vecs, stream); - raft::neighbors::ivf_pq::helpers::set_centers( + cuvs::neighbors::ivf_pq::helpers::set_centers( raft_handle, - &(raft_knn_index.value()), + cuvs_index.get(), raft::make_device_matrix_view( centroids.data(), numLists_, dim_)); } @@ -188,8 +191,8 @@ void RaftIVFPQ::updateQuantizer(Index* quantizer) { } /// Return the list indices of a particular list back to the CPU -std::vector RaftIVFPQ::getListIndices(idx_t listId) const { - FAISS_ASSERT(raft_knn_index.has_value()); +std::vector CuvsIVFPQ::getListIndices(idx_t listId) const { + FAISS_ASSERT(cuvs_index); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); auto stream = raft_handle.get_stream(); @@ -203,9 +206,7 @@ std::vector RaftIVFPQ::getListIndices(idx_t listId) const { raft::update_host( &list_indices_ptr, - const_cast( - raft_knn_index.value().inds_ptrs().data_handle()) + - listId, + const_cast(cuvs_index->inds_ptrs().data_handle()) + listId, 1, stream); raft_handle.sync_stream(); @@ -218,7 +219,7 @@ std::vector RaftIVFPQ::getListIndices(idx_t listId) const { /// Performs search when we are already given the IVF cells to look at /// (GpuIndexIVF::search_preassigned implementation) -void RaftIVFPQ::searchPreassigned( +void CuvsIVFPQ::searchPreassigned( Index* coarseQuantizer, Tensor& vecs, Tensor& ivfDistances, @@ -230,19 +231,18 @@ void RaftIVFPQ::searchPreassigned( // TODO: Fill this in! } -size_t RaftIVFPQ::getGpuListEncodingSize_(idx_t listId) { - return static_cast( - raft_knn_index.value().get_list_size_in_bytes(listId)); +size_t CuvsIVFPQ::getGpuListEncodingSize_(idx_t listId) { + return static_cast(cuvs_index->get_list_size_in_bytes(listId)); } /// Return the encoded vectors of a particular list back to the CPU -std::vector RaftIVFPQ::getListVectorData(idx_t listId, bool gpuFormat) +std::vector CuvsIVFPQ::getListVectorData(idx_t listId, bool gpuFormat) const { if (gpuFormat) { FAISS_THROW_MSG( - "gpuFormat should be false for RAFT indices. Unpacked codes are flat."); + "gpuFormat should be false for cuVS indices. 
Unpacked codes are flat."); } - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); @@ -265,13 +265,14 @@ std::vector RaftIVFPQ::getListVectorData(idx_t listId, bool gpuFormat) auto codes_d = raft::make_device_vector( raft_handle, static_cast(bufferSize)); - raft::neighbors::ivf_pq::helpers::unpack_contiguous_list_data( - raft_handle, - raft_knn_index.value(), - codes_d.data_handle(), - batchSize, - listId, - offset_b); + cuvs::neighbors::ivf_pq::helpers::codepacker:: + unpack_contiguous_list_data( + raft_handle, + *cuvs_index, + codes_d.data_handle(), + batchSize, + listId, + offset_b); // Copy the flat PQ codes to host raft::update_host( @@ -287,7 +288,7 @@ std::vector RaftIVFPQ::getListVectorData(idx_t listId, bool gpuFormat) /// Find the approximate k nearest neighbors for `queries` against /// our database -void RaftIVFPQ::search( +void CuvsIVFPQ::search( Index* coarseQuantizer, Tensor& queries, int nprobe, @@ -296,17 +297,17 @@ void RaftIVFPQ::search( Tensor& outIndices) { uint32_t numQueries = queries.getSize(0); uint32_t cols = queries.getSize(1); - idx_t k_ = std::min(static_cast(k), raft_knn_index.value().size()); + idx_t k_ = std::min(static_cast(k), cuvs_index->size()); // Device is already set in GpuIndex::search - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index); FAISS_ASSERT(numQueries > 0); FAISS_ASSERT(cols == dim_); FAISS_THROW_IF_NOT(nprobe > 0 && nprobe <= numLists_); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); - raft::neighbors::ivf_pq::search_params pams; + cuvs::neighbors::ivf_pq::search_params pams; pams.n_probes = nprobe; pams.lut_dtype = useFloat16LookupTables_ ? CUDA_R_16F : CUDA_R_32F; @@ -317,10 +318,10 @@ void RaftIVFPQ::search( auto out_dists_view = raft::make_device_matrix_view( outDistances.data(), (idx_t)numQueries, (idx_t)k_); - raft::neighbors::ivf_pq::search( + cuvs::neighbors::ivf_pq::search( raft_handle, pams, - raft_knn_index.value(), + *cuvs_index, queries_view, out_inds_view, out_dists_view); @@ -358,15 +359,15 @@ void RaftIVFPQ::search( raft_handle.sync_stream(); } -idx_t RaftIVFPQ::addVectors( +idx_t CuvsIVFPQ::addVectors( Index* coarseQuantizer, Tensor& vecs, Tensor& indices) { /// NB: The coarse quantizer is ignored here. The user is assumed to have - /// called updateQuantizer() to update the RAFT index if the quantizer was + /// called updateQuantizer() to update the cuVS index if the quantizer was /// modified externally - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index); const raft::device_resources& raft_handle = resources_->getRaftHandleCurrentDevice(); @@ -374,19 +375,18 @@ idx_t RaftIVFPQ::addVectors( /// Remove rows containing NaNs idx_t n_rows_valid = inplaceGatherFilteredRows(resources_, vecs, indices); - raft_knn_index.emplace(raft::neighbors::ivf_pq::extend( + cuvs::neighbors::ivf_pq::extend( raft_handle, raft::make_device_matrix_view( vecs.data(), n_rows_valid, dim_), - std::make_optional>( - raft::make_device_vector_view( - indices.data(), n_rows_valid)), - raft_knn_index.value())); + raft::make_device_vector_view( + indices.data(), n_rows_valid), + cuvs_index.get()); return n_rows_valid; } -void RaftIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { +void CuvsIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { size_t nlist = ivf ? ivf->nlist : 0; size_t ntotal = ivf ? 
ivf->compute_ntotal() : 0; @@ -397,12 +397,12 @@ void RaftIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { std::vector indices_(ntotal); // the index must already exist - FAISS_ASSERT(raft_knn_index.has_value()); + FAISS_ASSERT(cuvs_index); - auto& raft_lists = raft_knn_index.value().lists(); + auto& cuvs_index_lists = cuvs_index->lists(); // conservative memory alloc for cloning cpu inverted lists - raft::neighbors::ivf_pq::list_spec raft_list_spec{ + cuvs::neighbors::ivf_pq::list_spec ivf_list_spec{ static_cast(bitsPerSubQuantizer_), static_cast(numSubQuantizers_), true}; @@ -421,26 +421,26 @@ void RaftIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { // store the list size list_sizes_[i] = static_cast(listSize); - // This RAFT list must currently be empty + // This cuVS list must currently be empty FAISS_ASSERT(getListLength(i) == 0); - raft::neighbors::ivf::resize_list( + cuvs::neighbors::ivf::resize_list( raft_handle, - raft_lists[i], - raft_list_spec, + cuvs_index_lists[i], + ivf_list_spec, static_cast(listSize), static_cast(0)); } raft::update_device( - raft_knn_index.value().list_sizes().data_handle(), + cuvs_index->list_sizes().data_handle(), list_sizes_.data(), nlist, raft_handle.get_stream()); // Update the pointers and the sizes - raft::neighbors::ivf_pq::helpers::recompute_internal_state( - raft_handle, &(raft_knn_index.value())); + cuvs::neighbors::ivf_pq::helpers::recompute_internal_state( + raft_handle, cuvs_index.get()); for (size_t i = 0; i < nlist; ++i) { size_t listSize = ivf->list_size(i); @@ -449,12 +449,13 @@ void RaftIVFPQ::copyInvertedListsFrom(const InvertedLists* ivf) { } } -void RaftIVFPQ::setRaftIndex(raft::neighbors::ivf_pq::index&& idx) { - raft_knn_index.emplace(std::move(idx)); +void CuvsIVFPQ::setCuvsIndex(cuvs::neighbors::ivf_pq::index&& idx) { + cuvs_index = std::make_shared>( + std::move(idx)); setBasePQCentroids_(); } -void RaftIVFPQ::addEncodedVectorsToList_( +void CuvsIVFPQ::addEncodedVectorsToList_( idx_t listId, const void* codes, const idx_t* indices, @@ -490,9 +491,9 @@ void RaftIVFPQ::addEncodedVectorsToList_( bufferSize, stream); - raft::neighbors::ivf_pq::helpers::pack_contiguous_list_data( + cuvs::neighbors::ivf_pq::helpers::codepacker::pack_contiguous_list_data( raft_handle, - &(raft_knn_index.value()), + cuvs_index.get(), codes_d.data_handle(), batchSize, listId, @@ -505,7 +506,7 @@ void RaftIVFPQ::addEncodedVectorsToList_( // fetch the list indices ptr on host raft::update_host( &list_indices_ptr, - raft_knn_index.value().inds_ptrs().data_handle() + listId, + cuvs_index->inds_ptrs().data_handle() + listId, 1, stream); raft_handle.sync_stream(); @@ -513,23 +514,23 @@ void RaftIVFPQ::addEncodedVectorsToList_( raft::update_device(list_indices_ptr, indices, numVecs, stream); } -void RaftIVFPQ::setPQCentroids_() { +void CuvsIVFPQ::setPQCentroids_() { auto stream = resources_->getDefaultStreamCurrentDevice(); raft::copy( - raft_knn_index.value().pq_centers().data_handle(), + cuvs_index->pq_centers().data_handle(), pqCentroidsInnermostCode_.data(), pqCentroidsInnermostCode_.numElements(), stream); } -void RaftIVFPQ::setBasePQCentroids_() { +void CuvsIVFPQ::setBasePQCentroids_() { auto stream = resources_->getDefaultStreamCurrentDevice(); raft::copy( pqCentroidsInnermostCode_.data(), - raft_knn_index.value().pq_centers().data_handle(), - raft_knn_index.value().pq_centers().size(), + cuvs_index->pq_centers().data_handle(), + cuvs_index->pq_centers().size(), stream); DeviceTensor pqCentroidsMiddleCode( diff --git 
a/faiss/gpu/impl/RaftIVFPQ.cuh b/faiss/gpu/impl/CuvsIVFPQ.cuh
similarity index 88%
rename from faiss/gpu/impl/RaftIVFPQ.cuh
rename to faiss/gpu/impl/CuvsIVFPQ.cuh
index 9a54dda79b..e6a3e1edc4 100644
--- a/faiss/gpu/impl/RaftIVFPQ.cuh
+++ b/faiss/gpu/impl/CuvsIVFPQ.cuh
@@ -6,7 +6,7 @@
 * LICENSE file in the root directory of this source tree.
 */
/*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -26,17 +26,18 @@
#include
#include
-#include
+#include
+#include
#include

#pragma GCC visibility push(default)

namespace faiss {
namespace gpu {

/// Implementing class for IVFPQ on the GPU
-class RaftIVFPQ : public IVFPQ {
+class CuvsIVFPQ : public IVFPQ {
   public:
-    RaftIVFPQ(
+    CuvsIVFPQ(
            GpuResources* resources,
            int dim,
            idx_t nlist,
@@ -51,12 +52,12 @@ class RaftIVFPQ : public IVFPQ {
            IndicesOptions indicesOptions,
            MemorySpace space);

-    ~RaftIVFPQ() override;
+    ~CuvsIVFPQ() override;

    /// Reserve GPU memory in our inverted lists for this number of vectors
    void reserveMemory(idx_t numVecs) override;

-    /// Clear out the RAFT index
+    /// Clear out the cuVS index
    void reset() override;

    /// After adding vectors, one can call this to reclaim device memory
@@ -93,15 +94,15 @@ class RaftIVFPQ : public IVFPQ {
    std::vector getListVectorData(idx_t listId, bool gpuFormat)
            const override;

-    /// Update our Raft index with this quantizer instance; may be a CPU
+    /// Update our cuVS index with this quantizer instance; may be a CPU
    /// or GPU quantizer
    void updateQuantizer(Index* quantizer) override;

    /// Copy all inverted lists from a CPU representation to ourselves
    void copyInvertedListsFrom(const InvertedLists* ivf) override;

-    /// Replace the Raft index
-    void setRaftIndex(raft::neighbors::ivf_pq::index&& idx);
+    /// Replace the cuVS index
+    void setCuvsIndex(cuvs::neighbors::ivf_pq::index&& idx);

    /// Classify and encode/add vectors to our IVF lists.
    /// The input data must be on our current device.
@@ -133,17 +134,16 @@ class RaftIVFPQ : public IVFPQ {
    /// Returns the encoding size for a PQ-encoded IVF list
    size_t getGpuListEncodingSize_(idx_t listId);

-    /// Copy the PQ centroids to the Raft index. The data is already in the
+    /// Copy the PQ centroids to the cuVS index. The data is already in the
    /// preferred format with the transpose performed by the IVFPQ class helper.
    void setPQCentroids_();

    /// Update the product quantizer centroids buffer held in the IVFPQ class.
-    /// Used when the RAFT index was updated externally.
+    /// Used when the cuVS index was updated externally.
    void setBasePQCentroids_();

-    /// optional around the Raft IVF-PQ index
-    std::optional> raft_knn_index{
-            std::nullopt};
+    /// cuVS IVF-PQ index
+    std::shared_ptr> cuvs_index{nullptr};
};

} // namespace gpu
diff --git a/faiss/gpu/test/CMakeLists.txt b/faiss/gpu/test/CMakeLists.txt
index 8c44e5360e..baf7480b51 100644
--- a/faiss/gpu/test/CMakeLists.txt
+++ b/faiss/gpu/test/CMakeLists.txt
@@ -25,16 +25,15 @@ if(FAISS_ENABLE_ROCM)
  target_link_libraries(faiss_gpu_test_helper PUBLIC faiss gtest hip::host)
else()
  find_package(CUDAToolkit REQUIRED)
-  target_link_libraries(faiss_gpu_test_helper PUBLIC
-  faiss gtest CUDA::cudart
-  $<$:raft::raft>
-  $<$:raft::compiled>)
+  target_link_libraries(faiss_gpu_test_helper PUBLIC faiss gtest CUDA::cudart $<$:cuvs::cuvs> $<$:OpenMP::OpenMP_CXX>)
endif()

macro(faiss_gpu_test file)
  get_filename_component(test_name ${file} NAME_WE)
  add_executable(${test_name} ${file})
  target_link_libraries(${test_name} PRIVATE faiss_gpu_test_helper)
+  target_compile_options(${test_name} PRIVATE
+          $<$:$<$:-Xcompiler=${OpenMP_CXX_FLAGS}>>)
  gtest_discover_tests(${test_name})
endmacro()

@@ -48,8 +47,7 @@ faiss_gpu_test(TestGpuIndexIVFPQ.cpp)
faiss_gpu_test(TestGpuIndexIVFScalarQuantizer.cpp)
faiss_gpu_test(TestGpuDistance.${GPU_EXT_PREFIX})
faiss_gpu_test(TestGpuSelect.${GPU_EXT_PREFIX})
-
-if(FAISS_ENABLE_RAFT)
+if(FAISS_ENABLE_CUVS)
  faiss_gpu_test(TestGpuIndexCagra.cu)
endif()
diff --git a/faiss/gpu/test/TestGpuDistance.cu b/faiss/gpu/test/TestGpuDistance.cu
index 11ed085edd..3915055480 100644
--- a/faiss/gpu/test/TestGpuDistance.cu
+++ b/faiss/gpu/test/TestGpuDistance.cu
@@ -49,7 +49,7 @@ void evaluate_bfknn(
    bfKnn(res, args);

    std::stringstream str;
-    str << "using raft " << args.use_raft << "metric " << metric
+    str << "using cuVS " << args.use_cuvs << "metric " << metric
        << " colMajorVecs " << colMajorVecs << " colMajorQueries "
        << colMajorQueries;
@@ -73,7 +73,7 @@ void testTransposition(
        bool colMajorVecs,
        bool colMajorQueries,
        faiss::MetricType metric,
-        bool use_raft = false,
+        bool use_cuvs = false,
        float metricArg = 0) {
    using namespace faiss::gpu;
@@ -169,12 +169,12 @@ void testTransposition(
    args.outIndices = gpuIndices.data();
    args.device = device;

-#if defined USE_NVIDIA_RAFT
-    args.use_raft = use_raft;
+#if defined USE_NVIDIA_CUVS
+    args.use_cuvs = use_cuvs;
#else
    FAISS_THROW_IF_NOT_MSG(
-            !use_raft,
-            "RAFT has not been compiled into the current version so it cannot be used.");
+            !use_cuvs,
+            "cuVS has not been compiled into the current version so it cannot be used.");
#endif

    evaluate_bfknn(
@@ -197,8 +197,8 @@ TEST(TestGpuDistance, Transposition_RR) {
    testTransposition(false, false, faiss::MetricType::METRIC_INNER_PRODUCT);
}

-#if defined USE_NVIDIA_RAFT
-TEST(TestRaftGpuDistance, Transposition_RR) {
+#if defined USE_NVIDIA_CUVS
+TEST(TestCuvsGpuDistance, Transposition_RR) {
    testTransposition(false, false, faiss::MetricType::METRIC_L2, true);
    testTransposition(
            false, false, faiss::MetricType::METRIC_INNER_PRODUCT, true);
@@ -209,8 +209,8 @@ TEST(TestGpuDistance, Transposition_RC) {
    testTransposition(false, true, faiss::MetricType::METRIC_L2);
}

-#if defined USE_NVIDIA_RAFT
-TEST(TestRaftGpuDistance, Transposition_RC) {
+#if defined USE_NVIDIA_CUVS
+TEST(TestCuvsGpuDistance, Transposition_RC) {
    testTransposition(false, true, faiss::MetricType::METRIC_L2, true);
}
#endif

@@ -219,8 +219,8 @@ TEST(TestGpuDistance, Transposition_CR) {
    testTransposition(true, false, faiss::MetricType::METRIC_L2);
}

-#if defined USE_NVIDIA_RAFT
-TEST(TestRaftGpuDistance,
Transposition_CR) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, Transposition_CR) { testTransposition(true, false, faiss::MetricType::METRIC_L2, true); } #endif @@ -229,8 +229,8 @@ TEST(TestGpuDistance, Transposition_CC) { testTransposition(true, true, faiss::MetricType::METRIC_L2); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, Transposition_CC) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, Transposition_CC) { testTransposition(true, true, faiss::MetricType::METRIC_L2, true); } #endif @@ -239,8 +239,8 @@ TEST(TestGpuDistance, L1) { testTransposition(false, false, faiss::MetricType::METRIC_L1); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, L1) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, L1) { testTransposition(false, false, faiss::MetricType::METRIC_L1, true); } #endif @@ -250,9 +250,9 @@ TEST(TestGpuDistance, L1_RC) { testTransposition(false, true, faiss::MetricType::METRIC_L1); } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS // Test other transpositions with the general distance kernel -TEST(TestRaftGpuDistance, L1_RC) { +TEST(TestCuvsGpuDistance, L1_RC) { testTransposition(false, true, faiss::MetricType::METRIC_L1, true); } #endif @@ -261,8 +261,8 @@ TEST(TestGpuDistance, L1_CR) { testTransposition(true, false, faiss::MetricType::METRIC_L1); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, L1_CR) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, L1_CR) { testTransposition(true, false, faiss::MetricType::METRIC_L1, true); } #endif @@ -271,8 +271,8 @@ TEST(TestGpuDistance, L1_CC) { testTransposition(true, true, faiss::MetricType::METRIC_L1); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, L1_CC) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, L1_CC) { testTransposition(true, true, faiss::MetricType::METRIC_L1, true); } #endif @@ -282,9 +282,9 @@ TEST(TestGpuDistance, Linf) { testTransposition(false, false, faiss::MetricType::METRIC_Linf); } -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS // Test remainder of metric types -TEST(TestRaftGpuDistance, Linf) { +TEST(TestCuvsGpuDistance, Linf) { testTransposition(false, false, faiss::MetricType::METRIC_Linf, true); } #endif @@ -293,8 +293,8 @@ TEST(TestGpuDistance, Lp) { testTransposition(false, false, faiss::MetricType::METRIC_Lp, false, 3); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, Lp) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, Lp) { testTransposition(false, false, faiss::MetricType::METRIC_Lp, true, 3); } #endif @@ -303,8 +303,8 @@ TEST(TestGpuDistance, Canberra) { testTransposition(false, false, faiss::MetricType::METRIC_Canberra); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, Canberra) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, Canberra) { testTransposition(false, false, faiss::MetricType::METRIC_Canberra, true); } #endif @@ -317,8 +317,8 @@ TEST(TestGpuDistance, JensenShannon) { testTransposition(false, false, faiss::MetricType::METRIC_JensenShannon); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuDistance, JensenShannon) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuDistance, JensenShannon) { testTransposition( false, false, faiss::MetricType::METRIC_JensenShannon, true); } diff --git a/faiss/gpu/test/TestGpuIndexCagra.cu b/faiss/gpu/test/TestGpuIndexCagra.cu index 7f2ba6821a..16b94703c4 100644 --- a/faiss/gpu/test/TestGpuIndexCagra.cu +++ b/faiss/gpu/test/TestGpuIndexCagra.cu @@ -183,7 +183,7 @@ void queryTest(faiss::MetricType metric, double expected_recall) 
{ recall_score.view(), test_dis_mds_opt, ref_dis_mds_opt); - ASSERT_GT(*recall_score.data_handle(), expected_recall); + ASSERT_TRUE(*recall_score.data_handle() > expected_recall); } } @@ -331,7 +331,7 @@ void copyToTest( recall_score.view(), copy_ref_dis_mds_opt, ref_dis_mds_opt); - ASSERT_GT(*recall_score.data_handle(), expected_recall); + ASSERT_TRUE(*recall_score.data_handle() > expected_recall); } } @@ -453,7 +453,7 @@ void copyFromTest(faiss::MetricType metric, double expected_recall) { recall_score.view(), copy_test_dis_mds_opt, test_dis_mds_opt); - ASSERT_GT(*recall_score.data_handle(), expected_recall); + ASSERT_TRUE(*recall_score.data_handle() > expected_recall); } } diff --git a/faiss/gpu/test/TestGpuIndexFlat.cpp b/faiss/gpu/test/TestGpuIndexFlat.cpp index 0398606bb6..3146e786c0 100644 --- a/faiss/gpu/test/TestGpuIndexFlat.cpp +++ b/faiss/gpu/test/TestGpuIndexFlat.cpp @@ -29,7 +29,7 @@ struct TestFlatOptions { numQueriesOverride(-1), kOverride(-1), dimOverride(-1), - use_raft(false) {} + use_cuvs(false) {} faiss::MetricType metric; float metricArg; @@ -39,7 +39,7 @@ struct TestFlatOptions { int numQueriesOverride; int kOverride; int dimOverride; - bool use_raft; + bool use_cuvs; }; void testFlat(const TestFlatOptions& opt) { @@ -75,7 +75,7 @@ void testFlat(const TestFlatOptions& opt) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; config.useFloat16 = opt.useFloat16; - config.use_raft = opt.use_raft; + config.use_cuvs = opt.use_cuvs; faiss::gpu::GpuIndexFlat gpuIndex(&res, dim, opt.metric, config); gpuIndex.metric_arg = opt.metricArg; @@ -114,8 +114,8 @@ TEST(TestGpuIndexFlat, IP_Float32) { testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -128,8 +128,8 @@ TEST(TestGpuIndexFlat, L1_Float32) { testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -141,8 +141,8 @@ TEST(TestGpuIndexFlat, Lp_Float32) { opt.useFloat16 = false; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -155,8 +155,8 @@ TEST(TestGpuIndexFlat, L2_Float32) { opt.useFloat16 = false; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -173,8 +173,8 @@ TEST(TestGpuIndexFlat, L2_k_2048) { opt.numVecsOverride = 10000; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -189,8 +189,8 @@ TEST(TestGpuIndexFlat, L2_Float32_K1) { opt.kOverride = 1; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -203,8 +203,8 @@ TEST(TestGpuIndexFlat, IP_Float16) { opt.useFloat16 = true; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -217,8 +217,8 @@ TEST(TestGpuIndexFlat, L2_Float16) { opt.useFloat16 = true; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -233,8 +233,8 @@ TEST(TestGpuIndexFlat, L2_Float16_K1) { opt.kOverride = 1; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } 
@@ -254,8 +254,8 @@ TEST(TestGpuIndexFlat, L2_Tiling) { opt.kOverride = 64; testFlat(opt); -#if defined USE_NVIDIA_RAFT - opt.use_raft = true; +#if defined USE_NVIDIA_CUVS + opt.use_cuvs = true; testFlat(opt); #endif } @@ -268,7 +268,7 @@ TEST(TestGpuIndexFlat, QueryEmpty) { faiss::gpu::GpuIndexFlatConfig config; config.device = 0; config.useFloat16 = false; - config.use_raft = false; + config.use_cuvs = false; int dim = 128; faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config); @@ -292,7 +292,7 @@ TEST(TestGpuIndexFlat, QueryEmpty) { } } -void testCopyFrom(bool use_raft) { +void testCopyFrom(bool use_cuvs) { int numVecs = faiss::gpu::randVal(100, 200); int dim = faiss::gpu::randVal(1, 1000); @@ -310,7 +310,7 @@ void testCopyFrom(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; config.useFloat16 = useFloat16; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; // Fill with garbage values faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config); @@ -343,13 +343,13 @@ TEST(TestGpuIndexFlat, CopyFrom) { testCopyFrom(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, CopyFrom) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, CopyFrom) { testCopyFrom(true); } #endif -void testCopyTo(bool use_raft) { +void testCopyTo(bool use_cuvs) { faiss::gpu::StandardGpuResources res; res.noTempMemory(); @@ -363,7 +363,7 @@ void testCopyTo(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; config.useFloat16 = useFloat16; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config); gpuIndex.add(numVecs, vecs.data()); @@ -394,13 +394,13 @@ TEST(TestGpuIndexFlat, CopyTo) { testCopyTo(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, CopyTo) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, CopyTo) { testCopyTo(true); } #endif -void testUnifiedMemory(bool use_raft) { +void testUnifiedMemory(bool use_cuvs) { // Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); @@ -426,7 +426,7 @@ void testUnifiedMemory(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; config.memorySpace = faiss::gpu::MemorySpace::Unified; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config); @@ -452,13 +452,13 @@ TEST(TestGpuIndexFlat, UnifiedMemory) { testUnifiedMemory(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, UnifiedMemory) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, UnifiedMemory) { testUnifiedMemory(true); } #endif -void testLargeIndex(bool use_raft) { +void testLargeIndex(bool use_cuvs) { // Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); @@ -489,7 +489,7 @@ void testLargeIndex(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config); cpuIndexL2.add(nb, xb.data()); @@ -513,13 +513,13 @@ TEST(TestGpuIndexFlat, LargeIndex) { testLargeIndex(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, LargeIndex) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, LargeIndex) { testLargeIndex(true); } #endif -void testResidual(bool use_raft) { +void testResidual(bool use_cuvs) { // 
Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); @@ -529,7 +529,7 @@ void testResidual(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; int dim = 32; faiss::IndexFlat cpuIndex(dim, faiss::MetricType::METRIC_L2); @@ -566,13 +566,13 @@ TEST(TestGpuIndexFlat, Residual) { testResidual(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, Residual) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, Residual) { testResidual(true); } #endif -void testReconstruct(bool use_raft) { +void testReconstruct(bool use_cuvs) { // Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); @@ -589,7 +589,7 @@ void testReconstruct(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; config.useFloat16 = useFloat16; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; faiss::gpu::GpuIndexFlat gpuIndex( &res, dim, faiss::MetricType::METRIC_L2, config); @@ -657,13 +657,13 @@ void testReconstruct(bool use_raft) { TEST(TestGpuIndexFlat, Reconstruct) { testReconstruct(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, Reconstruct) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, Reconstruct) { testReconstruct(true); } #endif -void testSearchAndReconstruct(bool use_raft) { +void testSearchAndReconstruct(bool use_cuvs) { // Construct on a random device to test multi-device, if we have // multiple devices int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); @@ -683,7 +683,7 @@ void testSearchAndReconstruct(bool use_raft) { faiss::gpu::GpuIndexFlatConfig config; config.device = device; - config.use_raft = use_raft; + config.use_cuvs = use_cuvs; faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config); cpuIndex.add(nb, xb.data()); @@ -754,8 +754,8 @@ TEST(TestGpuIndexFlat, SearchAndReconstruct) { testSearchAndReconstruct(false); } -#if defined USE_NVIDIA_RAFT -TEST(TestRaftGpuIndexFlat, SearchAndReconstruct) { +#if defined USE_NVIDIA_CUVS +TEST(TestCuvsGpuIndexFlat, SearchAndReconstruct) { testSearchAndReconstruct(true); } #endif diff --git a/faiss/gpu/test/TestGpuIndexIVFFlat.cpp b/faiss/gpu/test/TestGpuIndexIVFFlat.cpp index df71e9e3fe..a7d0007b5b 100644 --- a/faiss/gpu/test/TestGpuIndexIVFFlat.cpp +++ b/faiss/gpu/test/TestGpuIndexIVFFlat.cpp @@ -58,7 +58,7 @@ struct Options { device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); - useRaft = false; + useCuvs = false; } std::string toString() const { @@ -66,7 +66,7 @@ struct Options { str << "IVFFlat device " << device << " numVecs " << numAdd << " dim " << dim << " numCentroids " << numCentroids << " nprobe " << nprobe << " numQuery " << numQuery << " k " << k << " indicesOpt " - << indicesOpt << " useRaft " << useRaft; + << indicesOpt << " useCuvs " << useCuvs; return str.str(); } @@ -80,7 +80,7 @@ struct Options { int k; int device; faiss::gpu::IndicesOptions indicesOpt; - bool useRaft; + bool useCuvs; }; void queryTest( @@ -111,7 +111,7 @@ void queryTest( config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config); @@ -138,9 +138,10 @@ void 
queryTest( void addTest( faiss::MetricType metricType, bool useFloat16CoarseQuantizer, - bool useRaft) { + bool useCuvs) { for (int tries = 0; tries < 2; ++tries) { Options opt; + opt.useCuvs = useCuvs; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); @@ -163,9 +164,9 @@ void addTest( faiss::gpu::GpuIndexIVFFlatConfig config; config.device = opt.device; config.indicesOptions = - useRaft ? faiss::gpu::INDICES_64_BIT : opt.indicesOpt; + opt.useCuvs ? faiss::gpu::INDICES_64_BIT : opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.use_raft = useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config); @@ -189,7 +190,7 @@ void addTest( } } -void copyToTest(bool useFloat16CoarseQuantizer, bool useRaft) { +void copyToTest(bool useFloat16CoarseQuantizer, bool useCuvs) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); @@ -200,9 +201,9 @@ void copyToTest(bool useFloat16CoarseQuantizer, bool useRaft) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = opt.device; config.indicesOptions = - useRaft ? faiss::gpu::INDICES_64_BIT : opt.indicesOpt; + useCuvs ? faiss::gpu::INDICES_64_BIT : opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.use_raft = useRaft; + config.use_cuvs = useCuvs; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); @@ -242,7 +243,7 @@ void copyToTest(bool useFloat16CoarseQuantizer, bool useRaft) { compFloat16 ? 0.30f : 0.015f); } -void copyFromTest(bool useFloat16CoarseQuantizer, bool useRaft) { +void copyFromTest(bool useFloat16CoarseQuantizer, bool useCuvs) { Options opt; std::vector trainVecs = faiss::gpu::randVecs(opt.numTrain, opt.dim); std::vector addVecs = faiss::gpu::randVecs(opt.numAdd, opt.dim); @@ -261,9 +262,9 @@ void copyFromTest(bool useFloat16CoarseQuantizer, bool useRaft) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = opt.device; config.indicesOptions = - useRaft ? faiss::gpu::INDICES_64_BIT : opt.indicesOpt; + useCuvs ? 
faiss::gpu::INDICES_64_BIT : opt.indicesOpt; config.flatConfig.useFloat16 = useFloat16CoarseQuantizer; - config.use_raft = useRaft; + config.use_cuvs = useCuvs; faiss::gpu::GpuIndexIVFFlat gpuIndex(&res, 1, 1, faiss::METRIC_L2, config); gpuIndex.nprobe = 1; @@ -297,7 +298,7 @@ void copyFromTest(bool useFloat16CoarseQuantizer, bool useRaft) { TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) { addTest(faiss::METRIC_L2, false, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS addTest(faiss::METRIC_L2, false, true); #endif } @@ -305,7 +306,7 @@ TEST(TestGpuIndexIVFFlat, Float32_32_Add_L2) { TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) { addTest(faiss::METRIC_INNER_PRODUCT, false, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS addTest(faiss::METRIC_INNER_PRODUCT, false, true); #endif } @@ -313,7 +314,7 @@ TEST(TestGpuIndexIVFFlat, Float32_32_Add_IP) { TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) { addTest(faiss::METRIC_L2, true, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS addTest(faiss::METRIC_L2, true, true); #endif } @@ -321,7 +322,7 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Add_L2) { TEST(TestGpuIndexIVFFlat, Float16_32_Add_IP) { addTest(faiss::METRIC_INNER_PRODUCT, true, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS addTest(faiss::METRIC_INNER_PRODUCT, true, true); #endif } @@ -334,8 +335,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_L2) { Options opt; queryTest(opt, faiss::METRIC_L2, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_L2, false); #endif @@ -345,8 +346,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_IP) { Options opt; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); #endif @@ -358,8 +359,8 @@ TEST(TestGpuIndexIVFFlat, LargeBatch) { opt.numQuery = 100000; queryTest(opt, faiss::METRIC_L2, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_L2, false); #endif @@ -371,8 +372,8 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Query_L2) { Options opt; queryTest(opt, faiss::METRIC_L2, true); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_L2, true); #endif @@ -382,8 +383,8 @@ TEST(TestGpuIndexIVFFlat, Float16_32_Query_IP) { Options opt; queryTest(opt, faiss::METRIC_INNER_PRODUCT, true); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_INNER_PRODUCT, true); #endif @@ -399,8 +400,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_L2_64) { opt.dim = 64; queryTest(opt, faiss::METRIC_L2, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_L2, false); #endif @@ -411,8 +412,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_IP_64) { opt.dim = 64; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; 
opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); #endif @@ -423,8 +424,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_L2_128) { opt.dim = 128; queryTest(opt, faiss::METRIC_L2, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_L2, false); #endif @@ -435,8 +436,8 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) { opt.dim = 128; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); -#if defined USE_NVIDIA_RAFT - opt.useRaft = true; +#if defined USE_NVIDIA_CUVS + opt.useCuvs = true; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; queryTest(opt, faiss::METRIC_INNER_PRODUCT, false); #endif @@ -449,7 +450,7 @@ TEST(TestGpuIndexIVFFlat, Float32_Query_IP_128) { TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) { copyToTest(false, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS copyToTest(false, true); #endif } @@ -457,7 +458,7 @@ TEST(TestGpuIndexIVFFlat, Float32_32_CopyTo) { TEST(TestGpuIndexIVFFlat, Float32_32_CopyFrom) { copyFromTest(false, false); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS copyFromTest(false, true); #endif } @@ -500,7 +501,7 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = opt.device; config.indicesOptions = opt.indicesOpt; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config); @@ -523,19 +524,19 @@ TEST(TestGpuIndexIVFFlat, Float32_negative) { compFloat16 ? 0.99f : 0.1f, compFloat16 ? 0.65f : 0.015f); -#if defined USE_NVIDIA_RAFT - config.use_raft = true; +#if defined USE_NVIDIA_CUVS + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - faiss::gpu::GpuIndexIVFFlat raftGpuIndex( + faiss::gpu::GpuIndexIVFFlat cuvsGpuIndex( &res, cpuIndex.d, cpuIndex.nlist, cpuIndex.metric_type, config); - raftGpuIndex.copyFrom(&cpuIndex); - raftGpuIndex.nprobe = opt.nprobe; + cuvsGpuIndex.copyFrom(&cpuIndex); + cuvsGpuIndex.nprobe = opt.nprobe; faiss::gpu::compareIndices( queryVecs, cpuIndex, - raftGpuIndex, + cuvsGpuIndex, opt.numQuery, opt.dim, opt.k, @@ -573,7 +574,7 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) { config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = faiss::gpu::randBool(); - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); @@ -594,19 +595,19 @@ TEST(TestGpuIndexIVFFlat, QueryNaN) { } } -#if defined USE_NVIDIA_RAFT - config.use_raft = true; +#if defined USE_NVIDIA_CUVS + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; std::fill(distances.begin(), distances.end(), 0); std::fill(indices.begin(), indices.end(), 0); - faiss::gpu::GpuIndexIVFFlat raftGpuIndex( + faiss::gpu::GpuIndexIVFFlat cuvsGpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); - raftGpuIndex.nprobe = opt.nprobe; + cuvsGpuIndex.nprobe = opt.nprobe; - raftGpuIndex.train(opt.numTrain, trainVecs.data()); - raftGpuIndex.add(opt.numAdd, addVecs.data()); + cuvsGpuIndex.train(opt.numTrain, trainVecs.data()); + cuvsGpuIndex.add(opt.numAdd, addVecs.data()); - raftGpuIndex.search( + cuvsGpuIndex.search( numQuery, nans.data(), opt.k, distances.data(), indices.data()); for (int q = 0; q < numQuery; ++q) { @@ -642,7 +643,7 @@ TEST(TestGpuIndexIVFFlat, AddNaN) { 
config.device = opt.device; config.indicesOptions = opt.indicesOpt; config.flatConfig.useFloat16 = faiss::gpu::randBool(); - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); gpuIndex.nprobe = opt.nprobe; @@ -664,20 +665,20 @@ TEST(TestGpuIndexIVFFlat, AddNaN) { distance.data(), indices.data()); -#if defined USE_NVIDIA_RAFT - config.use_raft = true; +#if defined USE_NVIDIA_CUVS + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - faiss::gpu::GpuIndexIVFFlat raftGpuIndex( + faiss::gpu::GpuIndexIVFFlat cuvsGpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); - raftGpuIndex.nprobe = opt.nprobe; - raftGpuIndex.train(opt.numTrain, trainVecs.data()); + cuvsGpuIndex.nprobe = opt.nprobe; + cuvsGpuIndex.train(opt.numTrain, trainVecs.data()); // should not crash - EXPECT_EQ(raftGpuIndex.ntotal, 0); - raftGpuIndex.add(numNans, nans.data()); + EXPECT_EQ(cuvsGpuIndex.ntotal, 0); + cuvsGpuIndex.add(numNans, nans.data()); // should not crash - raftGpuIndex.search( + cuvsGpuIndex.search( opt.numQuery, queryVecs.data(), opt.k, @@ -724,7 +725,7 @@ TEST(TestGpuIndexIVFFlat, UnifiedMemory) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = device; config.memorySpace = faiss::gpu::MemorySpace::Unified; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, dim, numCentroids, faiss::METRIC_L2, config); @@ -742,17 +743,17 @@ TEST(TestGpuIndexIVFFlat, UnifiedMemory) { 0.1f, 0.015f); -#if defined USE_NVIDIA_RAFT - config.use_raft = true; +#if defined USE_NVIDIA_CUVS + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - faiss::gpu::GpuIndexIVFFlat raftGpuIndex( + faiss::gpu::GpuIndexIVFFlat cuvsGpuIndex( &res, dim, numCentroids, faiss::METRIC_L2, config); - raftGpuIndex.copyFrom(&cpuIndex); - raftGpuIndex.nprobe = nprobe; + cuvsGpuIndex.copyFrom(&cpuIndex); + cuvsGpuIndex.nprobe = nprobe; faiss::gpu::compareIndices( cpuIndex, - raftGpuIndex, + cuvsGpuIndex, numQuery, dim, k, @@ -802,7 +803,7 @@ TEST(TestGpuIndexIVFFlat, LongIVFList) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = device; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, dim, numCentroids, faiss::METRIC_L2, config); @@ -821,18 +822,18 @@ TEST(TestGpuIndexIVFFlat, LongIVFList) { 0.1f, 0.015f); -#if defined USE_NVIDIA_RAFT - config.use_raft = true; +#if defined USE_NVIDIA_CUVS + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - faiss::gpu::GpuIndexIVFFlat raftGpuIndex( + faiss::gpu::GpuIndexIVFFlat cuvsGpuIndex( &res, dim, numCentroids, faiss::METRIC_L2, config); - raftGpuIndex.train(numTrain, trainVecs.data()); - raftGpuIndex.add(numAdd, addVecs.data()); - raftGpuIndex.nprobe = 1; + cuvsGpuIndex.train(numTrain, trainVecs.data()); + cuvsGpuIndex.add(numAdd, addVecs.data()); + cuvsGpuIndex.nprobe = 1; faiss::gpu::compareIndices( cpuIndex, - raftGpuIndex, + cuvsGpuIndex, numQuery, dim, k, @@ -862,7 +863,7 @@ TEST(TestGpuIndexIVFFlat, Reconstruct_n) { faiss::gpu::GpuIndexIVFFlatConfig config; config.device = opt.device; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFFlat gpuIndex( &res, opt.dim, opt.numCentroids, faiss::METRIC_L2, config); diff --git a/faiss/gpu/test/TestGpuIndexIVFPQ.cpp b/faiss/gpu/test/TestGpuIndexIVFPQ.cpp index 134a4b449d..cf1af7f0a2 100644 --- 
a/faiss/gpu/test/TestGpuIndexIVFPQ.cpp +++ b/faiss/gpu/test/TestGpuIndexIVFPQ.cpp @@ -35,7 +35,7 @@ void pickEncoding(int& codes, int& dim) { } } -void pickRaftEncoding(int& codes, int& dim, int bitsPerCode) { +void pickCuvsEncoding(int& codes, int& dim, int bitsPerCode) { // Above 32 doesn't work with no precomputed codes std::vector dimSizes{4, 8, 10, 12, 16, 20, 24, 28, 32}; @@ -85,7 +85,7 @@ struct Options { device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); interleavedLayout = false; - useRaft = false; + useCuvs = false; } std::string toString() const { @@ -126,7 +126,7 @@ struct Options { bool useFloat16; int device; bool interleavedLayout; - bool useRaft; + bool useCuvs; }; void queryTest(Options opt, faiss::MetricType metricType) { @@ -156,7 +156,7 @@ void queryTest(Options opt, faiss::MetricType metricType) { config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; config.interleavedLayout = opt.interleavedLayout; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.nprobe = opt.nprobe; @@ -235,7 +235,7 @@ void testMMCodeDistance(faiss::MetricType mt) { config.usePrecomputedTables = false; config.useMMCodeDistance = true; config.indicesOptions = opt.indicesOpt; - config.use_raft = false; + config.use_cuvs = false; // Make sure that the float16 version works as well config.useFloat16LookupTables = (tries % 2 == 0); @@ -286,7 +286,7 @@ void testMMCodeDistance(faiss::MetricType mt) { config.device = opt.device; config.usePrecomputedTables = false; config.indicesOptions = opt.indicesOpt; - config.use_raft = false; + config.use_cuvs = false; // Make sure that the float16 version works as well config.useFloat16LookupTables = (dimPerSubQ == 7); @@ -340,7 +340,7 @@ TEST(TestGpuIndexIVFPQ, Float16Coarse) { config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.nprobe = opt.nprobe; @@ -386,7 +386,7 @@ void addTest(Options opt, faiss::MetricType metricType) { config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; config.interleavedLayout = opt.interleavedLayout; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFPQ gpuIndex(&res, &cpuIndex, config); gpuIndex.nprobe = opt.nprobe; @@ -436,7 +436,7 @@ void copyToTest(Options opt) { config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; config.interleavedLayout = opt.interleavedLayout; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFPQ gpuIndex( &res, @@ -513,7 +513,7 @@ void copyFromTest(Options opt) { config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; config.interleavedLayout = opt.interleavedLayout; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; // Use garbage values to see if we overwrite them faiss::gpu::GpuIndexIVFPQ gpuIndex( @@ -567,8 +567,8 @@ void queryNaNTest(Options opt) { config.usePrecomputedTables = opt.usePrecomputed; config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; - config.use_raft = opt.useRaft; - config.interleavedLayout = opt.useRaft ? true : opt.interleavedLayout; + config.use_cuvs = opt.useCuvs; + config.interleavedLayout = opt.useCuvs ? 
true : opt.interleavedLayout; faiss::gpu::GpuIndexIVFPQ gpuIndex( &res, @@ -606,7 +606,7 @@ void queryNaNTest(Options opt) { TEST(TestGpuIndexIVFPQ, QueryNaN) { Options opt; - opt.useRaft = false; + opt.useCuvs = false; queryNaNTest(opt); } @@ -620,7 +620,7 @@ void addNaNTest(Options opt) { config.indicesOptions = opt.indicesOpt; config.useFloat16LookupTables = opt.useFloat16; config.interleavedLayout = opt.interleavedLayout; - config.use_raft = opt.useRaft; + config.use_cuvs = opt.useCuvs; faiss::gpu::GpuIndexIVFPQ gpuIndex( &res, @@ -664,46 +664,46 @@ void addNaNTest(Options opt) { TEST(TestGpuIndexIVFPQ, AddNaN) { Options opt; - opt.useRaft = false; + opt.useCuvs = false; addNaNTest(opt); } -#if defined USE_NVIDIA_RAFT -TEST(TestGpuIndexIVFPQ, Query_L2_Raft) { +#if defined USE_NVIDIA_CUVS +TEST(TestGpuIndexIVFPQ, Query_L2_Cuvs) { for (int tries = 0; tries < 2; ++tries) { Options opt; opt.bitsPerCode = faiss::gpu::randVal(4, 8); - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); queryTest(opt, faiss::MetricType::METRIC_L2); } } -TEST(TestGpuIndexIVFPQ, Query_IP_Raft) { +TEST(TestGpuIndexIVFPQ, Query_IP_Cuvs) { for (int tries = 0; tries < 2; ++tries) { Options opt; opt.bitsPerCode = faiss::gpu::randVal(4, 8); - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); queryTest(opt, faiss::MetricType::METRIC_INNER_PRODUCT); } } // Large batch sizes (>= 65536) should also work -TEST(TestGpuIndexIVFPQ, LargeBatch_Raft) { +TEST(TestGpuIndexIVFPQ, LargeBatch_Cuvs) { Options opt; // override for large sizes opt.dim = 4; opt.numQuery = 100000; opt.codes = 2; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.usePrecomputed = false; opt.useFloat16 = false; @@ -713,73 +713,73 @@ TEST(TestGpuIndexIVFPQ, LargeBatch_Raft) { queryTest(opt, faiss::MetricType::METRIC_L2); } -TEST(TestGpuIndexIVFPQ, CopyFrom_Raft) { +TEST(TestGpuIndexIVFPQ, CopyFrom_Cuvs) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); copyFromTest(opt); } -TEST(TestGpuIndexIVFPQ, Add_L2_Raft) { +TEST(TestGpuIndexIVFPQ, Add_L2_Cuvs) { for (int tries = 0; tries < 2; ++tries) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); addTest(opt, faiss::METRIC_L2); } } -TEST(TestGpuIndexIVFPQ, Add_IP_Raft) { +TEST(TestGpuIndexIVFPQ, Add_IP_Cuvs) { for (int tries = 0; tries < 2; ++tries) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, 
opt.bitsPerCode); addTest(opt, faiss::METRIC_INNER_PRODUCT); } } -TEST(TestGpuIndexIVFPQ, QueryNaN_Raft) { +TEST(TestGpuIndexIVFPQ, QueryNaN_Cuvs) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); queryNaNTest(opt); } -TEST(TestGpuIndexIVFPQ, AddNaN_Raft) { +TEST(TestGpuIndexIVFPQ, AddNaN_Cuvs) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); addNaNTest(opt); } -TEST(TestGpuIndexIVFPQ, CopyTo_Raft) { +TEST(TestGpuIndexIVFPQ, CopyTo_Cuvs) { Options opt; - opt.useRaft = true; + opt.useCuvs = true; opt.interleavedLayout = true; opt.bitsPerCode = faiss::gpu::randVal(4, 8); opt.usePrecomputed = false; opt.indicesOpt = faiss::gpu::INDICES_64_BIT; - pickRaftEncoding(opt.codes, opt.dim, opt.bitsPerCode); + pickCuvsEncoding(opt.codes, opt.dim, opt.bitsPerCode); copyToTest(opt); } #endif @@ -824,7 +824,7 @@ TEST(TestGpuIndexIVFPQ, UnifiedMemory) { faiss::gpu::GpuIndexIVFPQConfig config; config.device = device; config.memorySpace = faiss::gpu::MemorySpace::Unified; - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexIVFPQ gpuIndex( &res, @@ -848,12 +848,12 @@ TEST(TestGpuIndexIVFPQ, UnifiedMemory) { 0.1f, 0.015f); -#if defined USE_NVIDIA_RAFT +#if defined USE_NVIDIA_CUVS config.interleavedLayout = true; - config.use_raft = true; + config.use_cuvs = true; config.indicesOptions = faiss::gpu::INDICES_64_BIT; - faiss::gpu::GpuIndexIVFPQ raftGpuIndex( + faiss::gpu::GpuIndexIVFPQ cuvsGpuIndex( &res, dim, numCentroids, @@ -861,12 +861,12 @@ TEST(TestGpuIndexIVFPQ, UnifiedMemory) { bitsPerCode, faiss::METRIC_L2, config); - raftGpuIndex.copyFrom(&cpuIndex); - raftGpuIndex.nprobe = nprobe; + cuvsGpuIndex.copyFrom(&cpuIndex); + cuvsGpuIndex.nprobe = nprobe; faiss::gpu::compareIndices( cpuIndex, - raftGpuIndex, + cuvsGpuIndex, numQuery, dim, k, diff --git a/faiss/gpu/test/TestGpuMemoryException.cpp b/faiss/gpu/test/TestGpuMemoryException.cpp index b70b9cc538..d0f23acde4 100644 --- a/faiss/gpu/test/TestGpuMemoryException.cpp +++ b/faiss/gpu/test/TestGpuMemoryException.cpp @@ -31,7 +31,7 @@ TEST(TestGpuMemoryException, AddException) { faiss::gpu::GpuIndexFlatConfig config; config.device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1); - config.use_raft = false; + config.use_cuvs = false; faiss::gpu::GpuIndexFlatL2 gpuIndexL2Broken( &res, (int)brokenAddDims, config); diff --git a/faiss/gpu/test/test_cagra.py b/faiss/gpu/test/test_cagra.py index 5324379fc5..20eb8ece16 100644 --- a/faiss/gpu/test/test_cagra.py +++ b/faiss/gpu/test/test_cagra.py @@ -11,8 +11,8 @@ @unittest.skipIf( - "RAFT" not in faiss.get_compile_options(), - "only if RAFT is compiled in") + "CUVS" not in faiss.get_compile_options(), + "only if cuVS is compiled in") class TestComputeGT(unittest.TestCase): def do_compute_GT(self, metric): @@ -36,8 +36,8 @@ def test_compute_GT_IP(self): self.do_compute_GT(faiss.METRIC_INNER_PRODUCT) @unittest.skipIf( - "RAFT" not in faiss.get_compile_options(), - "only if RAFT is compiled in") + "CUVS" not in faiss.get_compile_options(), + "only if cuVS 
is compiled in") class TestInterop(unittest.TestCase): def do_interop(self, metric): diff --git a/faiss/gpu/test/test_raft.py b/faiss/gpu/test/test_cuvs.py similarity index 82% rename from faiss/gpu/test/test_raft.py rename to faiss/gpu/test/test_cuvs.py index d08faaa57b..538e265209 100644 --- a/faiss/gpu/test/test_raft.py +++ b/faiss/gpu/test/test_cuvs.py @@ -11,8 +11,8 @@ @unittest.skipIf( - "RAFT" not in faiss.get_compile_options(), - "only if RAFT is compiled in") + "CUVS" not in faiss.get_compile_options(), + "only if CUVS is compiled in") class TestBfKnn(unittest.TestCase): def test_bfKnn(self): @@ -25,14 +25,14 @@ def test_bfKnn(self): # Faiss internal implementation Dnew, Inew = faiss.knn_gpu( - res, ds.get_queries(), ds.get_database(), 12, use_raft=False) - np.testing.assert_allclose(Dref, Dnew, atol=1e-5) + res, ds.get_queries(), ds.get_database(), 12, use_cuvs=False) + np.testing.assert_allclose(Dref, Dnew, atol=1e-4) np.testing.assert_array_equal(Iref, Inew) - # RAFT version + # cuVS version Dnew, Inew = faiss.knn_gpu( - res, ds.get_queries(), ds.get_database(), 12, use_raft=True) - np.testing.assert_allclose(Dref, Dnew, atol=1e-5) + res, ds.get_queries(), ds.get_database(), 12, use_cuvs=True) + np.testing.assert_allclose(Dref, Dnew, atol=1e-4) np.testing.assert_array_equal(Iref, Inew) def test_IndexFlat(self): @@ -46,7 +46,7 @@ def test_IndexFlat(self): res = faiss.StandardGpuResources() co = faiss.GpuClonerOptions() - co.use_raft = True + co.use_cuvs = True index_gpu = faiss.index_cpu_to_gpu(res, 0, index, co) Dnew, Inew = index_gpu.search(ds.get_queries(), 13) np.testing.assert_allclose(Dref, Dnew, atol=1e-5) diff --git a/faiss/gpu/test/test_gpu_index.py b/faiss/gpu/test/test_gpu_index.py index f85f6ff913..05d4ed02c6 100755 --- a/faiss/gpu/test/test_gpu_index.py +++ b/faiss/gpu/test/test_gpu_index.py @@ -25,7 +25,7 @@ def test_ivfflat_search_preassigned(self): k = 50 config = faiss.GpuIndexIVFFlatConfig() - config.use_raft = False + config.use_cuvs = False idx_gpu = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2, config) idx_gpu.nprobe = nprobe @@ -59,7 +59,7 @@ def test_ivfpq_search_preassigned(self): k = 50 config = faiss.GpuIndexIVFPQConfig() - config.use_raft = False + config.use_cuvs = False idx_gpu = faiss.GpuIndexIVFPQ(res, d, nlist, 4, 8, faiss.METRIC_L2, config) idx_gpu.nprobe = nprobe @@ -141,7 +141,7 @@ def test_ivfflat_cpu_coarse(self): # construct a GPU index using the same trained coarse quantizer # from the CPU index config = faiss.GpuIndexIVFFlatConfig() - config.use_raft = False + config.use_cuvs = False idx_gpu = faiss.GpuIndexIVFFlat(res, q, d, nlist, faiss.METRIC_L2, config) assert(idx_gpu.is_trained) idx_gpu.add(xb) @@ -233,7 +233,7 @@ def test_ivfpq_cpu_coarse(self): # construct a GPU index using the same trained coarse quantizer # from the CPU index config = faiss.GpuIndexIVFPQConfig() - config.use_raft = False + config.use_cuvs = False idx_gpu = faiss.GpuIndexIVFPQ( res, idx_coarse_cpu, d, nlist_lvl_2, 4, 8, faiss.METRIC_L2, config) assert(not idx_gpu.is_trained) @@ -414,7 +414,7 @@ def test_indices_ivfflat(self): # Store values using 32-bit indices instead config.indicesOptions = faiss.INDICES_32_BIT - config.use_raft = False + config.use_cuvs = False idx = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2, config) idx.train(xb) idx.add_with_ids(xb, xb_indices) @@ -439,7 +439,7 @@ def test_indices_ivfpq(self): xb_indices = (xb_indices_base + 4294967296).astype('int64') config = faiss.GpuIndexIVFPQConfig() - config.use_raft = False + 
config.use_cuvs = False
         idx = faiss.GpuIndexIVFPQ(res, d, nlist, M, nbits, faiss.METRIC_L2, config)
         idx.train(xb)
@@ -501,7 +501,7 @@ def test_sq_cpu_to_gpu(self):
         index = faiss.index_factory(32, "SQfp16")
         index.add(np.random.rand(1000, 32).astype(np.float32))
         config = faiss.GpuClonerOptions()
-        config.use_raft = False
+        config.use_cuvs = False
         gpu_index = faiss.index_cpu_to_gpu(res, 0, index, config)
         self.assertIsInstance(gpu_index, faiss.GpuIndexFlat)
diff --git a/faiss/gpu/test/test_gpu_index_ivfflat.py b/faiss/gpu/test/test_gpu_index_ivfflat.py
index 1dbf9b694d..2d4b70f764 100644
--- a/faiss/gpu/test/test_gpu_index_ivfflat.py
+++ b/faiss/gpu/test/test_gpu_index_ivfflat.py
@@ -18,7 +18,7 @@ def test_reconstruct_n(self):
         res = faiss.StandardGpuResources()
         res.noTempMemory()
         config = faiss.GpuIndexIVFFlatConfig()
-        config.use_raft = False
+        config.use_cuvs = False
         index2 = faiss.GpuIndexIVFFlat(res, index, config)
         recons = index2.reconstruct_n(0, 10)
diff --git a/faiss/gpu/test/test_gpu_index_ivfsq.py b/faiss/gpu/test/test_gpu_index_ivfsq.py
index 6600d46661..99239152d3 100755
--- a/faiss/gpu/test/test_gpu_index_ivfsq.py
+++ b/faiss/gpu/test/test_gpu_index_ivfsq.py
@@ -28,7 +28,7 @@ def make_indices_copy_from_cpu(nlist, d, qtype, by_residual, metric, clamp):
     res = faiss.StandardGpuResources()
     res.noTempMemory()
     config = faiss.GpuIndexIVFScalarQuantizerConfig()
-    config.use_raft = False
+    config.use_cuvs = False
     idx_gpu = faiss.GpuIndexIVFScalarQuantizer(res, idx_cpu, config)
     return idx_cpu, idx_gpu
@@ -40,7 +40,7 @@ def make_indices_copy_from_gpu(nlist, d, qtype, by_residual, metric, clamp):
     res = faiss.StandardGpuResources()
     res.noTempMemory()
     config = faiss.GpuIndexIVFScalarQuantizerConfig()
-    config.use_raft = False
+    config.use_cuvs = False
     idx_gpu = faiss.GpuIndexIVFScalarQuantizer(res, d, nlist, qtype, metric, by_residual, config)
     idx_gpu.train(to_train)
@@ -68,7 +68,7 @@ def make_indices_train(nlist, d, qtype, by_residual, metric, clamp):
     res = faiss.StandardGpuResources()
     res.noTempMemory()
     config = faiss.GpuIndexIVFScalarQuantizerConfig()
-    config.use_raft = False
+    config.use_cuvs = False
     idx_gpu = faiss.GpuIndexIVFScalarQuantizer(res, d, nlist, qtype, metric, by_residual, config)
     assert(by_residual == idx_gpu.by_residual)
diff --git a/faiss/gpu/test/test_gpu_index_serialize.py b/faiss/gpu/test/test_gpu_index_serialize.py
index fcd62f573c..3dafcfdce3 100644
--- a/faiss/gpu/test/test_gpu_index_serialize.py
+++ b/faiss/gpu/test/test_gpu_index_serialize.py
@@ -35,7 +35,7 @@ def test_serialize(self):
         # IVFSQ
         config = faiss.GpuIndexIVFScalarQuantizerConfig()
-        config.use_raft = False
+        config.use_cuvs = False
         indexes.append(faiss.GpuIndexIVFScalarQuantizer(res, d, nlist, faiss.ScalarQuantizer.QT_fp16, faiss.METRIC_L2, True, config))
         # IVFPQ
@@ -52,7 +52,7 @@ def test_serialize(self):
             gpu_cloner_options = faiss.GpuClonerOptions()
             if isinstance(index, faiss.GpuIndexIVFScalarQuantizer):
-                gpu_cloner_options.use_raft = False
+                gpu_cloner_options.use_cuvs = False
             gpu_index_restore = faiss.index_cpu_to_gpu(res, 0, cpu_index, gpu_cloner_options)
             restore_d, restore_i = gpu_index_restore.search(query, k)
diff --git a/faiss/gpu/test/test_index_cpu_to_gpu.py b/faiss/gpu/test/test_index_cpu_to_gpu.py
index 82a9fdbf05..7b043b31cf 100644
--- a/faiss/gpu/test/test_index_cpu_to_gpu.py
+++ b/faiss/gpu/test/test_index_cpu_to_gpu.py
@@ -28,13 +28,13 @@ def create_index(self, factory_string):
     def create_and_clone(self,
                          factory_string,
                          allowCpuCoarseQuantizer=None,
-                         use_raft=None):
+                         use_cuvs=None):
         idx = self.create_index(factory_string)
         config = faiss.GpuClonerOptions()
         if allowCpuCoarseQuantizer is not None:
            config.allowCpuCoarseQuantizer = allowCpuCoarseQuantizer
-        if use_raft is not None:
-            config.use_raft = use_raft
+        if use_cuvs is not None:
+            config.use_cuvs = use_cuvs
         faiss.index_cpu_to_gpu(self.res, 0, idx, config)
     def verify_throws_not_implemented_exception(self, factory_string):
@@ -47,12 +47,12 @@ def verify_throws_not_implemented_exception(self, factory_string):
     def verify_clones_successfully(self,
                                    factory_string,
                                    allowCpuCoarseQuantizer=None,
-                                   use_raft=None):
+                                   use_cuvs=None):
         try:
             self.create_and_clone(
                 factory_string,
                 allowCpuCoarseQuantizer=allowCpuCoarseQuantizer,
-                use_raft=use_raft)
+                use_cuvs=use_cuvs)
         except Exception as e:
             self.fail("Unexpected exception thrown factory_string: "
                       "%s; error message: %s." % (factory_string, str(e)))
@@ -74,10 +74,10 @@ def test_implemented_indices(self):
         self.verify_clones_successfully("PCA32,IVF32,PQ8")
         self.verify_clones_successfully("PCA32,IVF32,PQ8np")
-        # set use_raft to false, these index types are not supported on RAFT
-        self.verify_clones_successfully("IVF32,SQ8", use_raft=False)
+        # set use_cuvs to false, these index types are not supported on cuVS
+        self.verify_clones_successfully("IVF32,SQ8", use_cuvs=False)
         self.verify_clones_successfully(
-            "PCA32,IVF32,SQ8", use_raft=False)
+            "PCA32,IVF32,SQ8", use_cuvs=False)
     def test_with_flag(self):
         self.verify_clones_successfully("IVF32_HNSW,Flat",
diff --git a/faiss/gpu/test/test_multi_gpu.py b/faiss/gpu/test/test_multi_gpu.py
index aca0e988d6..6ec2079ff0 100644
--- a/faiss/gpu/test/test_multi_gpu.py
+++ b/faiss/gpu/test/test_multi_gpu.py
@@ -29,7 +29,7 @@ def test_sharded(self):
         co = faiss.GpuMultipleClonerOptions()
         co.shard = True
-        co.use_raft = False
+        co.use_cuvs = False
         index = faiss.index_cpu_to_all_gpus(index_cpu, co, ngpu=2)
         index.add(xb)
@@ -72,7 +72,7 @@ def do_test_sharded_ivf(self, index_key):
         co = faiss.GpuMultipleClonerOptions()
         co.shard = True
         co.common_ivf_quantizer = True
-        co.use_raft = False
+        co.use_cuvs = False
         index = faiss.index_cpu_to_all_gpus(index, co, ngpu=2)
         index.quantizer  # make sure there is indeed a quantizer
@@ -113,7 +113,7 @@ def test_binary_clone(self, ngpu=1, shard=False):
         co = faiss.GpuMultipleClonerOptions()
         co.shard = shard
-        co.use_raft = False
+        co.use_cuvs = False
         # index2 = faiss.index_cpu_to_all_gpus(index, ngpu=ngpu)
         res = faiss.StandardGpuResources()
@@ -192,7 +192,7 @@ def do_cpu_to_gpu(self, index_key):
         res = faiss.StandardGpuResources()
         co = faiss.GpuClonerOptions()
-        co.use_raft = False
+        co.use_cuvs = False
         gpu_index = faiss.index_cpu_to_gpu(res, 0, index, co)
         ts.append(time.time())
@@ -222,7 +222,7 @@ def do_cpu_to_gpu(self, index_key):
             res = [faiss.StandardGpuResources() for i in range(2)]
             co = faiss.GpuMultipleClonerOptions()
             co.shard = shard
-            co.use_raft = False
+            co.use_cuvs = False
             gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)
diff --git a/faiss/gpu/test/torch_test_contrib_gpu.py b/faiss/gpu/test/torch_test_contrib_gpu.py
index 8bb9d58120..e24e511401 100644
--- a/faiss/gpu/test/torch_test_contrib_gpu.py
+++ b/faiss/gpu/test/torch_test_contrib_gpu.py
@@ -80,7 +80,11 @@ def test_train_add_with_ids(self):
         res = faiss.StandardGpuResources()
         res.noTempMemory()
-        index = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2)
+        config = faiss.GpuIndexIVFFlatConfig()
+        # FIXME: triage failure when use_cuvs is set to True (issue #3968)
+        config.use_cuvs = False
+
+        index = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2, config)
         xb = torch.rand(1000, d, device=torch.device('cuda', 0), dtype=torch.float32)
         index.train(xb)
@@ -167,7 +171,7 @@ def test_ivfflat_reconstruct(self):
         res = faiss.StandardGpuResources()
         res.noTempMemory()
         config = faiss.GpuIndexIVFFlatConfig()
-        config.use_raft = False
+        config.use_cuvs = False
         index = faiss.GpuIndexIVFFlat(res, d, nlist, faiss.METRIC_L2, config)
@@ -253,7 +257,7 @@ def test_sa_encode_decode(self):
         return
 class TestTorchUtilsKnnGpu(unittest.TestCase):
-    def test_knn_gpu(self, use_raft=False):
+    def test_knn_gpu(self, use_cuvs=False):
         torch.manual_seed(10)
         d = 32
         nb = 1024
@@ -290,7 +294,7 @@ def test_knn_gpu(self, use_raft=False):
             else:
                 xb_c = xb_np
-            D, I = faiss.knn_gpu(res, xq_c, xb_c, k, use_raft=use_raft)
+            D, I = faiss.knn_gpu(res, xq_c, xb_c, k, use_cuvs=use_cuvs)
             self.assertTrue(torch.equal(torch.from_numpy(I), gt_I))
             self.assertLess((torch.from_numpy(D) - gt_D).abs().max(), 1e-4)
@@ -316,7 +320,7 @@ def test_knn_gpu(self, use_raft=False):
             xb_c = to_column_major_torch(xb)
             assert not xb_c.is_contiguous()
-            D, I = faiss.knn_gpu(res, xq_c, xb_c, k, use_raft=use_raft)
+            D, I = faiss.knn_gpu(res, xq_c, xb_c, k, use_cuvs=use_cuvs)
             self.assertTrue(torch.equal(I.cpu(), gt_I))
             self.assertLess((D.cpu() - gt_D).abs().max(), 1e-4)
@@ -324,7 +328,7 @@ def test_knn_gpu(self, use_raft=False):
         # test on subset
         try:
             # This internally uses the current pytorch stream
-            D, I = faiss.knn_gpu(res, xq_c[6:8], xb_c, k, use_raft=use_raft)
+            D, I = faiss.knn_gpu(res, xq_c[6:8], xb_c, k, use_cuvs=use_cuvs)
         except TypeError:
             if not xq_row_major:
                 # then it is expected
@@ -336,12 +340,12 @@ def test_knn_gpu(self, use_raft=False):
             self.assertLess((D.cpu() - gt_D[6:8]).abs().max(), 1e-4)
     @unittest.skipUnless(
-        "RAFT" in faiss.get_compile_options(),
-        "only if RAFT is compiled in")
-    def test_knn_gpu_raft(self):
-        self.test_knn_gpu(use_raft=True)
+        "CUVS" in faiss.get_compile_options(),
+        "only if CUVS is compiled in")
+    def test_knn_gpu_cuvs(self):
+        self.test_knn_gpu(use_cuvs=True)
-    def test_knn_gpu_datatypes(self, use_raft=False):
+    def test_knn_gpu_datatypes(self, use_cuvs=False):
         torch.manual_seed(10)
         d = 10
         nb = 1024
@@ -364,7 +368,7 @@ def test_knn_gpu_datatypes(self, use_raft=False):
         D = torch.zeros(nq, k, device=xb_c.device, dtype=torch.float32)
         I = torch.zeros(nq, k, device=xb_c.device, dtype=torch.int32)
-        faiss.knn_gpu(res, xq_c, xb_c, k, D, I, use_raft=use_raft)
+        faiss.knn_gpu(res, xq_c, xb_c, k, D, I, use_cuvs=use_cuvs)
         self.assertTrue(torch.equal(I.long().cpu(), gt_I))
         self.assertLess((D.float().cpu() - gt_D).abs().max(), 1.5e-3)
@@ -376,7 +380,7 @@ def test_knn_gpu_datatypes(self, use_raft=False):
         xb_c = xb.half().numpy()
         xq_c = xq.half().numpy()
-        faiss.knn_gpu(res, xq_c, xb_c, k, D, I, use_raft=use_raft)
+        faiss.knn_gpu(res, xq_c, xb_c, k, D, I, use_cuvs=use_cuvs)
         self.assertTrue(torch.equal(torch.from_numpy(I).long(), gt_I))
         self.assertLess((torch.from_numpy(D) - gt_D).abs().max(), 1.5e-3)
diff --git a/faiss/gpu/utils/RaftUtils.cu b/faiss/gpu/utils/CuvsUtils.cu
similarity index 97%
rename from faiss/gpu/utils/RaftUtils.cu
rename to faiss/gpu/utils/CuvsUtils.cu
index a759336eb9..1ec32179c6 100644
--- a/faiss/gpu/utils/RaftUtils.cu
+++ b/faiss/gpu/utils/CuvsUtils.cu
@@ -6,7 +6,7 @@
  *
  * LICENSE file in the root directory of this source tree.
  */
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
  */
 #include
-#include
+#include
 #include
 #include
 #include
diff --git a/faiss/gpu/utils/RaftUtils.h b/faiss/gpu/utils/CuvsUtils.h
similarity index 76%
rename from faiss/gpu/utils/RaftUtils.h
rename to faiss/gpu/utils/CuvsUtils.h
index 59ee1442f0..e44e5f12d5 100644
--- a/faiss/gpu/utils/RaftUtils.h
+++ b/faiss/gpu/utils/CuvsUtils.h
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -27,32 +27,32 @@
 #include
 #include
-#include
+#include
 #pragma GCC visibility push(default)
 namespace faiss {
 namespace gpu {
-inline raft::distance::DistanceType metricFaissToRaft(
+inline cuvsDistanceType metricFaissToCuvs(
         MetricType metric,
         bool exactDistance) {
     switch (metric) {
         case MetricType::METRIC_INNER_PRODUCT:
-            return raft::distance::DistanceType::InnerProduct;
+            return cuvsDistanceType::InnerProduct;
        case MetricType::METRIC_L2:
-            return raft::distance::DistanceType::L2Expanded;
+            return cuvsDistanceType::L2Expanded;
        case MetricType::METRIC_L1:
-            return raft::distance::DistanceType::L1;
+            return cuvsDistanceType::L1;
        case MetricType::METRIC_Linf:
-            return raft::distance::DistanceType::Linf;
+            return cuvsDistanceType::Linf;
        case MetricType::METRIC_Lp:
-            return raft::distance::DistanceType::LpUnexpanded;
+            return cuvsDistanceType::LpUnexpanded;
        case MetricType::METRIC_Canberra:
-            return raft::distance::DistanceType::Canberra;
+            return cuvsDistanceType::Canberra;
        case MetricType::METRIC_BrayCurtis:
-            return raft::distance::DistanceType::BrayCurtis;
+            return cuvsDistanceType::BrayCurtis;
        case MetricType::METRIC_JensenShannon:
-            return raft::distance::DistanceType::JensenShannon;
+            return cuvsDistanceType::JensenShannon;
        default:
            RAFT_FAIL("Distance type not supported");
    }
diff --git a/faiss/python/CMakeLists.txt b/faiss/python/CMakeLists.txt
index 4813176bb7..41e08eb348 100644
--- a/faiss/python/CMakeLists.txt
+++ b/faiss/python/CMakeLists.txt
@@ -42,9 +42,9 @@ macro(configure_swigfaiss source)
       COMPILE_DEFINITIONS FAISS_ENABLE_ROCM
     )
   endif()
-  if (FAISS_ENABLE_RAFT)
+  if (FAISS_ENABLE_CUVS)
     set_property(SOURCE ${source} APPEND PROPERTY
-      COMPILE_DEFINITIONS FAISS_ENABLE_RAFT
+      COMPILE_DEFINITIONS FAISS_ENABLE_CUVS
     )
   endif()
 endif()
@@ -191,21 +191,13 @@ if(FAISS_ENABLE_GPU)
     target_link_libraries(faiss_example_external_module PRIVATE hip::host)
   else()
     find_package(CUDAToolkit REQUIRED)
-    if(FAISS_ENABLE_RAFT)
-      find_package(raft COMPONENTS compiled distributed)
+    if(FAISS_ENABLE_CUVS)
+      find_package(cuvs)
     endif()
-    target_link_libraries(swigfaiss PRIVATE CUDA::cudart
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-    target_link_libraries(swigfaiss_avx2 PRIVATE CUDA::cudart
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-    target_link_libraries(swigfaiss_avx512 PRIVATE CUDA::cudart
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
-    target_link_libraries(swigfaiss_sve PRIVATE CUDA::cudart
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
-      $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
+    target_link_libraries(swigfaiss PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
+    target_link_libraries(swigfaiss_avx2 PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
+    target_link_libraries(swigfaiss_avx512 PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
+    target_link_libraries(swigfaiss_sve PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>)
   endif()
 endif()
diff --git a/faiss/python/gpu_wrappers.py b/faiss/python/gpu_wrappers.py
index 6e788511d2..4945722f6c 100644
--- a/faiss/python/gpu_wrappers.py
+++ b/faiss/python/gpu_wrappers.py
@@ -56,7 +56,7 @@ def index_cpu_to_gpus_list(index, co=None, gpus=None, ngpu=-1):
 # allows numpy ndarray usage with bfKnn
-def knn_gpu(res, xq, xb, k, D=None, I=None, metric=METRIC_L2, device=-1, use_raft=False, vectorsMemoryLimit=0, queriesMemoryLimit=0):
+def knn_gpu(res, xq, xb, k, D=None, I=None, metric=METRIC_L2, device=-1, use_cuvs=False, vectorsMemoryLimit=0, queriesMemoryLimit=0):
     """
     Compute the k nearest neighbors of a vector on one GPU without constructing an index
@@ -178,7 +178,7 @@ def knn_gpu(res, xq, xb, k, D=None, I=None, metric=METRIC_L2, device=-1, use_raf
     args.outIndices = I_ptr
     args.outIndicesType = I_type
     args.device = device
-    args.use_raft = use_raft
+    args.use_cuvs = use_cuvs
     # no stream synchronization needed, inputs and outputs are guaranteed to
     # be on the CPU (numpy arrays)
diff --git a/faiss/python/swigfaiss.swig b/faiss/python/swigfaiss.swig
index b13e23963d..493e42ef0e 100644
--- a/faiss/python/swigfaiss.swig
+++ b/faiss/python/swigfaiss.swig
@@ -680,7 +680,7 @@ struct faiss::simd16uint16 {};
 %include
 %include
 %include
-#ifdef FAISS_ENABLE_RAFT
+#ifdef FAISS_ENABLE_CUVS
 %include
 #endif
 %include
@@ -797,7 +797,7 @@ struct faiss::simd16uint16 {};
 DOWNCAST ( IndexRowwiseMinMax )
 DOWNCAST ( IndexRowwiseMinMaxFP16 )
 #ifdef GPU_WRAPPER
-#ifdef FAISS_ENABLE_RAFT
+#ifdef FAISS_ENABLE_CUVS
 DOWNCAST_GPU ( GpuIndexCagra )
 #endif
 DOWNCAST_GPU ( GpuIndexIVFPQ )
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 6202a2cbf9..63c1729d16 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -81,7 +81,7 @@ find_package(GTest CONFIG REQUIRED)
 target_link_libraries(faiss_test PRIVATE
   OpenMP::OpenMP_CXX
   GTest::gtest_main
-  $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft>
+  $<$<BOOL:${FAISS_ENABLE_CUVS}>:cuvs::cuvs>
   $<$<BOOL:${FAISS_ENABLE_ROCM}>:hip::host>
 )
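Usage note (illustration only, not part of the patch): after this rename the Python-facing switch is use_cuvs, both on the cloner options and as the knn_gpu keyword, mirroring the test changes above. A minimal sketch assuming a GPU-enabled faiss build; the dimensions and the IVF32,SQ8 factory string are arbitrary example choices:

    import faiss
    import numpy as np

    d = 32
    xb = np.random.rand(10000, d).astype(np.float32)   # database vectors
    xq = np.random.rand(5, d).astype(np.float32)       # query vectors

    res = faiss.StandardGpuResources()

    # Brute-force k-NN on one GPU; this keyword was use_raft before the rename.
    D, I = faiss.knn_gpu(res, xq, xb, 10, use_cuvs=False)

    # Cloning a CPU index to the GPU goes through the same renamed flag.
    index = faiss.index_factory(d, "IVF32,SQ8")
    index.train(xb)
    co = faiss.GpuClonerOptions()
    co.use_cuvs = False   # SQ8 is not supported by the cuVS backend (see test above)
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index, co)
    gpu_index.add(xb)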