Interop between CAGRA and HNSW #3252

Closed. This pull request wants to merge 62 commits, all authored by divyegala.

Commits
753a109 start integration of cagra (Oct 5, 2023)
6ce2467 merge upstream (Jan 23, 2024)
f21c1f1 add public API layer (Jan 30, 2024)
11c0c54 merge upstream (Jan 30, 2024)
656f493 write tests, figure out a way to compare (Feb 1, 2024)
de67ca6 Merge remote-tracking branch 'upstream/main' into raft-cagra (Feb 1, 2024)
ed32954 passing tests (Feb 7, 2024)
42ca862 remove cpp test file (Feb 7, 2024)
2fdfc6f Merge remote-tracking branch 'upstream/main' into raft-cagra (Feb 7, 2024)
2c9e965 style check (Feb 7, 2024)
2e434fe add required methods (Feb 7, 2024)
382c178 conditionally compile cagra (Feb 8, 2024)
8675974 copyTo and copyFrom (Feb 14, 2024)
c7fcf4a style check (Feb 14, 2024)
eae832d Merge branch 'main' into raft-cagra-hnsw (Feb 14, 2024)
4b76e5f Merge branch 'main' into raft-cagra-hnsw (Feb 16, 2024)
065f912 add read/write (Feb 20, 2024)
301f429 Merge remote-tracking branch 'origin/raft-cagra-hnsw' into raft-cagra… (Feb 20, 2024)
2b0ea76 add destructor (Feb 20, 2024)
8c83bd2 destructor body, copyto reset (Feb 21, 2024)
39fb35a remove destructor (Feb 21, 2024)
49e2610 move cmake sources around (Feb 21, 2024)
11bf6b2 merge upstream (Feb 21, 2024)
d4434bb more protections for copying (Feb 21, 2024)
ac65c2d support default constructed IndexHnswCagra in copyTo (Feb 22, 2024)
619c376 fix failing binary hnsw tests (Feb 22, 2024)
e25f8a4 link faiss_gpu target to OpenMP (Feb 23, 2024)
e835150 raft still can't find openmp (Feb 23, 2024)
aeabe12 openmp flags and uint32 IndexType (Feb 26, 2024)
4e80586 forgot conditional check in index_read (Feb 26, 2024)
c4bcaba minor changes (Mar 7, 2024)
341a3fc api change (Mar 7, 2024)
0ae7702 Merge branch 'raft-api-changes' into raft-cagra-hnsw (Mar 7, 2024)
172aa65 working python (Mar 20, 2024)
0cd684e compile option to swig (Mar 21, 2024)
7ff8b3b expose ivf pq params (Apr 3, 2024)
66d236f update comments style (Apr 22, 2024)
f697eac Merge remote-tracking branch 'upstream/main' into raft-cagra-hnsw (Apr 22, 2024)
1d6e6b1 use raft::runtime where possible (Apr 22, 2024)
4a01ad4 format (Apr 22, 2024)
949e634 format properly (Apr 22, 2024)
bccd54a InnerProduct (Apr 30, 2024)
320654c Merge remote-tracking branch 'upstream/main' into raft-cagra-hnsw (Apr 30, 2024)
2aaa6e9 passing ip tests (May 7, 2024)
70b0ab8 address review (May 9, 2024)
e5756cc Merge remote-tracking branch 'upstream/main' into raft-cagra-hnsw (May 9, 2024)
8f72ce4 Merge remote-tracking branch 'upstream/main' into raft-cagra-hnsw (May 14, 2024)
4148fea base level only search (May 21, 2024)
c13bcff Merge remote-tracking branch 'upstream/main' into raft-cagra-hnsw (May 21, 2024)
24a555d fix virtual functions and serialization (May 23, 2024)
51227b1 invert conditional (May 23, 2024)
579a301 debug msg (May 23, 2024)
ae0b8ba more debug prints (May 23, 2024)
4170a3e fix efSearch setting in base search (May 23, 2024)
75808b1 re-negate ip distances in search_level (May 23, 2024)
09fb95b Merge remote-tracking branch 'upstream/main' into raft-cagra-hnsw (May 23, 2024)
9bd1039 fix format (May 23, 2024)
ea8028d re-up minimum recall for base only IP distance (May 23, 2024)
fc31351 add python tests (May 30, 2024)
3e2d343 Merge remote-tracking branch 'upstream/main' into raft-cagra-hnsw (May 30, 2024)
03ee1fb ifdef guards in gpu cloner (May 30, 2024)
2e9cbc8 option to exclude dataset store on index (Jun 6, 2024)
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -46,6 +46,8 @@ project(faiss
LANGUAGES ${FAISS_LANGUAGES})
include(GNUInstallDirs)

set(CMAKE_INSTALL_PREFIX "$ENV{CONDA_PREFIX}")
Contributor: Not sure we want to keep this as Faiss is not necessarily compiled in a conda env.

Contributor (author): Ah, sorry. This was just for development purposes - I will remove it.


set(CMAKE_CXX_STANDARD 17)

list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
124 changes: 119 additions & 5 deletions faiss/IndexHNSW.cpp
@@ -17,12 +17,16 @@
#include <cstdlib>
#include <cstring>

#include <limits>
#include <memory>
#include <queue>
#include <random>
#include <unordered_set>

#include <sys/stat.h>
#include <sys/types.h>
#include <cstdint>
#include "impl/HNSW.h"

#include <faiss/Index2Layer.h>
#include <faiss/IndexFlat.h>
@@ -146,7 +150,9 @@ void hnsw_add_vertices(

int i1 = n;

for (int pt_level = hist.size() - 1; pt_level >= 0; pt_level--) {
for (int pt_level = hist.size() - 1;
pt_level >= !index_hnsw.init_level0;
pt_level--) {
int i0 = i1 - hist[pt_level];

if (verbose) {
Expand Down Expand Up @@ -182,7 +188,13 @@ void hnsw_add_vertices(
continue;
}

hnsw.add_with_locks(*dis, pt_level, pt_id, locks, vt);
hnsw.add_with_locks(
*dis,
pt_level,
pt_id,
locks,
vt,
index_hnsw.keep_max_size_level0 && (pt_level == 0));

if (prev_display >= 0 && i - i0 > prev_display + 10000) {
prev_display = i - i0;
@@ -202,7 +214,11 @@
}
i1 = i0;
}
FAISS_ASSERT(i1 == 0);
if (index_hnsw.init_level0) {
FAISS_ASSERT(i1 == 0);
} else {
FAISS_ASSERT((i1 - hist[0]) == 0);
}
}
if (verbose) {
printf("Done in %.3f ms\n", getmillisecs() - t0);
@@ -405,10 +421,18 @@ void IndexHNSW::search_level_0(
float* distances,
idx_t* labels,
int nprobe,
int search_type) const {
int search_type,
const SearchParameters* params_in) const {
FAISS_THROW_IF_NOT(k > 0);
FAISS_THROW_IF_NOT(nprobe > 0);

const SearchParametersHNSW* params = nullptr;

if (params_in) {
params = dynamic_cast<const SearchParametersHNSW*>(params_in);
FAISS_THROW_IF_NOT_MSG(params, "params type invalid");
}

storage_idx_t ntotal = hnsw.levels.size();

using RH = HeapBlockResultHandler<HNSW::C>;
@@ -435,13 +459,21 @@
nearest_d + i * nprobe,
search_type,
search_stats,
vt);
vt,
params);
res.end();
vt.advance();
}
#pragma omp critical
{ hnsw_stats.combine(search_stats); }
}
if (is_similarity_metric(this->metric_type)) {
// we need to revert the negated distances
#pragma omp parallel for
for (size_t i = 0; i < k * n; i++) {
distances[i] = -distances[i];
}
}
}

void IndexHNSW::init_level_0_from_knngraph(
@@ -864,4 +896,86 @@ void IndexHNSW2Level::flip_to_ivf() {
delete storage2l;
}

/**************************************************************
* IndexHNSWCagra implementation
**************************************************************/

IndexHNSWCagra::IndexHNSWCagra() {
is_trained = true;
}

IndexHNSWCagra::IndexHNSWCagra(int d, int M, MetricType metric)
: IndexHNSW(
(metric == METRIC_L2)
? static_cast<IndexFlat*>(new IndexFlatL2(d))
: static_cast<IndexFlat*>(new IndexFlatIP(d)),
M) {
FAISS_THROW_IF_NOT_MSG(
((metric == METRIC_L2) || (metric == METRIC_INNER_PRODUCT)),
"unsupported metric type for IndexHNSWCagra");
own_fields = true;
is_trained = true;
init_level0 = true;
keep_max_size_level0 = true;
}

void IndexHNSWCagra::add(idx_t n, const float* x) {
FAISS_THROW_IF_NOT_MSG(
!base_level_only,
"Cannot add vectors when base_level_only is set to True");

IndexHNSW::add(n, x);
}

void IndexHNSWCagra::search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels,
const SearchParameters* params) const {
if (!base_level_only) {
IndexHNSW::search(n, x, k, distances, labels, params);
} else {
std::vector<storage_idx_t> nearest(n);
std::vector<float> nearest_d(n);

#pragma omp parallel for
for (idx_t i = 0; i < n; i++) {
std::unique_ptr<DistanceComputer> dis(
storage_distance_computer(this->storage));
dis->set_query(x + i * d);
nearest[i] = -1;
nearest_d[i] = std::numeric_limits<float>::max();

std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<idx_t> distrib(0, this->ntotal - 1);

for (idx_t j = 0; j < num_base_level_search_entrypoints; j++) {
auto idx = distrib(gen);
auto distance = (*dis)(idx);
if (distance < nearest_d[i]) {
nearest[i] = idx;
nearest_d[i] = distance;
}
}
FAISS_THROW_IF_NOT_MSG(
nearest[i] >= 0, "Could not find a valid entrypoint.");
}

search_level_0(
n,
x,
k,
nearest.data(),
nearest_d.data(),
distances,
labels,
1, // n_probes
1, // search_type
params);
}
}

} // namespace faiss
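For orientation, here is a minimal usage sketch of the CPU-side index implemented above. This is not part of the PR: it assumes a build of this branch, and the data values, sizes, and parameter choices are placeholders.

```cpp
#include <faiss/IndexHNSW.h>

#include <vector>

// Sketch: build an IndexHNSWCagra on the CPU and search it with an
// explicit efSearch, which the patched search path reads from
// SearchParametersHNSW.
int main() {
    int d = 64, nb = 1000, k = 10;
    faiss::IndexHNSWCagra index(d, /*M=*/32, faiss::METRIC_L2);

    std::vector<float> xb(size_t(nb) * d);
    for (size_t i = 0; i < xb.size(); i++) {
        xb[i] = float(i % 97) / 97.0f; // placeholder vectors
    }
    index.add(nb, xb.data()); // allowed while base_level_only is false

    faiss::SearchParametersHNSW params;
    params.efSearch = 64;

    std::vector<float> xq(xb.begin(), xb.begin() + d);
    std::vector<float> distances(k);
    std::vector<faiss::idx_t> labels(k);
    index.search(1, xq.data(), k, distances.data(), labels.data(), &params);
    return 0;
}
```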
44 changes: 43 additions & 1 deletion faiss/IndexHNSW.h
@@ -34,6 +34,18 @@ struct IndexHNSW : Index {
bool own_fields = false;
Index* storage = nullptr;

// When set to false, level 0 in the knn graph is not initialized.
// This option is used by GpuIndexCagra::copyTo(IndexHNSWCagra*)
// as the level 0 knn graph is copied over from the index built by
// GpuIndexCagra.
bool init_level0 = true;

// When set to true, all neighbors in level 0 are filled up
// to the maximum size allowed (2 * M). This option is used by
// IndexHNSWCagra to create a full base layer graph that is
// used when GpuIndexCagra::copyFrom(IndexHNSWCagra*) is invoked.
bool keep_max_size_level0 = false;

explicit IndexHNSW(int d = 0, int M = 32, MetricType metric = METRIC_L2);
explicit IndexHNSW(Index* storage, int M = 32);

@@ -81,7 +93,8 @@
float* distances,
idx_t* labels,
int nprobe = 1,
int search_type = 1) const;
int search_type = 1,
const SearchParameters* params = nullptr) const;

/// alternative graph building
void init_level_0_from_knngraph(int k, const float* D, const idx_t* I);
@@ -148,4 +161,33 @@
const SearchParameters* params = nullptr) const override;
};

struct IndexHNSWCagra : IndexHNSW {
IndexHNSWCagra();
IndexHNSWCagra(int d, int M, MetricType metric = METRIC_L2);

/// When set to true, the index is immutable.
/// This option is used to copy the knn graph from GpuIndexCagra
/// to the base level of IndexHNSWCagra without adding upper levels.
/// Doing so makes it possible to search the HNSW index, but removes the
/// ability to add vectors.
bool base_level_only = false;

/// When `base_level_only` is set to `True`, the search function
/// searches only the base level knn graph of the HNSW index.
/// This parameter sets the number of randomly sampled points from
/// which the closest one is chosen as the search entry point.
int num_base_level_search_entrypoints = 32;

void add(idx_t n, const float* x) override;

/// entry point for search
void search(
idx_t n,
const float* x,
idx_t k,
float* distances,
idx_t* labels,
const SearchParameters* params = nullptr) const override;
};

} // namespace faiss
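As a sketch of how the new fields declared above fit together, the following hypothetical helper (not part of the PR; the value 64 is illustrative) configures an index whose level-0 graph was populated by GpuIndexCagra::copyTo for base-level-only search:

```cpp
#include <faiss/IndexHNSW.h>

// Hypothetical helper: prepare an IndexHNSWCagra whose base layer was
// filled in externally (e.g. by GpuIndexCagra::copyTo) for search.
void configure_for_base_level_search(faiss::IndexHNSWCagra& index) {
    // Only the CAGRA base layer exists: add() now throws, and search()
    // starts from sampled entry points instead of the upper levels.
    index.base_level_only = true;

    // Sampling more random candidates tends to find a closer entry
    // point, at the cost of extra distance computations per query.
    index.num_base_level_search_entrypoints = 64;
}
```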
8 changes: 6 additions & 2 deletions faiss/gpu/CMakeLists.txt
@@ -238,11 +238,15 @@ generate_ivf_interleaved_code()

if(FAISS_ENABLE_RAFT)
list(APPEND FAISS_GPU_HEADERS
GpuIndexCagra.h
impl/RaftCagra.cuh
impl/RaftFlatIndex.cuh
impl/RaftIVFFlat.cuh
impl/RaftIVFPQ.cuh
utils/RaftUtils.h)
list(APPEND FAISS_GPU_SRC
GpuIndexCagra.cu
impl/RaftCagra.cu
impl/RaftFlatIndex.cu
impl/RaftIVFFlat.cu
impl/RaftIVFPQ.cu
@@ -316,5 +320,5 @@
target_link_options(faiss_gpu PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")

find_package(CUDAToolkit REQUIRED)
target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
target_compile_options(faiss_gpu PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr>)
target_link_libraries(faiss_gpu PRIVATE CUDA::cudart CUDA::cublas $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::compiled> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass> $<$<BOOL:${FAISS_ENABLE_RAFT}>:OpenMP::OpenMP_CXX>)
target_compile_options(faiss_gpu PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xfatbin=-compress-all --expt-extended-lambda --expt-relaxed-constexpr $<$<BOOL:${FAISS_ENABLE_RAFT}>:-Xcompiler=${OpenMP_CXX_FLAGS}>>)
29 changes: 27 additions & 2 deletions faiss/gpu/GpuCloner.cpp
@@ -14,6 +14,9 @@

#include <faiss/IndexBinaryFlat.h>
#include <faiss/IndexFlat.h>
#if defined USE_NVIDIA_RAFT
#include <faiss/IndexHNSW.h>
#endif
#include <faiss/IndexIVF.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexIVFPQ.h>
@@ -24,6 +27,9 @@
#include <faiss/MetaIndexes.h>
#include <faiss/gpu/GpuIndex.h>
#include <faiss/gpu/GpuIndexBinaryFlat.h>
#if defined USE_NVIDIA_RAFT
#include <faiss/gpu/GpuIndexCagra.h>
#endif
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/GpuIndexIVFPQ.h>
@@ -85,7 +91,15 @@ Index* ToCPUCloner::clone_Index(const Index* index) {
// objective is to make a single component out of them
// (inverse op of ToGpuClonerMultiple)

} else if (auto ish = dynamic_cast<const IndexShards*>(index)) {
}
#if defined USE_NVIDIA_RAFT
else if (auto icg = dynamic_cast<const GpuIndexCagra*>(index)) {
IndexHNSWCagra* res = new IndexHNSWCagra();
icg->copyTo(res);
return res;
}
#endif
else if (auto ish = dynamic_cast<const IndexShards*>(index)) {
int nshard = ish->count();
FAISS_ASSERT(nshard > 0);
Index* res = clone_Index(ish->at(0));
@@ -215,7 +229,18 @@
}

return res;
} else {
}
#if defined USE_NVIDIA_RAFT
else if (auto icg = dynamic_cast<const faiss::IndexHNSWCagra*>(index)) {
GpuIndexCagraConfig config;
config.device = device;
GpuIndexCagra* res =
new GpuIndexCagra(provider, icg->d, icg->metric_type, config);
res->copyFrom(icg);
return res;
}
#endif
else {
// use CPU cloner for IDMap and PreTransform
auto index_idmap = dynamic_cast<const IndexIDMap*>(index);
auto index_pt = dynamic_cast<const IndexPreTransform*>(index);
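Taken together, the cloner branches above are what let the generic faiss::gpu::index_cpu_to_gpu / index_gpu_to_cpu helpers round-trip the new index types. A sketch under the assumption of a RAFT-enabled build (FAISS_ENABLE_RAFT=ON) with at least one GPU:

```cpp
#include <faiss/IndexHNSW.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/StandardGpuResources.h>

#include <memory>
#include <vector>

int main() {
    int d = 64, nb = 10000;
    faiss::IndexHNSWCagra cpu_index(d, /*M=*/32, faiss::METRIC_L2);

    std::vector<float> xb(size_t(nb) * d);
    for (size_t i = 0; i < xb.size(); i++) {
        xb[i] = float(i % 113) / 113.0f; // placeholder data
    }
    cpu_index.add(nb, xb.data());

    faiss::gpu::StandardGpuResources res;

    // CPU -> GPU: ToGpuCloner dispatches to GpuIndexCagra::copyFrom.
    std::unique_ptr<faiss::Index> gpu_index(
            faiss::gpu::index_cpu_to_gpu(&res, /*device=*/0, &cpu_index));

    // GPU -> CPU: ToCPUCloner dispatches to GpuIndexCagra::copyTo.
    std::unique_ptr<faiss::Index> round_trip(
            faiss::gpu::index_gpu_to_cpu(gpu_index.get()));
    return 0;
}
```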