Skip to content

Commit

Permalink
Merge branch 'main' into export-D66074156
Browse files Browse the repository at this point in the history
  • Loading branch information
asadoughi authored Nov 25, 2024
2 parents ff15eef + 37f52dc commit bccfdb4
Show file tree
Hide file tree
Showing 14 changed files with 63 additions and 79 deletions.
8 changes: 4 additions & 4 deletions .github/actions/build_conda/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ runs:
- name: Install conda build tools
shell: ${{ steps.choose_shell.outputs.shell }}
run: |
conda update -y -q conda
conda install -y -q conda-build
conda install -y -q "conda!=24.11.0"
conda install -y -q "conda-build!=24.11.0"
- name: Enable anaconda uploads
if: inputs.label != ''
shell: ${{ steps.choose_shell.outputs.shell }}
Expand Down Expand Up @@ -80,7 +80,7 @@ runs:
working-directory: conda
run: |
conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
-c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge
-c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia
- name: Conda build (GPU w/ cuVS) w/ anaconda upload
if: inputs.label != '' && inputs.cuda != '' && inputs.cuvs != ''
shell: ${{ steps.choose_shell.outputs.shell }}
Expand All @@ -89,4 +89,4 @@ runs:
PACKAGE_TYPE: ${{ inputs.label }}
run: |
conda build faiss-gpu-cuvs --variants '{ "cudatoolkit": "${{ inputs.cuda }}" }' \
--user pytorch --label ${{ inputs.label }} -c pytorch -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge
--user pytorch --label ${{ inputs.label }} -c pytorch -c rapidsai -c rapidsai-nightly -c conda-forge -c nvidia/label/cuda-${{ inputs.cuda }} -c nvidia
6 changes: 3 additions & 3 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -238,8 +238,8 @@ jobs:
with:
label: main
cuda: "12.1.1"
linux-x86_64-GPU-CUVS-packages-CUDA12-1-1:
name: Linux x86_64 GPU w/ cuVS packages (CUDA 12.1.1)
linux-x86_64-GPU-CUVS-packages-CUDA12-4-0:
name: Linux x86_64 GPU w/ cuVS packages (CUDA 12.4.0)
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
runs-on: 4-core-ubuntu-gpu-t4
env:
Expand All @@ -257,7 +257,7 @@ jobs:
with:
label: main
cuvs: "ON"
cuda: "12.1.1"
cuda: "12.4.0"
windows-x86_64-packages:
name: Windows x86_64 packages
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ jobs:
with:
label: nightly
cuda: "12.1.1"
linux-x86_64-GPU-CUVS-CUDA12-1-1-nightly:
name: Linux x86_64 GPU w/ cuVS nightlies (CUDA 12.1.1)
linux-x86_64-GPU-CUVS-CUDA12-4-0-nightly:
name: Linux x86_64 GPU w/ cuVS nightlies (CUDA 12.4.0)
runs-on: 4-core-ubuntu-gpu-t4
env:
CUDA_ARCHS: "70-real;72-real;75-real;80;86-real"
Expand All @@ -90,7 +90,7 @@ jobs:
with:
label: nightly
cuvs: "ON"
cuda: "12.1.1"
cuda: "12.4.0"
windows-x86_64-nightly:
name: Windows x86_64 nightlies
runs-on: windows-2019
Expand Down
4 changes: 2 additions & 2 deletions conda/faiss-gpu-cuvs/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
{% if cudatoolkit == '11.8.0' %}
{% set cuda_constraints=">=11.8,<12" %}
{% set libcublas_constraints=">=11.11,<12" %}
{% elif cudatoolkit == '12.1.1' %}
{% elif cudatoolkit == '12.4.0' %}
{% set cuda_constraints=">=12.1,<13" %}
{% set libcublas_constraints=">=12.1,<13" %}
{% endif %}
Expand Down Expand Up @@ -48,7 +48,7 @@ outputs:
- {{ compiler('cxx') }}
- sysroot_linux-64 # [linux64]
- llvm-openmp # [osx]
- cmake >=3.24.0
- cmake >=3.26.4
- make # [not win]
- _openmp_mutex =4.5=2_kmp_llvm # [x86_64]
- mkl =2023 # [x86_64]
Expand Down
5 changes: 0 additions & 5 deletions faiss/gpu/GpuDistance.cu
Original file line number Diff line number Diff line change
Expand Up @@ -402,16 +402,11 @@ void bfKnn(GpuResourcesProvider* prov, const GpuDistanceParams& args) {
} else if (args.vectorType == DistanceDataType::F16) {
bfKnnConvert<half>(prov, args);
} else if (args.vectorType == DistanceDataType::BF16) {
// no bf16 support for AMD
#ifndef USE_AMD_ROCM
if (prov->getResources()->supportsBFloat16CurrentDevice()) {
bfKnnConvert<__nv_bfloat16>(prov, args);
} else {
FAISS_THROW_MSG("not compiled with bfloat16 support");
}
#else
FAISS_THROW_MSG("no AMD bfloat16 support");
#endif
} else {
FAISS_THROW_MSG("unknown vectorType");
}
Expand Down
43 changes: 39 additions & 4 deletions faiss/gpu/hipify.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,46 @@
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Usage: ./gpu/hipify.sh
#

function hipify_dir()
{
# print dir name
cd "$1" || exit
echo "Hipifying $(pwd)"

if [ -d ./gpu-tmp ]; then
#Clearing out any leftover files and directories
echo "Removing old ./gpu-tmp"
rm -rf ./gpu-tmp
fi

if [ -d ./gpu ]; then
#Making a temp directory to implement pre hipify rules
echo "Creating ./gpu-tmp"
cp -r ./gpu ./gpu-tmp

# adjust __nv_bfloat162 before hipify because of inaccurate conversions
# adjust __nv_bfloat16 before hipify because of inaccurate conversions
for ext in hip cuh h cpp c cu cuh
do
while IFS= read -r -d '' src
do
sed -i 's@__nv_bfloat162@__hip_bfloat162@' "$src"
sed -i 's@__nv_bfloat16@__hip_bfloat16@' "$src"
done < <(find ./gpu-tmp -name "*.$ext" -print0)
done
else
echo "Can't find the gpu/ dir"
exit
fi

# create all destination directories for hipified files into sibling 'gpu-rocm' directory
while IFS= read -r -d '' src
do
dst="${src//gpu/gpu-rocm}"
dst="${src//gpu-tmp/gpu-rocm}"

if [ -d $dst ]; then
#Clearing out any leftover files and directories
Expand All @@ -24,17 +53,17 @@ function hipify_dir()
#Making directories
echo "Creating $dst"
mkdir -p "$dst"
done < <(find ./gpu -type d -print0)
done < <(find ./gpu-tmp -type d -print0)

# run hipify-perl against all *.cu *.cuh *.h *.cpp *.c files, no renaming
# run all files in parallel to speed up
for ext in cu cuh h cpp c
do
while IFS= read -r -d '' src
do
dst="${src//\.\/gpu/\.\/gpu-rocm}"
dst="${src//\.\/gpu-tmp/\.\/gpu-rocm}"
hipify-perl -o="$dst.tmp" "$src" &
done < <(find ./gpu -name "*.$ext" -print0)
done < <(find ./gpu-tmp -name "*.$ext" -print0)
done
wait

Expand All @@ -45,6 +74,12 @@ function hipify_dir()
mv "$src" "$dst"
done < <(find ./gpu-rocm -name "*.cu.tmp" -print0)

if [ -d ./gpu-tmp ]; then
#Clearing out any leftover files and directories
echo "Removing ./gpu-tmp"
rm -rf ./gpu-tmp
fi

# replace header include statements "<faiss/gpu/" with "<faiss/gpu-rocm"
# replace thrust::cuda::par with thrust::hip::par
# adjust header path location for hipblas.h to avoid unnecessary deprecation warnings
Expand Down
12 changes: 0 additions & 12 deletions faiss/gpu/impl/Distance.cu
Original file line number Diff line number Diff line change
Expand Up @@ -504,8 +504,6 @@ void runAllPairwiseL2Distance(
outDistances);
}

// no bf16 support for AMD
#ifndef USE_AMD_ROCM
void runAllPairwiseL2Distance(
GpuResources* res,
cudaStream_t stream,
Expand All @@ -526,7 +524,6 @@ void runAllPairwiseL2Distance(
queriesRowMajor,
outDistances);
}
#endif // USE_AMD_ROCM

void runAllPairwiseIPDistance(
GpuResources* res,
Expand Down Expand Up @@ -568,8 +565,6 @@ void runAllPairwiseIPDistance(
outDistances);
}

// no bf16 support for AMD
#ifndef USE_AMD_ROCM
void runAllPairwiseIPDistance(
GpuResources* res,
cudaStream_t stream,
Expand All @@ -589,7 +584,6 @@ void runAllPairwiseIPDistance(
queriesRowMajor,
outDistances);
}
#endif // USE_AMD_ROCM

void runL2Distance(
GpuResources* res,
Expand Down Expand Up @@ -643,8 +637,6 @@ void runL2Distance(
ignoreOutDistances);
}

// no bf16 support for AMD
#ifndef USE_AMD_ROCM
void runL2Distance(
GpuResources* res,
cudaStream_t stream,
Expand All @@ -670,7 +662,6 @@ void runL2Distance(
outIndices,
ignoreOutDistances);
}
#endif // USE_AMD_ROCM

void runIPDistance(
GpuResources* res,
Expand Down Expand Up @@ -716,8 +707,6 @@ void runIPDistance(
outIndices);
}

// no bf16 support for AMD
#ifndef USE_AMD_ROCM
void runIPDistance(
GpuResources* res,
cudaStream_t stream,
Expand All @@ -739,7 +728,6 @@ void runIPDistance(
outDistances,
outIndices);
}
#endif // USE_AMD_ROCM

} // namespace gpu
} // namespace faiss
12 changes: 0 additions & 12 deletions faiss/gpu/impl/Distance.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ void runAllPairwiseL2Distance(
bool queriesRowMajor,
Tensor<float, 2, true>& outDistances);

// no bf16 support for AMD
#ifndef USE_AMD_ROCM
void runAllPairwiseL2Distance(
GpuResources* res,
cudaStream_t stream,
Expand All @@ -52,7 +50,6 @@ void runAllPairwiseL2Distance(
Tensor<__nv_bfloat16, 2, true>& queries,
bool queriesRowMajor,
Tensor<float, 2, true>& outDistances);
#endif // USE_AMD_ROCM

void runAllPairwiseIPDistance(
GpuResources* res,
Expand All @@ -72,8 +69,6 @@ void runAllPairwiseIPDistance(
bool queriesRowMajor,
Tensor<float, 2, true>& outDistances);

// no bf16 support for AMD
#ifndef USE_AMD_ROCM
void runAllPairwiseIPDistance(
GpuResources* res,
cudaStream_t stream,
Expand All @@ -82,7 +77,6 @@ void runAllPairwiseIPDistance(
Tensor<__nv_bfloat16, 2, true>& queries,
bool queriesRowMajor,
Tensor<float, 2, true>& outDistances);
#endif // USE_AMD_ROCM

/// Calculates brute-force L2 distance between `vectors` and
/// `queries`, returning the k closest results seen
Expand Down Expand Up @@ -116,8 +110,6 @@ void runL2Distance(
Tensor<idx_t, 2, true>& outIndices,
bool ignoreOutDistances = false);

// no bf16 support for AMD
#ifndef USE_AMD_ROCM
void runL2Distance(
GpuResources* resources,
cudaStream_t stream,
Expand All @@ -130,7 +122,6 @@ void runL2Distance(
Tensor<float, 2, true>& outDistances,
Tensor<idx_t, 2, true>& outIndices,
bool ignoreOutDistances = false);
#endif // USE_AMD_ROCM

/// Calculates brute-force inner product distance between `vectors`
/// and `queries`, returning the k closest results seen
Expand All @@ -156,8 +147,6 @@ void runIPDistance(
Tensor<float, 2, true>& outDistances,
Tensor<idx_t, 2, true>& outIndices);

// no bf16 support for AMD
#ifndef USE_AMD_ROCM
void runIPDistance(
GpuResources* resources,
cudaStream_t stream,
Expand All @@ -168,7 +157,6 @@ void runIPDistance(
int k,
Tensor<float, 2, true>& outDistances,
Tensor<idx_t, 2, true>& outIndices);
#endif // USE_AMD_ROCM

//
// General distance implementation, assumes that all arguments are on the
Expand Down
3 changes: 0 additions & 3 deletions faiss/gpu/impl/L2Norm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,6 @@ void runL2Norm(
runL2Norm<half, half2>(input, inputRowMajor, output, normSquared, stream);
}

// no bf16 support for AMD
#ifndef USE_AMD_ROCM
void runL2Norm(
Tensor<__nv_bfloat16, 2, true>& input,
bool inputRowMajor,
Expand All @@ -286,7 +284,6 @@ void runL2Norm(
runL2Norm<__nv_bfloat16, __nv_bfloat162>(
input, inputRowMajor, output, normSquared, stream);
}
#endif

} // namespace gpu
} // namespace faiss
3 changes: 0 additions & 3 deletions faiss/gpu/impl/L2Norm.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,12 @@ void runL2Norm(
bool normSquared,
cudaStream_t stream);

// no bf16 support for AMD
#ifndef USE_AMD_ROCM
void runL2Norm(
Tensor<__nv_bfloat16, 2, true>& input,
bool inputRowMajor,
Tensor<float, 1, true>& output,
bool normSquared,
cudaStream_t stream);
#endif

} // namespace gpu
} // namespace faiss
8 changes: 0 additions & 8 deletions faiss/gpu/utils/ConversionOperators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,9 @@ struct ConvertTo<float> {
static inline __device__ float to(half v) {
return __half2float(v);
}

#ifndef USE_AMD_ROCM
static inline __device__ float to(__nv_bfloat16 v) {
return __bfloat162float(v);
}
#endif // !USE_AMD_ROCM
};

template <>
Expand Down Expand Up @@ -96,9 +93,6 @@ struct ConvertTo<Half4> {
}
};

// no bf16 support for AMD
#ifndef USE_AMD_ROCM

template <>
struct ConvertTo<__nv_bfloat16> {
static inline __device__ __nv_bfloat16 to(float v) {
Expand All @@ -112,8 +106,6 @@ struct ConvertTo<__nv_bfloat16> {
}
};

#endif // USE_AMD_ROCM

template <typename From, typename To>
struct Convert {
inline __device__ To operator()(From v) const {
Expand Down
Loading

0 comments on commit bccfdb4

Please sign in to comment.