PowerPC, improve code generation for function fvec_L2sqr (#3416)

Summary: The code generated for function fvec_L2sqr generated by OpenXL do not perform as good as the codes generated by gcc on Power. The macros to enable imprecise floating point operation don’t cover Power with OpenXL. This patch adds the OpenXL compiler options for the PowerPC macros to achieve better performance. Pull Request resolved: #3416 Reviewed By: asadoughi Differential Revision: D57210015 Pulled By: mdouze fbshipit-source-id: 6b838a2fa4d4996fe52c9f1105827004626fe720
facebookresearch · May 10, 2024 · e1e4ad0 · e1e4ad0
1 parent 34fa2ae
commit e1e4ad0
Showing 1 changed file with 8 additions and 0 deletions.
diff --git a/faiss/impl/platform_macros.h b/faiss/impl/platform_macros.h
@@ -127,6 +127,13 @@ inline int __builtin_clzll(uint64_t x) {
     __pragma(float_control(precise, off, push))
 #define FAISS_PRAGMA_IMPRECISE_FUNCTION_END __pragma(float_control(pop))
 #elif defined(__clang__)
+#if defined(__PPC__)
+#define FAISS_PRAGMA_IMPRECISE_LOOP \
+    _Pragma("clang loop vectorize_width(4) interleave_count(8)")
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN \
+    _Pragma("float_control(precise, off, push)")
+#define FAISS_PRAGMA_IMPRECISE_FUNCTION_END _Pragma("float_control(pop)")
+#else
 #define FAISS_PRAGMA_IMPRECISE_LOOP \
     _Pragma("clang loop vectorize(enable) interleave(enable)")
 
@@ -144,6 +151,7 @@ inline int __builtin_clzll(uint64_t x) {
 #define FAISS_PRAGMA_IMPRECISE_FUNCTION_BEGIN
 #define FAISS_PRAGMA_IMPRECISE_FUNCTION_END
 #endif
+#endif
 #elif defined(__GNUC__)
 // Unfortunately, GCC does not provide a pragma for detecting it.
 // So, we have to stick to GNUC, which is defined by MANY compilers.