Skip to content

Commit

Permalink
Use +sme for Apple
Browse files Browse the repository at this point in the history
Signed-off-by: Taiju Yamada <[email protected]>
  • Loading branch information
cielavenir committed Nov 8, 2024
1 parent 496255c commit b504f2e
Show file tree
Hide file tree
Showing 17 changed files with 81 additions and 21 deletions.
10 changes: 5 additions & 5 deletions erasure_code/aarch64/ec_aarch64_dispatcher.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_dot_prod_neon);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
if (sysctlEnabled(SYSCTL_SME_KEY))
return PROVIDER_INFO(gf_vect_dot_prod_sve);
return PROVIDER_INFO(gf_vect_dot_prod_neon);
#endif
Expand All @@ -55,7 +55,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mad)
if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_mad_neon);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
if (sysctlEnabled(SYSCTL_SME_KEY))
return PROVIDER_INFO(gf_vect_mad_sve);
return PROVIDER_INFO(gf_vect_mad_neon);
#endif
Expand All @@ -72,7 +72,7 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data)
if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(ec_encode_data_neon);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
if (sysctlEnabled(SYSCTL_SME_KEY))
return PROVIDER_INFO(ec_encode_data_sve);
return PROVIDER_INFO(ec_encode_data_neon);
#endif
Expand All @@ -89,7 +89,7 @@ DEFINE_INTERFACE_DISPATCHER(ec_encode_data_update)
if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(ec_encode_data_update_neon);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
if (sysctlEnabled(SYSCTL_SME_KEY))
return PROVIDER_INFO(ec_encode_data_update_sve);
return PROVIDER_INFO(ec_encode_data_update_neon);
#endif
Expand All @@ -106,7 +106,7 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
if (auxval & HWCAP_ASIMD)
return PROVIDER_INFO(gf_vect_mul_neon);
#elif defined(__APPLE__)
if (sysctlEnabled(SYSCTL_SVE_KEY))
if (sysctlEnabled(SYSCTL_SME_KEY))
return PROVIDER_INFO(gf_vect_mul_sve);
return PROVIDER_INFO(gf_vect_mul_neon);
#endif
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_2vect_dot_prod_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -98,7 +102,7 @@ cdecl(gf_2vect_dot_prod_sve):
/* Loop 1: x_len, vector length */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_2vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -104,7 +108,7 @@ cdecl(gf_2vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

/* prefetch dest data */
prfb pldl2strm, p0, [x_dest1, x_pos]
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_3vect_dot_prod_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -107,7 +111,7 @@ cdecl(gf_3vect_dot_prod_sve):
/* Loop 1: x_len, vector length */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_3vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -115,7 +119,7 @@ cdecl(gf_3vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

/* dest data prefetch */
prfb pldl2strm, p0, [x_dest1, x_pos]
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_4vect_dot_prod_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -115,7 +119,7 @@ cdecl(gf_4vect_dot_prod_sve):
/* Loop 1: x_len, vector length */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_4vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -126,7 +130,7 @@ cdecl(gf_4vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

prfb pldl2strm, p0, [x_dest1, x_pos]
prfb pldl2strm, p0, [x_dest2, x_pos]
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_5vect_dot_prod_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -128,7 +132,7 @@ cdecl(gf_5vect_dot_prod_sve):
/* Loop 1: x_len, vector length */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_5vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -137,7 +141,7 @@ cdecl(gf_5vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

prfb pldl2strm, p0, [x_dest1, x_pos]
prfb pldl2strm, p0, [x_dest2, x_pos]
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_6vect_dot_prod_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -137,7 +141,7 @@ cdecl(gf_6vect_dot_prod_sve):
/* Loop 1: x_len, vector length */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_6vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -148,7 +152,7 @@ cdecl(gf_6vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

prfb pldl2strm, p0, [x_dest1, x_pos]
prfb pldl2strm, p0, [x_dest2, x_pos]
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_7vect_dot_prod_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -148,7 +152,7 @@ cdecl(gf_7vect_dot_prod_sve):
/* Loop 1: x_len, vector length */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_8vect_dot_prod_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -160,7 +164,7 @@ cdecl(gf_8vect_dot_prod_sve):
/* Loop 1: x_len, vector length */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

mov x_vec_i, #0 /* clear x_vec_i */
ldr x_ptr, [x_src, x_vec_i] /* x_ptr: src base addr. */
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_vect_dot_prod_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -82,7 +86,7 @@ cdecl(gf_vect_dot_prod_sve):
/* Loop 1: x_len, vector length */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

mov z_dest.b, #0 /* clear z_dest */
mov x_vec_i, #0 /* clear x_vec_i */
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_vect_mad_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -87,7 +91,7 @@ cdecl(gf_vect_mad_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

/* prefetch dest data */
prfb pldl2strm, p0, [x_dest, x_pos]
Expand Down
6 changes: 5 additions & 1 deletion erasure_code/aarch64/gf_vect_mul_sve.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
**********************************************************************/
.text
.align 6
#ifdef __APPLE__
.arch armv8-a+sme
#else
.arch armv8-a+sve
#endif

#include "../include/aarch64_label.h"

Expand Down Expand Up @@ -92,7 +96,7 @@ cdecl(gf_vect_mul_sve):
/* vector length agnostic */
.Lloopsve_vl:
whilelo p0.b, x_pos, x_len
b.none .return_pass
b.eq .return_pass

/* load src data, governed by p0 */
ld1b z_src.b, p0/z, [x_src, x_pos]
Expand Down
2 changes: 1 addition & 1 deletion include/aarch64_multibinary.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@
#elif defined(__APPLE__)
#define SYSCTL_PMULL_KEY "hw.optional.arm.FEAT_PMULL" // FEAT_* sysctl keys are available from macOS 12 onward
#define SYSCTL_CRC32_KEY "hw.optional.armv8_crc32"
#define SYSCTL_SVE_KEY "hw.optional.arm.FEAT_SVE" // this key name is a guess; verify it against future macOS updates
#define SYSCTL_SME_KEY "hw.optional.arm.FEAT_SME"
#include <sys/sysctl.h>
#include <stddef.h>
static inline int sysctlEnabled(const char* name){
Expand Down

0 comments on commit b504f2e

Please sign in to comment.