Skip to content

Commit

Permalink
[mono][interp] Add instrinsics for common Vector128 operations (#81782)
Browse files Browse the repository at this point in the history
* [mono][interp] Move defines from transform.c

* [mono][interp] Move more defines from transform.c

* [mono][interp] Add intrinsics for most common V128 operations

We add intrinsics for Vector128 intrinsics that are actively used within our bcl.

We declare a set of simd method names, the same way we do it with jit, in `simd-methods.def`. In `transform-simd.c` we lookup method names in the list of supported intrinsics for `Vector128` and `Vector128<T>`. Once we find a supported instrinsic, we generate code for it, typically a `MINT_SIMD_INTRINS_*` opcode. In order to avoid adding too many new opcodes to the interpreter, simd intrinsics are grouped by signature. So all simd intrinsics that receive a single argument and return a value, will be called through `MINT_SIMD_INTRINS_P_P`. This instruction will receive an index to get the intrinsic implementation and calls it indirectly.

Some of the intrinsics are implemented using the standard vector intrinsics, supported by gcc and clang. These do not fully expose the SIMD capabilities, so some intrinsics are implemented naively. This should still be faster than using nonvectorized approach from managed code. In the future we can add better implmentation, on platforms where we have low level support. This would both be faster and reduce code size.

* [mono][interp] Add option to disable simd intrinsics

* [mono][interp] Disable simd intrinsics by default on wasm

These intrinsics are not yet implemented on jiterpreter, making it slighty slower instead.

* [mono][interp] Replace v128_create with v128_ldc if possible

v128_create receives as an argument every single element of the vector. This method is typically used with constants. For a Vector128<short> this means that creating a constant vector required 8 ldc.i4 and a v128_create. We can instead use a single instruction and embed the vector value in the code stream directly.

* [mono][interp] Remove op_Division

It is actually not used in bcl, it is not really vectorized on any platforms and the codegen for the interp implementation is massive and inefficient.

* [mono][interp] Don't emit intrinsics for unsupported vector types

* [mono][interp] Vector extensions used in these intrinsics are a GNUC extension
  • Loading branch information
BrzVlad authored Feb 28, 2023
1 parent 555dc47 commit 30283df
Show file tree
Hide file tree
Showing 13 changed files with 1,577 additions and 187 deletions.
1 change: 1 addition & 0 deletions src/mono/mono/mini/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ set(interp_sources
interp/interp.h
interp/interp-internals.h
interp/interp.c
interp/interp-simd.c
interp/interp-intrins.h
interp/interp-intrins.c
interp/mintops.h
Expand Down
5 changes: 5 additions & 0 deletions src/mono/mono/mini/interp/interp-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
// This alignment provides us with straight forward support for Vector128
#define MINT_STACK_ALIGNMENT (2 * MINT_STACK_SLOT_SIZE)
#define MINT_SIMD_ALIGNMENT (MINT_STACK_ALIGNMENT)
#define SIZEOF_V128 16

#define INTERP_STACK_SIZE (1024*1024)
#define INTERP_REDZONE_SIZE (8*1024)
Expand Down Expand Up @@ -101,6 +102,10 @@ typedef enum {

#define PROFILE_INTERP 0

#if !HOST_BROWSER && __GNUC__
#define INTERP_ENABLE_SIMD
#endif

#define INTERP_IMETHOD_TAG_1(im) ((gpointer)((mono_u)(im) | 1))
#define INTERP_IMETHOD_IS_TAGGED_1(im) ((mono_u)(im) & 1)
#define INTERP_IMETHOD_UNTAG_1(im) ((InterpMethod*)((mono_u)(im) & ~1))
Expand Down
81 changes: 81 additions & 0 deletions src/mono/mono/mini/interp/interp-simd-intrins.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_ADD, interp_v128_i1_op_addition)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_ADD, interp_v128_i2_op_addition)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_ADD, interp_v128_i4_op_addition)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_SUB, interp_v128_i1_op_subtraction)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_SUB, interp_v128_i2_op_subtraction)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_SUB, interp_v128_i4_op_subtraction)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_AND, interp_v128_op_bitwise_and)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_OR, interp_v128_op_bitwise_or)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_EQUALITY, interp_v128_op_bitwise_equality)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_BITWISE_INEQUALITY, interp_v128_op_bitwise_inequality)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_EXCLUSIVE_OR, interp_v128_op_exclusive_or)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_MULTIPLY, interp_v128_i1_op_multiply)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_MULTIPLY, interp_v128_i2_op_multiply)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_MULTIPLY, interp_v128_i4_op_multiply)

INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_NEGATION, interp_v128_i1_op_negation)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_NEGATION, interp_v128_i2_op_negation)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_NEGATION, interp_v128_i4_op_negation)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_LEFT_SHIFT, interp_v128_i1_op_left_shift)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_LEFT_SHIFT, interp_v128_i2_op_left_shift)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_LEFT_SHIFT, interp_v128_i4_op_left_shift)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_LEFT_SHIFT, interp_v128_i8_op_left_shift)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_RIGHT_SHIFT, interp_v128_i1_op_right_shift)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_RIGHT_SHIFT, interp_v128_i2_op_right_shift)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_RIGHT_SHIFT, interp_v128_i4_op_right_shift)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_URIGHT_SHIFT, interp_v128_i1_op_uright_shift)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_URIGHT_SHIFT, interp_v128_i2_op_uright_shift)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_URIGHT_SHIFT, interp_v128_i4_op_uright_shift)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_URIGHT_SHIFT, interp_v128_i8_op_uright_shift)

INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_ONES_COMPLEMENT, interp_v128_op_ones_complement)

INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_U2_WIDEN_LOWER, interp_v128_u2_widen_lower)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_U2_WIDEN_UPPER, interp_v128_u2_widen_upper)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U1_NARROW, interp_v128_u1_narrow)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U1_GREATER_THAN, interp_v128_u1_greater_than)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_LESS_THAN, interp_v128_i1_less_than)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U1_LESS_THAN, interp_v128_u1_less_than)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_LESS_THAN, interp_v128_i2_less_than)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_EQUALS, interp_v128_i1_equals)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_EQUALS, interp_v128_i2_equals)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_EQUALS, interp_v128_i4_equals)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_EQUALS, interp_v128_i8_equals)

INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE_SCALAR, interp_v128_i1_create_scalar)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE_SCALAR, interp_v128_i2_create_scalar)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE_SCALAR, interp_v128_i4_create_scalar)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE_SCALAR, interp_v128_i8_create_scalar)

INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_EXTRACT_MSB, interp_v128_i1_extract_msb)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_EXTRACT_MSB, interp_v128_i2_extract_msb)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_EXTRACT_MSB, interp_v128_i4_extract_msb)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_EXTRACT_MSB, interp_v128_i8_extract_msb)

INTERP_SIMD_INTRINSIC_P_PPP (INTERP_SIMD_INTRINSIC_V128_CONDITIONAL_SELECT, interp_v128_conditional_select)

INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I1_CREATE, interp_v128_i1_create)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I2_CREATE, interp_v128_i2_create)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I4_CREATE, interp_v128_i4_create)
INTERP_SIMD_INTRINSIC_P_P (INTERP_SIMD_INTRINSIC_V128_I8_CREATE, interp_v128_i8_create)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_AND_NOT, interp_v128_and_not)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_U2_LESS_THAN_EQUAL, interp_v128_u2_less_than_equal)

INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I1_SHUFFLE, interp_v128_i1_shuffle)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I2_SHUFFLE, interp_v128_i2_shuffle)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I4_SHUFFLE, interp_v128_i4_shuffle)
INTERP_SIMD_INTRINSIC_P_PP (INTERP_SIMD_INTRINSIC_V128_I8_SHUFFLE, interp_v128_i8_shuffle)
Loading

0 comments on commit 30283df

Please sign in to comment.