Adding i1 mask attention e2e tests
* New tests exercise the `--iree-experimental-packed-i1-storage` option,
  which enables truly packed i1 storage in memory (see the sketch below).
* Only certain shapes are correct at the moment, as the upstream patches for
  emulating unaligned vector stores have not yet been merged.
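
A rough sketch of what the packed layout looks like from a test's point of
view (this mirrors the pattern used by the new tests below; the variable
names are taken from those tests): a 1x4x4xi1 mask holds 16 bits, so it can
be backed by two i8 bytes and bitcast to the i1 tensor type:

    // 16 mask bits packed into two bytes; every bit is set in this example.
    %i8mask = util.unfoldable_constant dense<[255, 255]> : tensor<2xi8>
    %mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>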

Signed-off-by: Alan Li <[email protected]>
lialan committed Nov 29, 2024
1 parent 32d4f28 commit 35f7356
Showing 3 changed files with 142 additions and 4 deletions.
20 changes: 17 additions & 3 deletions tests/e2e/linalg_ext_ops/BUILD.bazel
@@ -24,6 +24,9 @@ ALL_SRCS = enforce_glob(
"winograd_output.mlir",
],
include = ["*.mlir"],
exclude = [
"attention_i1_mask.mlir",
],
)

iree_check_single_backend_test_suite(
@@ -39,11 +42,17 @@ iree_check_single_backend_test_suite(
target_backend = "llvm-cpu",
)

-iree_check_single_backend_test_suite(
-    name = "check_llvm-cpu_local-task",
-    srcs = [
+I1_SRCS = enforce_glob(
+    # keep sorted
+    [
        "attention_i1_mask.mlir",
    ],
+    include = ["*.mlir"],
+)
+
+iree_check_single_backend_test_suite(
+    name = "check_llvm-cpu_local-task",
+    srcs = I1_SRCS,
compiler_flags = [
"--iree-llvmcpu-target-cpu=generic",
"--iree-experimental-packed-i1-storage",
@@ -68,6 +77,7 @@ VMVX_SRCS = enforce_glob(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
],
)

@@ -91,6 +101,7 @@ LLVM_GPU_SRCS = enforce_glob(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
],
)

@@ -123,6 +134,7 @@ ROCM_HIP_SRCS = enforce_glob(
exclude = [
"top-k.mlir",
"attention.mlir",
"attention_i1_mask.mlir",
],
)

@@ -147,6 +159,7 @@ iree_check_single_backend_test_suite(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
"top-k.mlir",
],
),
@@ -168,6 +181,7 @@ iree_check_single_backend_test_suite(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
"top-k.mlir",
],
),
42 changes: 42 additions & 0 deletions tests/e2e/linalg_ext_ops/attention.mlir
@@ -75,6 +75,48 @@ func.func @causal_attention1x3x4() {
return
}

func.func @attention1x4x4_i1_mask_all_ones() {
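  // This variant passes the i1 mask as a plain boolean constant; the
  // byte-packed variants live in attention_i1_mask.mlir.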
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%mask = util.unfoldable_constant dense<[[[true, true, true, true],
[true, true, true, true],
[true, true, true, true],
[true, true, true, true]]]> : tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
[0.941939, 1.04194, 1.14194, 1.24194],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}

func.func @softcap_attention1x3x4() {
%init = tensor.empty() : tensor<1x3x4xf32>
84 changes: 83 additions & 1 deletion tests/e2e/linalg_ext_ops/attention_i1_mask.mlir
@@ -69,7 +69,7 @@ func.func @truncate_i1_2() {
return
}

-func.func @attention1x4x4_i1_all_ones() {
+func.func @attention1x4x4_i1_mask() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
@@ -109,3 +109,85 @@ func.func @attention1x4x4_i1_all_ones() {
) : tensor<1x4x4xf32>
return
}

func.func @attention1x4x4_i1_mask_all_ones() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%i8mask = util.unfoldable_constant dense<[255, 255]> : tensor<2xi8>
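  // Two 0xFF bytes supply the 16 set bits that back the 1x4x4 i1 mask below.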
%mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
[0.941939, 1.04194, 1.14194, 1.24194],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}

func.func @attention1x4x4_i1_mask_tril() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%i8mask = util.unfoldable_constant dense<[140, 239]> : tensor<2xi8>
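  // 140 = 0b10001100 and 239 = 0b11101111; reading each byte least-significant
  // bit first, the per-query-row key masks come out as 0011, 0001, 1111, 0111,
  // which is consistent with the expected values checked below.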
%mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[1.11993, 1.21993, 1.31993, 1.41993],
[1.3, 1.4, 1.5, 1.6],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.15549, 1.25549, 1.35549, 1.45549]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}
