Adding i1 mask attention e2e tests
* New tests exercise the `--iree-experimental-packed-i1-storage` option,
  which enables truly packed i1 storage in memory (see the sketch below).
* Only certain shapes are correct at the moment, as the upstream patches for
  emulating unaligned vector stores have not yet been merged.
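
A rough sketch of what the packed layout looks like from a test's point of
view (this mirrors the pattern used by the new tests below; the variable
names are taken from those tests): a 1x4x4xi1 mask holds 16 bits, so it can
be backed by two i8 bytes and bitcast to the i1 tensor type:

    // 16 mask bits packed into two bytes; every bit is set in this example.
    %i8mask = util.unfoldable_constant dense<[255, 255]> : tensor<2xi8>
    %mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>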

Signed-off-by: Alan Li <[email protected]>
lialan committed Nov 29, 2024
1 parent 32d4f28 commit 35f7356
Showing 3 changed files with 142 additions and 4 deletions.
20 changes: 17 additions & 3 deletions tests/e2e/linalg_ext_ops/BUILD.bazel
@@ -24,6 +24,9 @@ ALL_SRCS = enforce_glob(
"winograd_output.mlir",
],
include = ["*.mlir"],
exclude = [
"attention_i1_mask.mlir",
],
)

iree_check_single_backend_test_suite(
@@ -39,11 +42,17 @@ iree_check_single_backend_test_suite(
target_backend = "llvm-cpu",
)

-iree_check_single_backend_test_suite(
-    name = "check_llvm-cpu_local-task",
-    srcs = [
+I1_SRCS = enforce_glob(
+    # keep sorted
+    [
        "attention_i1_mask.mlir",
    ],
+    include = ["*.mlir"],
+)
+
+iree_check_single_backend_test_suite(
+    name = "check_llvm-cpu_local-task",
+    srcs = I1_SRCS,
compiler_flags = [
"--iree-llvmcpu-target-cpu=generic",
"--iree-experimental-packed-i1-storage",
@@ -68,6 +77,7 @@ VMVX_SRCS = enforce_glob(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
],
)

@@ -91,6 +101,7 @@ LLVM_GPU_SRCS = enforce_glob(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
],
)

@@ -123,6 +134,7 @@ ROCM_HIP_SRCS = enforce_glob(
exclude = [
"top-k.mlir",
"attention.mlir",
"attention_i1_mask.mlir",
],
)

@@ -147,6 +159,7 @@ iree_check_single_backend_test_suite(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
"top-k.mlir",
],
),
@@ -168,6 +181,7 @@ iree_check_single_backend_test_suite(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
"top-k.mlir",
],
),
42 changes: 42 additions & 0 deletions tests/e2e/linalg_ext_ops/attention.mlir
@@ -75,6 +75,48 @@ func.func @causal_attention1x3x4() {
return
}

func.func @attention1x4x4_i1_mask_all_ones() {
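  // This variant passes the i1 mask as a plain boolean constant; the
  // byte-packed variants live in attention_i1_mask.mlir.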
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%mask = util.unfoldable_constant dense<[[[true, true, true, true],
[true, true, true, true],
[true, true, true, true],
[true, true, true, true]]]> : tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
[0.941939, 1.04194, 1.14194, 1.24194],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}

func.func @softcap_attention1x3x4() {
%init = tensor.empty() : tensor<1x3x4xf32>
84 changes: 83 additions & 1 deletion tests/e2e/linalg_ext_ops/attention_i1_mask.mlir
@@ -69,7 +69,7 @@ func.func @truncate_i1_2() {
return
}

-func.func @attention1x4x4_i1_all_ones() {
+func.func @attention1x4x4_i1_mask() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
@@ -109,3 +109,85 @@ func.func @attention1x4x4_i1_all_ones() {
) : tensor<1x4x4xf32>
return
}

func.func @attention1x4x4_i1_mask_all_ones() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%i8mask = util.unfoldable_constant dense<[255, 255]> : tensor<2xi8>
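  // Two 0xFF bytes supply the 16 set bits that back the 1x4x4 i1 mask below.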
%mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
[0.941939, 1.04194, 1.14194, 1.24194],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}

func.func @attention1x4x4_i1_mask_tril() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%i8mask = util.unfoldable_constant dense<[140, 239]> : tensor<2xi8>
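  // 140 = 0b10001100 and 239 = 0b11101111; reading each byte least-significant
  // bit first, the per-query-row key masks come out as 0011, 0001, 1111, 0111,
  // which is consistent with the expected values checked below.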
%mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[1.11993, 1.21993, 1.31993, 1.41993],
[1.3, 1.4, 1.5, 1.6],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.15549, 1.25549, 1.35549, 1.45549]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}
