diff --git a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py index 48401ba5ea42a9..b702eece37002b 100755 --- a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py +++ b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py @@ -511,12 +511,10 @@ async def main() -> None: ) invocation.append("-list-checks") invocation.append("-") - if args.quiet: - # Even with -quiet we still want to check if we can call clang-tidy. - with open(os.devnull, "w") as dev_null: - subprocess.check_call(invocation, stdout=dev_null) - else: - subprocess.check_call(invocation) + # Even with -quiet we still want to check if we can call clang-tidy. + subprocess.check_call( + invocation, stdout=subprocess.DEVNULL if args.quiet else None + ) except: print("Unable to run clang-tidy.", file=sys.stderr) sys.exit(1) diff --git a/clang-tools-extra/docs/clang-tidy/Contributing.rst b/clang-tools-extra/docs/clang-tidy/Contributing.rst index 92074bd4dae8ba..d5303418b859b2 100644 --- a/clang-tools-extra/docs/clang-tidy/Contributing.rst +++ b/clang-tools-extra/docs/clang-tidy/Contributing.rst @@ -127,14 +127,15 @@ Writing a clang-tidy Check So you have an idea of a useful check for :program:`clang-tidy`. -First, if you're not familiar with LLVM development, read through the `Getting -Started with LLVM`_ document for instructions on setting up your workflow and +First, if you're not familiar with LLVM development, read through the `Getting Started +with the LLVM System`_ document for instructions on setting up your workflow and the `LLVM Coding Standards`_ document to familiarize yourself with the coding -style used in the project. For code reviews we mostly use `LLVM Phabricator`_. +style used in the project. For code reviews we currently use `LLVM GitHub`_, +though historically we used Phabricator. -.. _Getting Started with LLVM: https://llvm.org/docs/GettingStarted.html +.. _Getting Started with the LLVM System: https://llvm.org/docs/GettingStarted.html .. _LLVM Coding Standards: https://llvm.org/docs/CodingStandards.html -.. _LLVM Phabricator: https://llvm.org/docs/Phabricator.html +.. _LLVM GitHub: https://github.com/llvm/llvm-project Next, you need to decide which module the check belongs to. Modules are located in subdirectories of `clang-tidy/ @@ -336,13 +337,25 @@ a starting point for your test cases. A rough outline of the process looks like The quickest way to prototype your matcher is to use :program:`clang-query` to interactively build up your matcher. For complicated matchers, build up a matching expression incrementally and use :program:`clang-query`'s ``let`` command to save named -matching expressions to simplify your matcher. Just like breaking up a huge function -into smaller chunks with intention-revealing names can help you understand a complex -algorithm, breaking up a matcher into smaller matchers with intention-revealing names -can help you understand a complicated matcher. Once you have a working matcher, the -C++ API will be virtually identical to your interactively constructed matcher. You can -use local variables to preserve your intention-revealing names that you applied to -nested matchers. +matching expressions to simplify your matcher. + +.. code-block:: console + + clang-query> let c1 cxxRecordDecl() + clang-query> match c1 + +Alternatively, pressing the tab key after a matcher's opening parenthesis will also +show which matchers can be chained with it, though some matchers that work +may not be listed.
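Once the interactively built matcher behaves as intended, it carries over almost verbatim into a check's ``registerMatchers``. A minimal sketch of that translation (the ``MyCheck`` class and the ``"record"`` binding are hypothetical, invented here for illustration):

.. code-block:: c++

  #include "clang/ASTMatchers/ASTMatchFinder.h"

  using namespace clang::ast_matchers;

  // The clang-query expression "let c1 cxxRecordDecl()" becomes an AST
  // matcher; a local variable keeps the intention-revealing name, and the
  // bound name lets the check() callback retrieve the matched node.
  void MyCheck::registerMatchers(MatchFinder *Finder) {
    auto C1 = cxxRecordDecl().bind("record");
    Finder->addMatcher(C1, this);
  }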
+ +Just like breaking up a huge function into smaller chunks with intention-revealing names +can help you understand a complex algorithm, breaking up a matcher into smaller matchers +with intention-revealing names can help you understand a complicated matcher. + +Once you have a working clang-query matcher, the C++ API matcher will be the same as, or +similar to, your interactively constructed matcher (in some cases the two differ slightly). +You can use local variables to preserve the intention-revealing names that you applied +to nested matchers. Creating private matchers ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -646,10 +659,13 @@ directory. The path to this directory is available in a lit test with the varia Out-of-tree check plugins ------------------------- + Developing an out-of-tree check as a plugin largely follows the steps -outlined above. The plugin is a shared library whose code lives outside +outlined above, including creating a new module and performing the steps needed to +register the module. The plugin is a shared library whose code lives outside the clang-tidy build system. Build and link this shared library against -LLVM as done for other kinds of Clang plugins. +LLVM as done for other kinds of Clang plugins. If using CMake, use the keyword +``MODULE`` when invoking ``add_library`` or ``llvm_add_library``. The plugin can be loaded by passing `-load` to `clang-tidy` in addition to the names of the checks to enable. @@ -664,6 +680,19 @@ compiled against the version of clang-tidy that will be loading the plugin. The plugins can use threads, TLS, or any other facilities available to in-tree code which is accessible from the external headers. +Note that testing out-of-tree checks might involve getting ``llvm-lit`` from an LLVM +installation compiled from source. See `Getting Started with the LLVM System`_ for ways +to do so. + +Alternatively, obtain `lit`_ by following the `test-suite guide`_, get the `FileCheck`_ binary, +and write a version of `check_clang_tidy.py`_ suited to your needs. + +.. _Getting Started with the LLVM System: https://llvm.org/docs/GettingStarted.html .. _test-suite guide: https://llvm.org/docs/TestSuiteGuide.html .. _lit: https://llvm.org/docs/CommandGuide/lit.html .. _FileCheck: https://llvm.org/docs/CommandGuide/FileCheck.html .. _check_clang_tidy.py: https://github.com/llvm/llvm-project/blob/main/clang-tools-extra/test/clang-tidy/check_clang_tidy.py + Running clang-tidy on LLVM -------------------------- @@ -688,10 +717,10 @@ warnings and errors. The script provides multiple configuration flags. * To restrict the files examined you can provide one or more regex arguments that the file names are matched against. - ``run-clang-tidy.py clang-tidy/.*Check\.cpp`` will only analyze clang-tidy + ``run-clang-tidy.py clang-tidy/.*Check\.cpp`` will only analyze `clang-tidy` checks. It may also be necessary to restrict the header files that warnings - are displayed from using the ``-header-filter`` flag. It has the same behavior - as the corresponding :program:`clang-tidy` flag. + are displayed from by using the ``-header-filter`` and ``-exclude-header-filter`` flags. + They have the same behavior as the corresponding :program:`clang-tidy` flags. * To apply suggested fixes ``-fix`` can be passed as an argument. This gathers all changes in a temporary directory and applies them.
Passing ``-format`` @@ -758,4 +787,4 @@ There is only one argument that controls profile storage: * If you run :program:`clang-tidy` from within ``/foo`` directory, and specify ``-store-check-profile=.``, then the profile will still be saved to - ``/foo/-example.cpp.json`` + ``/foo/-example.cpp.json`` \ No newline at end of file diff --git a/clang/docs/HLSL/ExpectedDifferences.rst b/clang/docs/HLSL/ExpectedDifferences.rst index 4782eb3cda754a..e143c5b71575aa 100644 --- a/clang/docs/HLSL/ExpectedDifferences.rst +++ b/clang/docs/HLSL/ExpectedDifferences.rst @@ -54,6 +54,19 @@ HLSL 202x based on proposal and `0008 `_. +The largest difference between Clang's and DXC's overload resolution is the +algorithm used for identifying best-match overloads. There are more details +about the algorithmic differences in the :ref:`multi_argument_overloads` section +below. There are three high-level differences that should be highlighted: + +* **There should be no cases** where DXC and Clang both successfully + resolve an overload but resolve it to different candidates. +* There are cases where Clang will successfully resolve an overload that DXC + wouldn't, because we've trimmed the overload set in Clang to remove ambiguity. +* There are cases where DXC will successfully resolve an overload that Clang + will not, for two reasons: (1) DXC only generates partial overload sets for + builtin functions, and (2) DXC resolves cases that probably should be ambiguous. + Clang's implementation extends standard overload resolution rules to HLSL library functionality. This causes subtle changes in overload resolution behavior between Clang and DXC. Some examples include: @@ -71,18 +84,23 @@ behavior between Clang and DXC. Some examples include: uint U; int I; float X, Y, Z; - double3 A, B; + double3 R, G; } - void twoParams(int, int); - void twoParams(float, float); + void takesSingleDouble(double); + void takesSingleDouble(vector<double, 1>); + + void scalarOrVector(double); + void scalarOrVector(vector<double, 2>); export void call() { - halfOrInt16(U); // DXC: Fails with call ambiguous between int16_t and uint16_t overloads - // Clang: Resolves to halfOrInt16(uint16_t). - halfOrInt16(I); // All: Resolves to halfOrInt16(int16_t). half H; + halfOrInt16(I); // All: Resolves to halfOrInt16(int16_t). + #ifndef IGNORE_ERRORS + halfOrInt16(U); // All: Fails with call ambiguous between int16_t and uint16_t + // overloads + // asfloat16 is a builtin with overloads for half, int16_t, and uint16_t. H = asfloat16(I); // DXC: Fails to resolve overload for int. // Clang: Resolves to asfloat16(int16_t). @@ -94,21 +112,28 @@ behavior between Clang and DXC. Some examples include: takesDoubles(X, Y, Z); // Works on all compilers #ifndef IGNORE_ERRORS - fma(X, Y, Z); // DXC: Fails to resolve no known conversion from float to double. + fma(X, Y, Z); // DXC: Fails to resolve no known conversion from float to + // double. // Clang: Resolves to fma(double,double,double). - #endif - double D = dot(A, B); // DXC: Resolves to dot(double3, double3), fails DXIL Validation. + double D = dot(R, G); // DXC: Resolves to dot(double3, double3), fails DXIL Validation. // FXC: Expands to compute double dot product with fmul/fadd - // Clang: Resolves to dot(float3, float3), emits conversion warnings. + // Clang: Fails to resolve as ambiguous against + // dot(half, half) or dot(float, float) + #endif #ifndef IGNORE_ERRORS tan(B); // DXC: resolves to tan(float). // Clang: Fails to resolve, ambiguous between integer types.
- twoParams(I, X); // DXC: resolves twoParams(int, int). - // Clang: Fails to resolve ambiguous conversions. #endif + + double D; + takesSingleDouble(D); // All: Fails to resolve ambiguous conversions. + takesSingleDouble(R); // All: Fails to resolve ambiguous conversions. + + scalarOrVector(D); // All: Resolves to scalarOrVector(double). + scalarOrVector(R); // All: Fails to resolve ambiguous conversions. } .. note:: @@ -119,3 +144,75 @@ behavior between Clang and DXC. Some examples include: diagnostic notifying the user of the conversion rather than silently altering precision relative to the other overloads (as FXC does) or generating code that will fail validation (as DXC does). + +.. _multi_argument_overloads: + +Multi-Argument Overloads +------------------------ + +In addition to the differences in single-element conversions, Clang and DXC +differ dramatically in multi-argument overload resolution. C++ multi-argument +overload resolution behavior (or something very similar) is required to +implement +`non-member operator overloading `_. + +Clang adopts the C++-inspired language from the +`draft HLSL specification `_, +where an overload ``f1`` is a better candidate than ``f2`` if, for every argument, its +conversion sequence is not worse than the corresponding conversion sequence of ``f2``, and +for at least one argument it is better. + +.. code-block:: c++ + + cbuffer CB { + int I; + float X; + float4 V; + } + + void twoParams(int, int); + void twoParams(float, float); + void threeParams(float, float, float); + void threeParams(float4, float4, float4); + + export void call() { + twoParams(I, X); // DXC: resolves twoParams(int, int). + // Clang: Fails to resolve ambiguous conversions. + + threeParams(X, V, V); // DXC: resolves threeParams(float4, float4, float4). + // Clang: Fails to resolve ambiguous conversions. + } + +In the examples above, ``twoParams`` called with mixed arguments produces the +implicit conversion sequences { ExactMatch, FloatingIntegral } and { +FloatingIntegral, ExactMatch }. In each case an argument has a worse conversion +than in the other sequence, so the call is ambiguous. + +In the ``threeParams`` example the sequences are { ExactMatch, VectorTruncation, +VectorTruncation } and { VectorSplat, ExactMatch, ExactMatch }. Again, each +sequence has at least one argument with a worse conversion than in the other, so +the call is ambiguous. + +.. note:: + + The behavior of DXC described below is undocumented, so this was gleaned from + observation and a bit of reading the source. + +DXC's approach for determining the best overload produces an integer score value +for each implicit conversion sequence for each argument expression. Scores for +casts are based on a bitmask construction that is complicated to reverse +engineer. It seems that: + +* Exact match is 0 +* Dimension increase is 1 +* Promotion is 2 +* Integral -> Float conversion is 4 +* Float -> Integral conversion is 8 +* Cast is 16 + +The masks are or'd together to produce a score for the cast. + +The scores of each conversion sequence are then summed to generate a score for +the overload candidate. The overload candidate with the lowest score is the best +candidate. If more than one candidate shares the lowest score, the call +is ambiguous.
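To make the scoring model concrete, here is a minimal sketch of the mechanism described above, assuming the bit values listed are accurate; the names are invented, and this is a reconstruction of observed behavior rather than DXC's actual code.

.. code-block:: c++

  #include <cstdint>
  #include <numeric>
  #include <vector>

  // Bit values as listed above. A single conversion may set several bits;
  // for example, a promotion that also increases dimension scores 2 | 1 == 3.
  enum CastFlag : uint32_t {
    ExactMatch = 0,
    DimensionIncrease = 1,
    Promotion = 2,
    IntegralToFloat = 4,
    FloatToIntegral = 8,
    Cast = 16,
  };

  // The flags describing one argument's conversion are or'd together.
  uint32_t scoreConversion(const std::vector<CastFlag> &Flags) {
    uint32_t Score = 0;
    for (CastFlag F : Flags)
      Score |= F;
    return Score;
  }

  // Per-argument scores are summed into the candidate's score; the lowest
  // total wins, and a tie for the lowest total makes the call ambiguous.
  uint32_t scoreCandidate(const std::vector<uint32_t> &ArgScores) {
    return std::accumulate(ArgScores.begin(), ArgScores.end(), 0u);
  }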
diff --git a/clang/docs/tools/generate_formatted_state.py b/clang/docs/tools/generate_formatted_state.py index 66cebbf7af33a4..2de43dc383f557 100755 --- a/clang/docs/tools/generate_formatted_state.py +++ b/clang/docs/tools/generate_formatted_state.py @@ -78,8 +78,6 @@ def get_style(count, passed): - {style2}`{percent}%` """ -FNULL = open(os.devnull, "w") - with open(DOC_FILE, "wb") as output: cleanfiles = open(CLEAN_FILE, "wb") @@ -101,8 +99,8 @@ def get_style(count, passed): # interested in it, just the return code. git_check = subprocess.Popen( ["git", "ls-files", "--error-unmatch", act_sub_dir], - stdout=FNULL, - stderr=FNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, ) if git_check.wait() != 0: print("Skipping directory: ", act_sub_dir) diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 034d32c6291b3d..2e80eef2c8b9bc 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -124,6 +124,7 @@ TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "UiV8s", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "UiV4i", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_bitmask_i64x2, "UiV2LLi", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_abs_f16x8, "V8hV8h", "nc", "fp16") TARGET_BUILTIN(__builtin_wasm_abs_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128") @@ -140,6 +141,10 @@ TARGET_BUILTIN(__builtin_wasm_max_f16x8, "V8hV8hV8h", "nc", "fp16") TARGET_BUILTIN(__builtin_wasm_pmin_f16x8, "V8hV8hV8h", "nc", "fp16") TARGET_BUILTIN(__builtin_wasm_pmax_f16x8, "V8hV8hV8h", "nc", "fp16") +TARGET_BUILTIN(__builtin_wasm_ceil_f16x8, "V8hV8h", "nc", "fp16") +TARGET_BUILTIN(__builtin_wasm_floor_f16x8, "V8hV8h", "nc", "fp16") +TARGET_BUILTIN(__builtin_wasm_trunc_f16x8, "V8hV8h", "nc", "fp16") +TARGET_BUILTIN(__builtin_wasm_nearest_f16x8, "V8hV8h", "nc", "fp16") TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_f32x4, "V4fV4f", "nc", "simd128") @@ -151,9 +156,13 @@ TARGET_BUILTIN(__builtin_wasm_nearest_f64x2, "V2dV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_sqrt_f16x8, "V8hV8h", "nc", "fp16") TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i16x8_f16x8, "V8sV8h", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i16x8_f16x8, "V8sV8h", "nc", "simd128") + TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128") diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 6a77323d939791..9bd77edb0a550f 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -5324,11 +5324,11 @@ bool Compiler::VisitVectorUnaryOperator(const UnaryOperator *E) { auto UnaryOp = E->getOpcode(); if (UnaryOp != UO_Plus && UnaryOp != UO_Minus && UnaryOp != UO_LNot && - UnaryOp != UO_Not) + UnaryOp != UO_Not && UnaryOp != UO_AddrOf) return this->emitInvalid(E); // Nothing to do here. 
- if (UnaryOp == UO_Plus) + if (UnaryOp == UO_Plus || UnaryOp == UO_AddrOf) return this->delegate(SubExpr); if (!Initializing) { diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index b0256a8ce9ed04..d6ec26af80aadd 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -350,6 +350,7 @@ void SourceManager::clearIDTables() { LastLineNoContentCache = nullptr; LastFileIDLookup = FileID(); + IncludedLocMap.clear(); if (LineTable) LineTable->clear(); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 4204c8ff276ab1..c9f21f9ded24f4 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -21211,6 +21211,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); @@ -21222,6 +21223,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64: + case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); @@ -21269,6 +21271,10 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_ceil_f16x8: + case WebAssembly::BI__builtin_wasm_floor_f16x8: + case WebAssembly::BI__builtin_wasm_trunc_f16x8: + case WebAssembly::BI__builtin_wasm_nearest_f16x8: case WebAssembly::BI__builtin_wasm_ceil_f32x4: case WebAssembly::BI__builtin_wasm_floor_f32x4: case WebAssembly::BI__builtin_wasm_trunc_f32x4: @@ -21279,18 +21285,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_nearest_f64x2: { unsigned IntNo; switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_ceil_f16x8: case WebAssembly::BI__builtin_wasm_ceil_f32x4: case WebAssembly::BI__builtin_wasm_ceil_f64x2: IntNo = Intrinsic::ceil; break; + case WebAssembly::BI__builtin_wasm_floor_f16x8: case WebAssembly::BI__builtin_wasm_floor_f32x4: case WebAssembly::BI__builtin_wasm_floor_f64x2: IntNo = Intrinsic::floor; break; + case WebAssembly::BI__builtin_wasm_trunc_f16x8: case WebAssembly::BI__builtin_wasm_trunc_f32x4: case WebAssembly::BI__builtin_wasm_trunc_f64x2: IntNo = Intrinsic::trunc; break; + case WebAssembly::BI__builtin_wasm_nearest_f16x8: case WebAssembly::BI__builtin_wasm_nearest_f32x4: case WebAssembly::BI__builtin_wasm_nearest_f64x2: IntNo = Intrinsic::nearbyint; @@ -21489,12 +21499,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } + case WebAssembly::BI__builtin_wasm_abs_f16x8: case WebAssembly::BI__builtin_wasm_abs_f32x4: case WebAssembly::BI__builtin_wasm_abs_f64x2: { Value *Vec = 
EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } + case WebAssembly::BI__builtin_wasm_sqrt_f16x8: case WebAssembly::BI__builtin_wasm_sqrt_f32x4: case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h index 2327bec52522d2..67d12f6f2cf419 100644 --- a/clang/lib/Headers/wasm_simd128.h +++ b/clang/lib/Headers/wasm_simd128.h @@ -33,6 +33,7 @@ typedef unsigned long long __u64x2 __attribute__((__vector_size__(16), __aligned__(16))); typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16))); typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16))); +typedef __fp16 __f16x8 __attribute__((__vector_size__(16), __aligned__(16))); typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8))); typedef unsigned char __u8x8 @@ -1878,6 +1879,152 @@ wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t __a, v128_t __b, v128_t __c) { (__i8x16)__a, (__i8x16)__b, (__i32x4)__c); } +// FP16 intrinsics +#define __FP16_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("fp16"), \ + __min_vector_width__(128))) + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_splat(float __a) { + return (v128_t)__builtin_wasm_splat_f16x8(__a); +} + +static __inline__ float __FP16_FN_ATTRS wasm_f16x8_extract_lane(v128_t __a, + int __i) + __REQUIRE_CONSTANT(__i) { + return __builtin_wasm_extract_lane_f16x8((__f16x8)__a, __i); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_replace_lane(v128_t __a, + int __i, + float __b) + __REQUIRE_CONSTANT(__i) { + return (v128_t)__builtin_wasm_replace_lane_f16x8((__f16x8)__a, __i, __b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_abs(v128_t __a) { + return (v128_t)__builtin_wasm_abs_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_neg(v128_t __a) { + return (v128_t)(-(__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sqrt(v128_t __a) { + return (v128_t)__builtin_wasm_sqrt_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ceil(v128_t __a) { + return (v128_t)__builtin_wasm_ceil_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_floor(v128_t __a) { + return (v128_t)__builtin_wasm_floor_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_trunc(v128_t __a) { + return (v128_t)__builtin_wasm_trunc_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_nearest(v128_t __a) { + return (v128_t)__builtin_wasm_nearest_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_eq(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a == (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ne(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a != (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_lt(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a < (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_gt(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a > (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_le(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a <= (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ge(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a >= (__f16x8)__b); +} + +static 
__inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_add(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a + (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sub(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a - (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_mul(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a * (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_div(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a / (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_min(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_min_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_max(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_max_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmin(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_pmin_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmax(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_pmax_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS +wasm_i16x8_trunc_sat_f16x8(v128_t __a) { + return (v128_t)__builtin_wasm_trunc_saturate_s_i16x8_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS +wasm_u16x8_trunc_sat_f16x8(v128_t __a) { + return (v128_t)__builtin_wasm_trunc_saturate_u_i16x8_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_i16x8(v128_t __a) { + return (v128_t) __builtin_convertvector((__i16x8)__a, __f16x8); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) { + return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a, + v128_t __b, + v128_t __c) { + return (v128_t)__builtin_wasm_relaxed_madd_f16x8((__f16x8)__a, (__f16x8)__b, + (__f16x8)__c); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_nmadd(v128_t __a, + v128_t __b, + v128_t __c) { + return (v128_t)__builtin_wasm_relaxed_nmadd_f16x8((__f16x8)__a, (__f16x8)__b, + (__f16x8)__c); +} + // Deprecated intrinsics static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_swizzle") diff --git a/clang/test/CodeGen/address-safety-attr-flavors.cpp b/clang/test/CodeGen/address-safety-attr-flavors.cpp index 04d540d471dc8f..ef815555059db8 100644 --- a/clang/test/CodeGen/address-safety-attr-flavors.cpp +++ b/clang/test/CodeGen/address-safety-attr-flavors.cpp @@ -28,8 +28,8 @@ int HasSanitizeAddress() { return 1; } // CHECK-NOASAN: {{Function Attrs: mustprogress noinline nounwind$}} // CHECK-ASAN: Function Attrs: mustprogress noinline nounwind sanitize_address // CHECK-KASAN: Function Attrs: mustprogress noinline nounwind sanitize_address -// CHECK-HWASAN: Function Attrs: mustprogress nobuiltin noinline nounwind sanitize_hwaddress -// CHECK-KHWASAN: Function Attrs: mustprogress nobuiltin noinline nounwind sanitize_hwaddress +// CHECK-HWASAN: Function Attrs: mustprogress noinline nounwind sanitize_hwaddress +// CHECK-KHWASAN: Function Attrs: mustprogress noinline nounwind sanitize_hwaddress __attribute__((no_sanitize("address"))) int NoSanitizeQuoteAddress() { return 0; @@ -37,15 +37,15 @@ __attribute__((no_sanitize("address"))) int NoSanitizeQuoteAddress() { // CHECK-NOASAN: {{Function Attrs: mustprogress noinline nounwind$}} // CHECK-ASAN: {{Function Attrs: mustprogress noinline 
nounwind$}} // CHECK-KASAN: {{Function Attrs: mustprogress noinline nounwind$}} -// CHECK-HWASAN: {{Function Attrs: mustprogress nobuiltin noinline nounwind sanitize_hwaddress$}} -// CHECK-KHWASAN: {{Function Attrs: mustprogress nobuiltin noinline nounwind sanitize_hwaddress$}} +// CHECK-HWASAN: {{Function Attrs: mustprogress noinline nounwind sanitize_hwaddress$}} +// CHECK-KHWASAN: {{Function Attrs: mustprogress noinline nounwind sanitize_hwaddress$}} __attribute__((no_sanitize_address)) int NoSanitizeAddress() { return 0; } // CHECK-NOASAN: {{Function Attrs: mustprogress noinline nounwind$}} // CHECK-ASAN: {{Function Attrs: mustprogress noinline nounwind$}} // CHECK-KASAN: {{Function Attrs: mustprogress noinline nounwind$}} -// CHECK-HWASAN: {{Function Attrs: mustprogress nobuiltin noinline nounwind sanitize_hwaddress$}} -// CHECK-KHWASAN: {{Function Attrs: mustprogress nobuiltin noinline nounwind sanitize_hwaddress$}} +// CHECK-HWASAN: {{Function Attrs: mustprogress noinline nounwind sanitize_hwaddress$}} +// CHECK-KHWASAN: {{Function Attrs: mustprogress noinline nounwind sanitize_hwaddress$}} __attribute__((no_sanitize("kernel-address"))) int NoSanitizeKernelAddress() { return 0; @@ -53,8 +53,8 @@ __attribute__((no_sanitize("kernel-address"))) int NoSanitizeKernelAddress() { // CHECK-NOASAN: {{Function Attrs: mustprogress noinline nounwind$}} // CHECK-ASAN: {{Function Attrs: mustprogress noinline nounwind$}} // CHECK-KASAN: {{Function Attrs: mustprogress noinline nounwind$}} -// CHECK-HWASAN: {{Function Attrs: mustprogress nobuiltin noinline nounwind sanitize_hwaddress$}} -// CHECK-KHWASAN: {{Function Attrs: mustprogress nobuiltin noinline nounwind sanitize_hwaddress$}} +// CHECK-HWASAN: {{Function Attrs: mustprogress noinline nounwind sanitize_hwaddress$}} +// CHECK-KHWASAN: {{Function Attrs: mustprogress noinline nounwind sanitize_hwaddress$}} __attribute__((no_sanitize("hwaddress"))) int NoSanitizeHWAddress() { return 0; @@ -62,8 +62,8 @@ __attribute__((no_sanitize("hwaddress"))) int NoSanitizeHWAddress() { // CHECK-NOASAN: {{Function Attrs: mustprogress noinline nounwind$}} // CHECK-ASAN: {{Function Attrs: mustprogress noinline nounwind sanitize_address$}} // CHECK-KASAN: {{Function Attrs: mustprogress noinline nounwind sanitize_address$}} -// CHECK-HWASAN: {{Function Attrs: mustprogress nobuiltin noinline nounwind$}} -// CHECK-KHWASAN: {{Function Attrs: mustprogress nobuiltin noinline nounwind$}} +// CHECK-HWASAN: {{Function Attrs: mustprogress noinline nounwind$}} +// CHECK-KHWASAN: {{Function Attrs: mustprogress noinline nounwind$}} __attribute__((no_sanitize("kernel-hwaddress"))) int NoSanitizeKernelHWAddress() { return 0; @@ -71,8 +71,8 @@ __attribute__((no_sanitize("kernel-hwaddress"))) int NoSanitizeKernelHWAddress() // CHECK-NOASAN: {{Function Attrs: mustprogress noinline nounwind$}} // CHECK-ASAN: {{Function Attrs: mustprogress noinline nounwind sanitize_address$}} // CHECK-KASAN: {{Function Attrs: mustprogress noinline nounwind sanitize_address$}} -// CHECK-HWASAN: {{Function Attrs: mustprogress nobuiltin noinline nounwind$}} -// CHECK-KHWASAN: {{Function Attrs: mustprogress nobuiltin noinline nounwind$}} +// CHECK-HWASAN: {{Function Attrs: mustprogress noinline nounwind$}} +// CHECK-KHWASAN: {{Function Attrs: mustprogress noinline nounwind$}} __attribute__((disable_sanitizer_instrumentation)) int DisableSanitizerInstrumentation() { return 0; @@ -80,5 +80,5 @@ __attribute__((disable_sanitizer_instrumentation)) int DisableSanitizerInstrumen // CHECK-NOASAN: {{Function 
Attrs: disable_sanitizer_instrumentation mustprogress noinline nounwind$}} // CHECK-ASAN: {{Function Attrs: disable_sanitizer_instrumentation mustprogress noinline nounwind$}} // CHECK-KASAN: {{Function Attrs: disable_sanitizer_instrumentation mustprogress noinline nounwind$}} -// CHECK-HWASAN: {{Function Attrs: disable_sanitizer_instrumentation mustprogress nobuiltin noinline nounwind$}} -// CHECK-KHWASAN: {{Function Attrs: disable_sanitizer_instrumentation mustprogress nobuiltin noinline nounwind$}} +// CHECK-HWASAN: {{Function Attrs: disable_sanitizer_instrumentation mustprogress noinline nounwind$}} +// CHECK-KHWASAN: {{Function Attrs: disable_sanitizer_instrumentation mustprogress noinline nounwind$}} diff --git a/clang/test/CodeGen/compound-literal.c b/clang/test/CodeGen/compound-literal.c index 5b3cebb7c6ae6a..5fe9594c0f954f 100644 --- a/clang/test/CodeGen/compound-literal.c +++ b/clang/test/CodeGen/compound-literal.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-apple-darwin -fexperimental-new-constant-interpreter -emit-llvm %s -o - | FileCheck %s // Capture the type and name so matching later is cleaner. struct CompoundTy { int a; }; diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index 6ee10976f12079..9d202e0d046822 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -2361,198 +2361,395 @@ extern "C" __device__ double test_modf(double x, double* y) { return modf(x, y); } -// CHECK-LABEL: @test_nanf( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// CHECK-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] -// CHECK: if.then.i.i: -// CHECK-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1 -// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ -// CHECK-NEXT: i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]] -// CHECK-NEXT: i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]] -// CHECK-NEXT: ] -// CHECK: while.cond.i30.i.i.preheader: -// CHECK-NEXT: br label [[WHILE_COND_I30_I_I:%.*]] -// CHECK: while.cond.i30.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ] -// CHECK-NEXT: [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ] -// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I33_I_I]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]] -// CHECK: while.body.i34.i.i: -// CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 -// CHECK-NEXT: [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10 -// CHECK-NEXT: br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]] -// CHECK: if.else.i.i.i: -// CHECK-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 -// CHECK-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 -// CHECK-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// CHECK: if.else17.i.i.i: -// CHECK-NEXT: [[TMP5:%.*]] = add i8 
[[TMP2]], -65 -// CHECK-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 -// CHECK-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]] -// CHECK: if.end31.i.i.i: -// CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] -// CHECK-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4 -// CHECK-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 -// CHECK-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] -// CHECK-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1 -// CHECK-NEXT: br label [[CLEANUP_I36_I_I]] -// CHECK: cleanup.i36.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ] -// CHECK-NEXT: [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ] -// CHECK-NEXT: [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ] -// CHECK-NEXT: br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]] -// CHECK: while.cond.i.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] -// CHECK-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] -// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] -// CHECK: while.body.i.i.i: -// CHECK-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 -// CHECK-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 -// CHECK-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]] -// CHECK: if.then.i.i.i: -// CHECK-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 -// CHECK-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 -// CHECK-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 -// CHECK-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1 -// CHECK-NEXT: br label [[CLEANUP_I_I_I]] -// CHECK: cleanup.i.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] -// CHECK-NEXT: [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] -// CHECK-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP7]] -// CHECK: while.cond.i14.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] -// CHECK-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ] -// CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]] -// CHECK: while.body.i18.i.i: -// CHECK-NEXT: 
[[TMP9:%.*]] = add i8 [[TMP8]], -48 -// CHECK-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10 -// CHECK-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]] -// CHECK: if.then.i24.i.i: -// CHECK-NEXT: [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 -// CHECK-NEXT: [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 -// CHECK-NEXT: [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48 -// CHECK-NEXT: [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1 -// CHECK-NEXT: br label [[CLEANUP_I20_I_I]] -// CHECK: cleanup.i20.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ] -// CHECK-NEXT: [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ] -// CHECK-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP10]] -// CHECK: _ZL4nanfPKc.exit: -// CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] -// CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 -// CHECK-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303 -// CHECK-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344 -// CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float -// CHECK-NEXT: ret float [[TMP10]] +// DEFAULT-LABEL: @test_nanf( +// DEFAULT-NEXT: entry: +// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 +// DEFAULT-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] +// DEFAULT: if.then.i.i: +// DEFAULT-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1 +// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ +// DEFAULT-NEXT: i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]] +// DEFAULT-NEXT: i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]] +// DEFAULT-NEXT: ] +// DEFAULT: while.cond.i30.i.i.preheader: +// DEFAULT-NEXT: br label [[WHILE_COND_I30_I_I:%.*]] +// DEFAULT: while.cond.i30.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0 +// DEFAULT-NEXT: br i1 [[CMP_NOT_I33_I_I]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]] +// DEFAULT: while.body.i34.i.i: +// DEFAULT-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 +// DEFAULT-NEXT: [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10 +// DEFAULT-NEXT: br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]] +// DEFAULT: if.else.i.i.i: +// DEFAULT-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 +// DEFAULT-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult 
i8 [[TMP4]], 6 +// DEFAULT-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] +// DEFAULT: if.else17.i.i.i: +// DEFAULT-NEXT: [[TMP5:%.*]] = add i8 [[TMP2]], -65 +// DEFAULT-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 +// DEFAULT-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]] +// DEFAULT: if.end31.i.i.i: +// DEFAULT-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// DEFAULT-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4 +// DEFAULT-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 +// DEFAULT-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] +// DEFAULT-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] +// DEFAULT-NEXT: [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1 +// DEFAULT-NEXT: br label [[CLEANUP_I36_I_I]] +// DEFAULT: cleanup.i36.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ] +// DEFAULT-NEXT: [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ] +// DEFAULT-NEXT: [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ] +// DEFAULT-NEXT: br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]] +// DEFAULT: while.cond.i.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] +// DEFAULT-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] +// DEFAULT-NEXT: [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 +// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] +// DEFAULT: while.body.i.i.i: +// DEFAULT-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 +// DEFAULT-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 +// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]] +// DEFAULT: if.then.i.i.i: +// DEFAULT-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 +// DEFAULT-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 +// DEFAULT-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 +// DEFAULT-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] +// DEFAULT-NEXT: [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1 +// DEFAULT-NEXT: br label [[CLEANUP_I_I_I]] +// DEFAULT: cleanup.i.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] +// DEFAULT-NEXT: [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] +// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP7]] +// DEFAULT: while.cond.i14.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] +// DEFAULT-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ] +// DEFAULT-NEXT: [[TMP8:%.*]] = load i8, ptr 
[[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 +// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]] +// DEFAULT: while.body.i18.i.i: +// DEFAULT-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 +// DEFAULT-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10 +// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]] +// DEFAULT: if.then.i24.i.i: +// DEFAULT-NEXT: [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 +// DEFAULT-NEXT: [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 +// DEFAULT-NEXT: [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48 +// DEFAULT-NEXT: [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]] +// DEFAULT-NEXT: [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1 +// DEFAULT-NEXT: br label [[CLEANUP_I20_I_I]] +// DEFAULT: cleanup.i20.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ] +// DEFAULT-NEXT: [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ] +// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP10]] +// DEFAULT: _ZL4nanfPKc.exit: +// DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] +// DEFAULT-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 +// DEFAULT-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303 +// DEFAULT-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344 +// DEFAULT-NEXT: [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float +// DEFAULT-NEXT: ret float [[TMP10]] +// +// FINITEONLY-LABEL: @test_nanf( +// FINITEONLY-NEXT: entry: +// FINITEONLY-NEXT: ret float poison +// +// APPROX-LABEL: @test_nanf( +// APPROX-NEXT: entry: +// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 +// APPROX-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] +// APPROX: if.then.i.i: +// APPROX-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1 +// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ +// APPROX-NEXT: i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]] +// APPROX-NEXT: i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]] +// APPROX-NEXT: ] +// APPROX: while.cond.i30.i.i.preheader: +// APPROX-NEXT: br label [[WHILE_COND_I30_I_I:%.*]] +// APPROX: while.cond.i30.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ] +// APPROX-NEXT: [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ] +// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0 +// APPROX-NEXT: br i1 [[CMP_NOT_I33_I_I]], label [[_ZL4NANFPKC_EXIT:%.*]], label 
[[WHILE_BODY_I34_I_I:%.*]] +// APPROX: while.body.i34.i.i: +// APPROX-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 +// APPROX-NEXT: [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10 +// APPROX-NEXT: br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]] +// APPROX: if.else.i.i.i: +// APPROX-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 +// APPROX-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 +// APPROX-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] +// APPROX: if.else17.i.i.i: +// APPROX-NEXT: [[TMP5:%.*]] = add i8 [[TMP2]], -65 +// APPROX-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 +// APPROX-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]] +// APPROX: if.end31.i.i.i: +// APPROX-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// APPROX-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4 +// APPROX-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 +// APPROX-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] +// APPROX-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] +// APPROX-NEXT: [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1 +// APPROX-NEXT: br label [[CLEANUP_I36_I_I]] +// APPROX: cleanup.i36.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ] +// APPROX-NEXT: [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ] +// APPROX-NEXT: [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ] +// APPROX-NEXT: br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]] +// APPROX: while.cond.i.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] +// APPROX-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] +// APPROX-NEXT: [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 +// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] +// APPROX: while.body.i.i.i: +// APPROX-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 +// APPROX-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 +// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]] +// APPROX: if.then.i.i.i: +// APPROX-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 +// APPROX-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 +// APPROX-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 +// APPROX-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] +// APPROX-NEXT: [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1 +// APPROX-NEXT: br label [[CLEANUP_I_I_I]] +// APPROX: cleanup.i.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] +// APPROX-NEXT: [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] +// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label 
[[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP7]] +// APPROX: while.cond.i14.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] +// APPROX-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ] +// APPROX-NEXT: [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 +// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]] +// APPROX: while.body.i18.i.i: +// APPROX-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 +// APPROX-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10 +// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]] +// APPROX: if.then.i24.i.i: +// APPROX-NEXT: [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 +// APPROX-NEXT: [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 +// APPROX-NEXT: [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48 +// APPROX-NEXT: [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]] +// APPROX-NEXT: [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1 +// APPROX-NEXT: br label [[CLEANUP_I20_I_I]] +// APPROX: cleanup.i20.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ] +// APPROX-NEXT: [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ] +// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP10]] +// APPROX: _ZL4nanfPKc.exit: +// APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] +// APPROX-NEXT: [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32 +// APPROX-NEXT: [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303 +// APPROX-NEXT: [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344 +// APPROX-NEXT: [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float +// APPROX-NEXT: ret float [[TMP10]] // extern "C" __device__ float test_nanf(const char *tag) { return nanf(tag); } -// CHECK-LABEL: @test_nan( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 -// CHECK-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] -// CHECK: if.then.i.i: -// CHECK-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1 -// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ -// CHECK-NEXT: i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]] -// CHECK-NEXT: i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]] -// CHECK-NEXT: ] -// CHECK: while.cond.i30.i.i.preheader: -// CHECK-NEXT: br label [[WHILE_COND_I30_I_I:%.*]] -// CHECK: while.cond.i30.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ] -// CHECK-NEXT: [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], 
[[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ] -// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I33_I_I]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]] -// CHECK: while.body.i34.i.i: -// CHECK-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 -// CHECK-NEXT: [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10 -// CHECK-NEXT: br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]] -// CHECK: if.else.i.i.i: -// CHECK-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 -// CHECK-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 -// CHECK-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] -// CHECK: if.else17.i.i.i: -// CHECK-NEXT: [[TMP5:%.*]] = add i8 [[TMP2]], -65 -// CHECK-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 -// CHECK-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]] -// CHECK: if.end31.i.i.i: -// CHECK-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] -// CHECK-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4 -// CHECK-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 -// CHECK-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] -// CHECK-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1 -// CHECK-NEXT: br label [[CLEANUP_I36_I_I]] -// CHECK: cleanup.i36.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ] -// CHECK-NEXT: [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ] -// CHECK-NEXT: [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ] -// CHECK-NEXT: br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]] -// CHECK: while.cond.i.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] -// CHECK-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] -// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] -// CHECK: while.body.i.i.i: -// CHECK-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 -// CHECK-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 -// CHECK-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]] -// CHECK: if.then.i.i.i: -// CHECK-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 -// CHECK-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 -// CHECK-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 -// CHECK-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1 -// CHECK-NEXT: br label [[CLEANUP_I_I_I]] -// CHECK: cleanup.i.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ 
[[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] -// CHECK-NEXT: [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] -// CHECK-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP7]] -// CHECK: while.cond.i14.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] -// CHECK-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ] -// CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]] -// CHECK-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 -// CHECK-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]] -// CHECK: while.body.i18.i.i: -// CHECK-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 -// CHECK-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10 -// CHECK-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]] -// CHECK: if.then.i24.i.i: -// CHECK-NEXT: [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 -// CHECK-NEXT: [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 -// CHECK-NEXT: [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48 -// CHECK-NEXT: [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]] -// CHECK-NEXT: [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1 -// CHECK-NEXT: br label [[CLEANUP_I20_I_I]] -// CHECK: cleanup.i20.i.i: -// CHECK-NEXT: [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ] -// CHECK-NEXT: [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ] -// CHECK-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP10]] -// CHECK: _ZL3nanPKc.exit: -// CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] -// CHECK-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 -// CHECK-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560 -// CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double -// CHECK-NEXT: ret double [[TMP10]] +// DEFAULT-LABEL: @test_nan( +// DEFAULT-NEXT: entry: +// DEFAULT-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 +// DEFAULT-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] +// DEFAULT: if.then.i.i: +// DEFAULT-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1 +// DEFAULT-NEXT: [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ +// DEFAULT-NEXT: i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]] +// DEFAULT-NEXT: i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]] +// DEFAULT-NEXT: ] +// DEFAULT: while.cond.i30.i.i.preheader: +// DEFAULT-NEXT: br label [[WHILE_COND_I30_I_I:%.*]] +// DEFAULT: while.cond.i30.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], 
[[WHILE_COND_I30_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ] +// DEFAULT-NEXT: [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0 +// DEFAULT-NEXT: br i1 [[CMP_NOT_I33_I_I]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]] +// DEFAULT: while.body.i34.i.i: +// DEFAULT-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 +// DEFAULT-NEXT: [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10 +// DEFAULT-NEXT: br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]] +// DEFAULT: if.else.i.i.i: +// DEFAULT-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 +// DEFAULT-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 +// DEFAULT-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] +// DEFAULT: if.else17.i.i.i: +// DEFAULT-NEXT: [[TMP5:%.*]] = add i8 [[TMP2]], -65 +// DEFAULT-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 +// DEFAULT-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]] +// DEFAULT: if.end31.i.i.i: +// DEFAULT-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// DEFAULT-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4 +// DEFAULT-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 +// DEFAULT-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] +// DEFAULT-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] +// DEFAULT-NEXT: [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1 +// DEFAULT-NEXT: br label [[CLEANUP_I36_I_I]] +// DEFAULT: cleanup.i36.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ] +// DEFAULT-NEXT: [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ] +// DEFAULT-NEXT: [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ] +// DEFAULT-NEXT: br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]] +// DEFAULT: while.cond.i.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] +// DEFAULT-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] +// DEFAULT-NEXT: [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 +// DEFAULT-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] +// DEFAULT: while.body.i.i.i: +// DEFAULT-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 +// DEFAULT-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 +// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]] +// DEFAULT: if.then.i.i.i: +// DEFAULT-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 +// DEFAULT-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 +// DEFAULT-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 +// DEFAULT-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] +// DEFAULT-NEXT: [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr 
[[__TAGP_ADDR_0_I_I_I]], i64 1 +// DEFAULT-NEXT: br label [[CLEANUP_I_I_I]] +// DEFAULT: cleanup.i.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] +// DEFAULT-NEXT: [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] +// DEFAULT-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP7]] +// DEFAULT: while.cond.i14.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] +// DEFAULT-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ] +// DEFAULT-NEXT: [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]] +// DEFAULT-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 +// DEFAULT-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]] +// DEFAULT: while.body.i18.i.i: +// DEFAULT-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 +// DEFAULT-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10 +// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]] +// DEFAULT: if.then.i24.i.i: +// DEFAULT-NEXT: [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 +// DEFAULT-NEXT: [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 +// DEFAULT-NEXT: [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48 +// DEFAULT-NEXT: [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]] +// DEFAULT-NEXT: [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1 +// DEFAULT-NEXT: br label [[CLEANUP_I20_I_I]] +// DEFAULT: cleanup.i20.i.i: +// DEFAULT-NEXT: [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ] +// DEFAULT-NEXT: [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ] +// DEFAULT-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP10]] +// DEFAULT: _ZL3nanPKc.exit: +// DEFAULT-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] +// DEFAULT-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 +// DEFAULT-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560 +// DEFAULT-NEXT: [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double +// DEFAULT-NEXT: ret double [[TMP10]] +// +// FINITEONLY-LABEL: @test_nan( +// FINITEONLY-NEXT: entry: +// FINITEONLY-NEXT: ret double poison +// +// APPROX-LABEL: @test_nan( +// APPROX-NEXT: entry: +// APPROX-NEXT: [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48 +// APPROX-NEXT: br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]] +// APPROX: if.then.i.i: +// APPROX-NEXT: [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds i8, ptr [[TAG]], i64 1 +// APPROX-NEXT: [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [ +// APPROX-NEXT: i8 120, label 
[[WHILE_COND_I30_I_I_PREHEADER:%.*]] +// APPROX-NEXT: i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]] +// APPROX-NEXT: ] +// APPROX: while.cond.i30.i.i.preheader: +// APPROX-NEXT: br label [[WHILE_COND_I30_I_I:%.*]] +// APPROX: while.cond.i30.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ] +// APPROX-NEXT: [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ] +// APPROX-NEXT: [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0 +// APPROX-NEXT: br i1 [[CMP_NOT_I33_I_I]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]] +// APPROX: while.body.i34.i.i: +// APPROX-NEXT: [[TMP3:%.*]] = add i8 [[TMP2]], -48 +// APPROX-NEXT: [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10 +// APPROX-NEXT: br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]] +// APPROX: if.else.i.i.i: +// APPROX-NEXT: [[TMP4:%.*]] = add i8 [[TMP2]], -97 +// APPROX-NEXT: [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6 +// APPROX-NEXT: br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]] +// APPROX: if.else17.i.i.i: +// APPROX-NEXT: [[TMP5:%.*]] = add i8 [[TMP2]], -65 +// APPROX-NEXT: [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6 +// APPROX-NEXT: br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]] +// APPROX: if.end31.i.i.i: +// APPROX-NEXT: [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ] +// APPROX-NEXT: [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4 +// APPROX-NEXT: [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64 +// APPROX-NEXT: [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]] +// APPROX-NEXT: [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]] +// APPROX-NEXT: [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1 +// APPROX-NEXT: br label [[CLEANUP_I36_I_I]] +// APPROX: cleanup.i36.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ] +// APPROX-NEXT: [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ] +// APPROX-NEXT: [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ] +// APPROX-NEXT: br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]] +// APPROX: while.cond.i.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ] +// APPROX-NEXT: [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ] +// APPROX-NEXT: [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0 +// APPROX-NEXT: br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]] +// APPROX: while.body.i.i.i: +// APPROX-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -8 +// APPROX-NEXT: [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48 +// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]] +// APPROX: if.then.i.i.i: +// 
APPROX-NEXT: [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3 +// APPROX-NEXT: [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64 +// APPROX-NEXT: [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48 +// APPROX-NEXT: [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]] +// APPROX-NEXT: [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1 +// APPROX-NEXT: br label [[CLEANUP_I_I_I]] +// APPROX: cleanup.i.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] +// APPROX-NEXT: [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ] +// APPROX-NEXT: br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP7]] +// APPROX: while.cond.i14.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ] +// APPROX-NEXT: [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ] +// APPROX-NEXT: [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]] +// APPROX-NEXT: [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0 +// APPROX-NEXT: br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]] +// APPROX: while.body.i18.i.i: +// APPROX-NEXT: [[TMP9:%.*]] = add i8 [[TMP8]], -48 +// APPROX-NEXT: [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10 +// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]] +// APPROX: if.then.i24.i.i: +// APPROX-NEXT: [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10 +// APPROX-NEXT: [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64 +// APPROX-NEXT: [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48 +// APPROX-NEXT: [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]] +// APPROX-NEXT: [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1 +// APPROX-NEXT: br label [[CLEANUP_I20_I_I]] +// APPROX: cleanup.i20.i.i: +// APPROX-NEXT: [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ] +// APPROX-NEXT: [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ] +// APPROX-NEXT: br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP10]] +// APPROX: _ZL3nanPKc.exit: +// APPROX-NEXT: [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ] +// APPROX-NEXT: [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247 +// APPROX-NEXT: [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560 +// APPROX-NEXT: [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double +// APPROX-NEXT: ret double [[TMP10]] // extern "C" __device__ double test_nan(const char *tag) { return nan(tag); diff --git a/clang/test/SemaTemplate/default-arguments.cpp b/clang/test/SemaTemplate/default-arguments.cpp index d5d9687cc90f49..3b1fbda414c12b 100644 --- a/clang/test/SemaTemplate/default-arguments.cpp +++ b/clang/test/SemaTemplate/default-arguments.cpp @@ -229,3 +229,55 @@ namespace unevaluated { template int f(int = a); // expected-warning 
0-1{{extension}}
 int k = sizeof(f());
 }
+
+#if __cplusplus >= 201103L
+namespace GH68490 {
+
+template <typename T> struct S {
+  template <typename U>
+  constexpr int SizeOfU(int param = sizeof(U)) const;
+
+  template <typename U>
+  constexpr int SizeOfT(int param = sizeof(T)) const;
+};
+
+template <typename T> struct S<T *> {
+  template <typename U>
+  constexpr int SizeOfU(int param = sizeof(U)) const;
+
+  template <typename U>
+  constexpr int SizeOfT(int param = sizeof(T *)) const;
+};
+
+template <typename T>
+template <typename U>
+constexpr int S<T *>::SizeOfU(int param) const {
+  return param;
+}
+
+template <typename T>
+template <typename U>
+constexpr int S<T *>::SizeOfT(int param) const {
+  return param;
+}
+
+template <>
+template <typename U>
+constexpr int S<int>::SizeOfU(int param) const {
+  return param;
+}
+
+template <>
+template <typename U>
+constexpr int S<int>::SizeOfT(int param) const {
+  return param;
+}
+
+static_assert(S<int>().SizeOfU<char>() == sizeof(char), "");
+static_assert(S<int>().SizeOfT<char>() == sizeof(int), "");
+static_assert(S<short *>().SizeOfU<char>() == sizeof(char), "");
+static_assert(S<short *>().SizeOfT<char>() == sizeof(short *), "");
+
+} // namespace GH68490
+
+#endif
diff --git a/clang/test/SemaTemplate/default-parm-init.cpp b/clang/test/SemaTemplate/default-parm-init.cpp
deleted file mode 100644
index 73ba8998df6a98..00000000000000
--- a/clang/test/SemaTemplate/default-parm-init.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-// RUN: %clang_cc1 -fsyntax-only -std=c++17 -verify %s
-// RUN: %clang_cc1 -fsyntax-only -std=c++20 -verify %s
-// expected-no-diagnostics
-
-namespace std {
-
-template <typename T> class function;
-
-template <typename R, typename... Args> class invoker_base {
-public:
-  virtual ~invoker_base() { }
-  virtual R invoke(Args...) = 0;
-  virtual invoker_base* clone() = 0;
-};
-
-template <typename F, typename R, typename... Args>
-class functor_invoker : public invoker_base<R, Args...> {
-public:
-  explicit functor_invoker(const F& f) : f(f) { }
-  R invoke(Args... args) { return f(args...); }
-  functor_invoker* clone() { return new functor_invoker(f); }
-
-private:
-  F f;
-};
-
-template <typename R, typename... Args>
-class function<R (Args...)> {
-public:
-  typedef R result_type;
-  function() : invoker (0) { }
-  function(const function& other) : invoker(0) {
-    if (other.invoker)
-      invoker = other.invoker->clone();
-  }
-
-  template <typename F> function(const F& f) : invoker(0) {
-    invoker = new functor_invoker<F, R, Args...>(f);
-  }
-
-  ~function() {
-    if (invoker)
-      delete invoker;
-  }
-
-  function& operator=(const function& other) {
-    function(other).swap(*this);
-    return *this;
-  }
-
-  template <typename F>
-  function& operator=(const F& f) {
-    function(f).swap(*this);
-    return *this;
-  }
-
-  void swap(function& other) {
-    invoker_base<R, Args...>* tmp = invoker;
-    invoker = other.invoker;
-    other.invoker = tmp;
-  }
-
-  result_type operator()(Args...
args) const { - return invoker->invoke(args...); - } - -private: - invoker_base* invoker; -}; - -} - -template -struct Problem { - template - constexpr int FuncAlign(int param = alignof(FunctionTemplateParam)); - - template - constexpr int FuncSizeof(int param = sizeof(FunctionTemplateParam)); - - template - constexpr int FuncAlign2(int param = alignof(TemplateParam)); - - template - constexpr int FuncSizeof2(int param = sizeof(TemplateParam)); -}; - -template -struct Problem { - template - constexpr int FuncAlign(int param = alignof(FunctionTemplateParam)); - - template - constexpr int FuncSizeof(int param = sizeof(FunctionTemplateParam)); - - template - constexpr int FuncAlign2(int param = alignof(TemplateParam)); - - template - constexpr int FuncSizeof2(int param = sizeof(TemplateParam)); -}; - -template -template -constexpr int Problem::FuncAlign(int param) { - return 2U*param; -} - -template -template -constexpr int Problem::FuncSizeof(int param) { - return 2U*param; -} - -template -template -constexpr int Problem::FuncAlign2(int param) { - return 2U*param; -} - -template -template -constexpr int Problem::FuncSizeof2(int param) { - return 2U*param; -} - -template <> -template -constexpr int Problem::FuncAlign(int param) { - return param; -} - -template <> -template -constexpr int Problem::FuncSizeof(int param) { - return param; -} - -template <> -template -constexpr int Problem::FuncAlign2(int param) { - return param; -} - -template <> -template -constexpr int Problem::FuncSizeof2(int param) { - return param; -} - -void foo() { - Problem p = {}; - static_assert(p.FuncAlign() == alignof(char)); - static_assert(p.FuncSizeof() == sizeof(char)); - static_assert(p.FuncAlign2() == alignof(int)); - static_assert(p.FuncSizeof2() == sizeof(int)); - Problem q = {}; - static_assert(q.FuncAlign() == 2U * alignof(char)); - static_assert(q.FuncSizeof() == 2U * sizeof(char)); - static_assert(q.FuncAlign2() == 2U *alignof(short)); - static_assert(q.FuncSizeof2() == 2U * sizeof(short)); -} - -template -class A { - public: - void run( - std::function f1 = [](auto&&) {}, - std::function f2 = [](auto&&) {}); - private: - class Helper { - public: - explicit Helper(std::function f2) : f2_(f2) {} - std::function f2_; - }; -}; - -template -void A::run(std::function f1, - std::function f2) { - Helper h(f2); -} - -struct B {}; - -int main() { - A a; - a.run([&](auto& l) {}); - return 0; -} diff --git a/clang/tools/scan-build-py/CMakeLists.txt b/clang/tools/scan-build-py/CMakeLists.txt index 3aca22c0b0a8d3..9273eb5ed977e4 100644 --- a/clang/tools/scan-build-py/CMakeLists.txt +++ b/clang/tools/scan-build-py/CMakeLists.txt @@ -88,7 +88,7 @@ foreach(lib ${LibScanbuild}) DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/${lib}) list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/${lib}) install(FILES lib/libscanbuild/${lib} - DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild + DESTINATION lib/libscanbuild COMPONENT scan-build-py) endforeach() @@ -106,7 +106,7 @@ foreach(resource ${LibScanbuildResources}) DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/resources/${resource}) list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/resources/${resource}) install(FILES lib/libscanbuild/resources/${resource} - DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libscanbuild/resources + DESTINATION lib/libscanbuild/resources COMPONENT scan-build-py) endforeach() @@ -122,7 +122,7 @@ foreach(lib ${LibEar}) DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libear/${lib}) list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libear/${lib}) 
install(FILES lib/libear/${lib}
-          DESTINATION lib${CLANG_LIBDIR_SUFFIX}/libear
+          DESTINATION lib/libear
           COMPONENT scan-build-py)
 endforeach()
diff --git a/clang/tools/scan-view/share/startfile.py b/clang/tools/scan-view/share/startfile.py
index d63e69280e90dd..c72475e8b6212e 100644
--- a/clang/tools/scan-view/share/startfile.py
+++ b/clang/tools/scan-view/share/startfile.py
@@ -48,7 +48,7 @@ def _invoke(self, cmdline):
             or sys.platform[:3] == "win"
             or sys.platform == "darwin"
         ):
-            inout = file(os.devnull, "r+")
+            inout = subprocess.DEVNULL
         else:
             # for TTY programs, we need stdin/out
             inout = None
diff --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp
index 45840f5188cdcd..0f2476bd8b0612 100644
--- a/clang/unittests/Basic/SourceManagerTest.cpp
+++ b/clang/unittests/Basic/SourceManagerTest.cpp
@@ -20,6 +20,7 @@
 #include "clang/Lex/PreprocessorOptions.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Process.h"
 #include "gtest/gtest.h"
 #include
@@ -453,6 +454,65 @@ TEST_F(SourceManagerTest, loadedSLocEntryIsInTheSameTranslationUnit) {
 #if defined(LLVM_ON_UNIX)
 
+// A single SourceManager instance is sometimes reused across multiple
+// compilations. This test makes sure we're resetting caches built for tracking
+// include locations that are based on FileIDs, to make sure we don't report
+// wrong include locations when FileIDs coincide between two different runs.
+TEST_F(SourceManagerTest, ResetsIncludeLocMap) {
+  auto ParseFile = [&] {
+    TrivialModuleLoader ModLoader;
+    HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
+                            Diags, LangOpts, &*Target);
+    Preprocessor PP(std::make_shared<PreprocessorOptions>(), Diags, LangOpts,
+                    SourceMgr, HeaderInfo, ModLoader,
+                    /*IILookup =*/nullptr,
+                    /*OwnsHeaderSearch =*/false);
+    PP.Initialize(*Target);
+    PP.EnterMainSourceFile();
+    PP.LexTokensUntilEOF();
+    EXPECT_FALSE(Diags.hasErrorOccurred());
+  };
+
+  auto Buf = llvm::MemoryBuffer::getMemBuffer("");
+  FileEntryRef HeaderFile =
+      FileMgr.getVirtualFileRef("/foo.h", Buf->getBufferSize(), 0);
+  SourceMgr.overrideFileContents(HeaderFile, std::move(Buf));
+
+  Buf = llvm::MemoryBuffer::getMemBuffer(R"cpp(#include "/foo.h")cpp");
+  FileEntryRef BarFile =
+      FileMgr.getVirtualFileRef("/bar.h", Buf->getBufferSize(), 0);
+  SourceMgr.overrideFileContents(BarFile, std::move(Buf));
+  SourceMgr.createFileID(BarFile, {}, clang::SrcMgr::C_User);
+
+  Buf = llvm::MemoryBuffer::getMemBuffer(R"cpp(#include "/foo.h")cpp");
+  FileID MFID = SourceMgr.createFileID(std::move(Buf));
+  SourceMgr.setMainFileID(MFID);
+
+  ParseFile();
+  auto FooFID = SourceMgr.getOrCreateFileID(HeaderFile, clang::SrcMgr::C_User);
+  auto IncFID = SourceMgr.getDecomposedIncludedLoc(FooFID).first;
+  EXPECT_EQ(IncFID, MFID);
+
+  // Clean up source-manager state before we start next parse.
+  SourceMgr.clearIDTables();
+
+  // Set up a new main file.
+  Buf = llvm::MemoryBuffer::getMemBuffer(R"cpp(
+  // silly comment 42
+  #include "/bar.h")cpp");
+  MFID = SourceMgr.createFileID(std::move(Buf));
+  SourceMgr.setMainFileID(MFID);
+
+  ParseFile();
+  // Make sure foo.h got the same file-id in both runs.
+  EXPECT_EQ(FooFID,
+            SourceMgr.getOrCreateFileID(HeaderFile, clang::SrcMgr::C_User));
+  auto BarFID = SourceMgr.getOrCreateFileID(BarFile, clang::SrcMgr::C_User);
+  IncFID = SourceMgr.getDecomposedIncludedLoc(FooFID).first;
+  // Check that includer is bar.h during this run.
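+  // If clearIDTables() did not also reset the FileID-keyed include-location
+  // cache, this lookup could still see the includer recorded during the
+  // first parse.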
+ EXPECT_EQ(IncFID, BarFID); +} + TEST_F(SourceManagerTest, getMacroArgExpandedLocation) { const char *header = "#define FM(x,y) x\n"; diff --git a/clang/utils/creduce-clang-crash.py b/clang/utils/creduce-clang-crash.py index db4a3435a3aef7..180dfbeab224e9 100755 --- a/clang/utils/creduce-clang-crash.py +++ b/clang/utils/creduce-clang-crash.py @@ -8,7 +8,6 @@ *.test.sh -- interestingness test for C-Reduce """ -from __future__ import print_function from argparse import ArgumentParser, RawTextHelpFormatter import os import re @@ -228,8 +227,7 @@ def check_interestingness(self): testfile = os.path.abspath(self.testfile) # Check that the test considers the original file interesting - with open(os.devnull, "w") as devnull: - returncode = subprocess.call(testfile, stdout=devnull) + returncode = subprocess.call(testfile, stdout=subprocess.DEVNULL) if returncode: sys.exit("The interestingness test does not pass for the original file.") diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 13adbd6c4d57d9..2c3b0fa84a4782 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -868,10 +868,12 @@ else () endif() endif() endif() - check_c_source_compiles("_Float16 foo(_Float16 x) { return x; }" + check_c_source_compiles("_Float16 foo(_Float16 x) { return x; } + int main(void) { return 0; }" COMPILER_RT_HAS_${arch}_FLOAT16) append_list_if(COMPILER_RT_HAS_${arch}_FLOAT16 -DCOMPILER_RT_HAS_FLOAT16 BUILTIN_CFLAGS_${arch}) - check_c_source_compiles("__bf16 foo(__bf16 x) { return x; }" + check_c_source_compiles("__bf16 foo(__bf16 x) { return x; } + int main(void) { return 0; }" COMPILER_RT_HAS_${arch}_BFLOAT16) # Build BF16 files only when "__bf16" is available. if(COMPILER_RT_HAS_${arch}_BFLOAT16) diff --git a/compiler-rt/lib/rtsan/rtsan.cpp b/compiler-rt/lib/rtsan/rtsan.cpp index 8a7ff03c611c65..b2c4616b5fd0dc 100644 --- a/compiler-rt/lib/rtsan/rtsan.cpp +++ b/compiler-rt/lib/rtsan/rtsan.cpp @@ -58,11 +58,11 @@ SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_realtime_exit() { __rtsan::GetContextForThisThread().RealtimePop(); } -SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_off() { +SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_disable() { __rtsan::GetContextForThisThread().BypassPush(); } -SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_on() { +SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_enable() { __rtsan::GetContextForThisThread().BypassPop(); } diff --git a/compiler-rt/lib/rtsan/rtsan.h b/compiler-rt/lib/rtsan/rtsan.h index 3d665c98aed184..ae23609f97d2dc 100644 --- a/compiler-rt/lib/rtsan/rtsan.h +++ b/compiler-rt/lib/rtsan/rtsan.h @@ -38,11 +38,11 @@ SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_realtime_exit(); // Disable all RTSan error reporting. // Injected into the code if "nosanitize(realtime)" is on a function. -SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_off(); +SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_disable(); // Re-enable all RTSan error reporting. -// The counterpart to `__rtsan_off`. -SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_on(); +// The counterpart to `__rtsan_disable`. 
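+// Typical usage brackets a deliberately unchecked region, e.g.:
+//
+//   __rtsan_disable();
+//   // ... calls that are not real-time-safe ...
+//   __rtsan_enable();
+//
+// The bypass is maintained as a push/pop pair (see BypassPush/BypassPop),
+// so disable/enable pairs may nest.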
+SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_enable(); SANITIZER_INTERFACE_ATTRIBUTE void __rtsan_expect_not_realtime(const char *intercepted_function_name); diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp index 6e7ab016a4c6b2..5a86957170dcec 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp @@ -204,10 +204,10 @@ TEST(TestRtsan, ThrowingAnExceptionDiesWhenRealtime) { TEST(TestRtsan, DoesNotDieIfTurnedOff) { std::mutex mutex; auto RealtimeUnsafeFunc = [&]() { - __rtsan_off(); + __rtsan_disable(); mutex.lock(); mutex.unlock(); - __rtsan_on(); + __rtsan_enable(); }; RealtimeInvoke(RealtimeUnsafeFunc); } diff --git a/compiler-rt/test/nsan/vec_sqrt.cpp b/compiler-rt/test/nsan/vec_sqrt.cpp index d1ef0487858506..64a7130322873c 100644 --- a/compiler-rt/test/nsan/vec_sqrt.cpp +++ b/compiler-rt/test/nsan/vec_sqrt.cpp @@ -1,7 +1,7 @@ // RUN: %clangxx_nsan -O0 -g -mavx %s -o %t -// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=0 %run %t 2>&1 | FileCheck %s +// RUN: env NSAN_OPTIONS=check_nan=true,halt_on_error=0 %run %t 2>&1 | FileCheck %s // RUN: %clangxx_nsan -O3 -g -mavx %s -o %t -// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=0 %run %t 2>&1 | FileCheck %s +// RUN: env NSAN_OPTIONS=check_nan=true,halt_on_error=0 %run %t 2>&1 | FileCheck %s #include #include @@ -31,4 +31,4 @@ int main() { // CHECK: WARNING: NumericalStabilitySanitizer: NaN detected } return 0; -} \ No newline at end of file +} diff --git a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c index fb15e0143d3653..b601d90cfcc927 100644 --- a/cross-project-tests/intrinsic-header-tests/wasm_simd128.c +++ b/cross-project-tests/intrinsic-header-tests/wasm_simd128.c @@ -2,7 +2,7 @@ // expected-no-diagnostics // RUN: %clang %s -O2 -S -o - -target wasm32-unknown-unknown \ -// RUN: -msimd128 -mrelaxed-simd -Wcast-qual -Werror | FileCheck %s +// RUN: -msimd128 -mrelaxed-simd -mfp16 -Wcast-qual -Werror | FileCheck %s #include @@ -1385,3 +1385,139 @@ v128_t test_i16x8_relaxed_dot_i8x16_i7x16(v128_t a, v128_t b) { v128_t test_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t a, v128_t b, v128_t c) { return wasm_i32x4_relaxed_dot_i8x16_i7x16_add(a, b, c); } + +// CHECK-LABEL: test_f16x8_splat: +// CHECK: f16x8.splat{{$}} +v128_t test_f16x8_splat(float a) { return wasm_f16x8_splat(a); } + +// CHECK-LABEL: test_f16x8_extract_lane: +// CHECK: f16x8.extract_lane 7{{$}} +int16_t test_f16x8_extract_lane(v128_t a) { + return wasm_f16x8_extract_lane(a, 7); +} + +// CHECK-LABEL: test_f16x8_replace_lane: +// CHECK: f16x8.replace_lane 7{{$}} +v128_t test_f16x8_replace_lane(v128_t a, float b) { + return wasm_f16x8_replace_lane(a, 7, b); +} + +// CHECK-LABEL: test_f16x8_abs: +// CHECK: f16x8.abs{{$}} +v128_t test_f16x8_abs(v128_t a) { return wasm_f16x8_abs(a); } + +// CHECK-LABEL: test_f16x8_neg: +// CHECK: f16x8.neg{{$}} +v128_t test_f16x8_neg(v128_t a) { return wasm_f16x8_neg(a); } + +// CHECK-LABEL: test_f16x8_sqrt: +// CHECK: f16x8.sqrt{{$}} +v128_t test_f16x8_sqrt(v128_t a) { return wasm_f16x8_sqrt(a); } + +// CHECK-LABEL: test_f16x8_ceil: +// CHECK: f16x8.ceil{{$}} +v128_t test_f16x8_ceil(v128_t a) { return wasm_f16x8_ceil(a); } + +// CHECK-LABEL: test_f16x8_floor: +// CHECK: f16x8.floor{{$}} +v128_t test_f16x8_floor(v128_t a) { return wasm_f16x8_floor(a); } + +// CHECK-LABEL: test_f16x8_trunc: +// CHECK: f16x8.trunc{{$}} +v128_t 
test_f16x8_trunc(v128_t a) { return wasm_f16x8_trunc(a); } + +// CHECK-LABEL: test_f16x8_nearest: +// CHECK: f16x8.nearest{{$}} +v128_t test_f16x8_nearest(v128_t a) { return wasm_f16x8_nearest(a); } + +// CHECK-LABEL: test_f16x8_add: +// CHECK: f16x8.add{{$}} +v128_t test_f16x8_add(v128_t a, v128_t b) { return wasm_f16x8_add(a, b); } + +// CHECK-LABEL: test_f16x8_sub: +// CHECK: f16x8.sub{{$}} +v128_t test_f16x8_sub(v128_t a, v128_t b) { return wasm_f16x8_sub(a, b); } + +// CHECK-LABEL: test_f16x8_mul: +// CHECK: f16x8.mul{{$}} +v128_t test_f16x8_mul(v128_t a, v128_t b) { return wasm_f16x8_mul(a, b); } + +// CHECK-LABEL: test_f16x8_div: +// CHECK: f16x8.div{{$}} +v128_t test_f16x8_div(v128_t a, v128_t b) { return wasm_f16x8_div(a, b); } + +// CHECK-LABEL: test_f16x8_min: +// CHECK: f16x8.min{{$}} +v128_t test_f16x8_min(v128_t a, v128_t b) { return wasm_f16x8_min(a, b); } + +// CHECK-LABEL: test_f16x8_max: +// CHECK: f16x8.max{{$}} +v128_t test_f16x8_max(v128_t a, v128_t b) { return wasm_f16x8_max(a, b); } + +// CHECK-LABEL: test_f16x8_pmin: +// CHECK: f16x8.pmin{{$}} +v128_t test_f16x8_pmin(v128_t a, v128_t b) { return wasm_f16x8_pmin(a, b); } + +// CHECK-LABEL: test_f16x8_pmax: +// CHECK: f16x8.pmax{{$}} +v128_t test_f16x8_pmax(v128_t a, v128_t b) { return wasm_f16x8_pmax(a, b); } + +// CHECK-LABEL: test_f16x8_eq: +// CHECK: f16x8.eq{{$}} +v128_t test_f16x8_eq(v128_t a, v128_t b) { return wasm_f16x8_eq(a, b); } + +// CHECK-LABEL: test_f16x8_ne: +// CHECK: f16x8.ne{{$}} +v128_t test_f16x8_ne(v128_t a, v128_t b) { return wasm_f16x8_ne(a, b); } + +// CHECK-LABEL: test_f16x8_lt: +// CHECK: f16x8.lt{{$}} +v128_t test_f16x8_lt(v128_t a, v128_t b) { return wasm_f16x8_lt(a, b); } + +// CHECK-LABEL: test_f16x8_gt: +// CHECK: f16x8.gt{{$}} +v128_t test_f16x8_gt(v128_t a, v128_t b) { return wasm_f16x8_gt(a, b); } + +// CHECK-LABEL: test_f16x8_le: +// CHECK: f16x8.le{{$}} +v128_t test_f16x8_le(v128_t a, v128_t b) { return wasm_f16x8_le(a, b); } + +// CHECK-LABEL: test_f16x8_ge: +// CHECK: f16x8.ge{{$}} +v128_t test_f16x8_ge(v128_t a, v128_t b) { return wasm_f16x8_ge(a, b); } + +// CHECK-LABEL: test_i16x8_trunc_sat_f16x8: +// CHECK: i16x8.trunc_sat_f16x8_s{{$}} +v128_t test_i16x8_trunc_sat_f16x8(v128_t a) { + return wasm_i16x8_trunc_sat_f16x8(a); +} + +// CHECK-LABEL: test_u16x8_trunc_sat_f16x8: +// CHECK: i16x8.trunc_sat_f16x8_u{{$}} +v128_t test_u16x8_trunc_sat_f16x8(v128_t a) { + return wasm_u16x8_trunc_sat_f16x8(a); +} + +// CHECK-LABEL: test_f16x8_convert_i16x8: +// CHECK: f16x8.convert_i16x8_s{{$}} +v128_t test_f16x8_convert_i16x8(v128_t a) { + return wasm_f16x8_convert_i16x8(a); +} + +// CHECK-LABEL: test_f16x8_convert_u16x8: +// CHECK: f16x8.convert_i16x8_u{{$}} +v128_t test_f16x8_convert_u16x8(v128_t a) { + return wasm_f16x8_convert_u16x8(a); +} + +// CHECK-LABEL: test_f16x8_relaxed_madd: +// CHECK: f16x8.relaxed_madd{{$}} +v128_t test_f16x8_relaxed_madd(v128_t a, v128_t b, v128_t c) { + return wasm_f16x8_relaxed_madd(a, b, c); +} + +// CHECK-LABEL: test_f16x8_relaxed_nmadd: +// CHECK: f16x8.relaxed_nmadd{{$}} +v128_t test_f16x8_relaxed_nmadd(v128_t a, v128_t b, v128_t c) { + return wasm_f16x8_relaxed_nmadd(a, b, c); +} diff --git a/flang/lib/Evaluate/intrinsics-library.cpp b/flang/lib/Evaluate/intrinsics-library.cpp index 65636b9956e780..ed28d8130808fa 100644 --- a/flang/lib/Evaluate/intrinsics-library.cpp +++ b/flang/lib/Evaluate/intrinsics-library.cpp @@ -255,6 +255,25 @@ struct HostRuntimeLibrary { static constexpr HostRuntimeMap map{table}; static_assert(map.Verify(), "map must be 
sorted"); }; + +// Helpers to map complex std::pow whose resolution in F2{std::pow} is +// ambiguous as of clang++ 20. +template +static std::complex StdPowF2( + const std::complex &x, const std::complex &y) { + return std::pow(x, y); +} +template +static std::complex StdPowF2A( + const HostT &x, const std::complex &y) { + return std::pow(x, y); +} +template +static std::complex StdPowF2B( + const std::complex &x, const HostT &y) { + return std::pow(x, y); +} + template struct HostRuntimeLibrary, LibraryVersion::Libm> { using F = FuncPointer, const std::complex &>; @@ -275,9 +294,9 @@ struct HostRuntimeLibrary, LibraryVersion::Libm> { FolderFactory::Create("cosh"), FolderFactory::Create("exp"), FolderFactory::Create("log"), - FolderFactory::Create("pow"), - FolderFactory::Create("pow"), - FolderFactory::Create("pow"), + FolderFactory::Create("pow"), + FolderFactory::Create("pow"), + FolderFactory::Create("pow"), FolderFactory::Create("sin"), FolderFactory::Create("sinh"), FolderFactory::Create("sqrt"), diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 90943fa92493ce..e5ccf659c3f8ed 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2349,8 +2349,11 @@ class FirConverter : public Fortran::lower::AbstractConverter { fir::IfOp topIfOp, currentIfOp; for (Fortran::lower::pft::Evaluation &e : eval.getNestedEvaluations()) { auto genIfOp = [&](mlir::Value cond) { - auto ifOp = - builder->create(toLocation(), cond, /*withElse=*/true); + Fortran::lower::pft::Evaluation &succ = *e.controlSuccessor; + bool hasElse = succ.isA() || + succ.isA(); + auto ifOp = builder->create(toLocation(), cond, + /*withElseRegion=*/hasElse); builder->setInsertionPointToStart(&ifOp.getThenRegion().front()); return ifOp; }; diff --git a/flang/test/HLFIR/assumed_shape_with_value_keyword.f90 b/flang/test/HLFIR/assumed_shape_with_value_keyword.f90 index 197efc08422c6e..208f22badda28d 100644 --- a/flang/test/HLFIR/assumed_shape_with_value_keyword.f90 +++ b/flang/test/HLFIR/assumed_shape_with_value_keyword.f90 @@ -102,7 +102,6 @@ subroutine test_optional1(x) ! CHECK: %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]]#0 : (!fir.box>) -> !fir.ref> ! CHECK: fir.call @_QPinternal_call7(%[[VAL_3]]) fastmath : (!fir.ref>) -> () ! CHECK: hlfir.copy_out %[[TMP_BOX]], %[[VAL_2]]#1 to %[[VAL_0]]#0 : (!fir.ref>>>, i1, !fir.box>) -> () -! CHECK: } else { ! CHECK: } ! CHECK: return ! CHECK: } @@ -122,7 +121,6 @@ subroutine test_optional2(x) ! CHECK: %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]]#0 : (!fir.box>) -> !fir.ref> ! CHECK: fir.call @_QPinternal_call8(%[[VAL_3]]) fastmath : (!fir.ref>) -> () ! CHECK: hlfir.copy_out %[[TMP_BOX]], %[[VAL_2]]#1 to %[[VAL_0]]#0 : (!fir.ref>>>, i1, !fir.box>) -> () -! CHECK: } else { ! CHECK: } ! CHECK: return ! CHECK: } diff --git a/flang/test/Lower/HLFIR/select-rank.f90 b/flang/test/Lower/HLFIR/select-rank.f90 index 211b7565bab8a3..d27a6d732ffc71 100644 --- a/flang/test/Lower/HLFIR/select-rank.f90 +++ b/flang/test/Lower/HLFIR/select-rank.f90 @@ -796,7 +796,6 @@ subroutine test_branching(x) ! CHECK: %[[VAL_10:.*]] = arith.xori %[[VAL_8]], %[[VAL_9]] : i1 ! CHECK: fir.if %[[VAL_10]] { ! CHECK: fir.call @_QPone() fastmath : () -> () -! CHECK: } else { ! CHECK: } ! CHECK: fir.call @_QPrdefault(%[[VAL_6]]#0) fastmath : (!fir.box>) -> () ! 
CHECK: cf.br ^bb7 diff --git a/flang/test/Lower/Intrinsics/system_clock.f90 b/flang/test/Lower/Intrinsics/system_clock.f90 index ca36920c04eb3b..9eae3a58884faf 100644 --- a/flang/test/Lower/Intrinsics/system_clock.f90 +++ b/flang/test/Lower/Intrinsics/system_clock.f90 @@ -104,7 +104,6 @@ subroutine ss(count) ! CHECK: fir.if %[[V_17]] { ! CHECK: %[[C_0:c[0-9a-z_]+]] = arith.constant 0 : i64 ! CHECK: fir.store %[[C_0]] to %arg0 : !fir.ref - ! CHECK: } else { ! CHECK: } ! CHECK: %[[V_18:[0-9]+]] = fir.zero_bits !fir.ptr ! CHECK: fir.store %[[V_18]] to %[[V_4]] : !fir.ref> @@ -137,7 +136,6 @@ subroutine ss(count) ! CHECK: %[[V_32]] = fir.load %arg0 : !fir.ref ! CHECK: %[[V_33]] = fir.call @_FortranAioOutputInteger64(%[[V_31]], %[[V_32]]) {{.*}}: (!fir.ref, i64) -> i1 ! CHECK: %[[V_34]] = fir.call @_FortranAioEndIoStatement(%[[V_31]]) {{.*}}: (!fir.ref) -> i32 - ! CHECK: } else { ! CHECK: } ! CHECK: return ! CHECK: } diff --git a/flang/test/Lower/OpenMP/master.f90 b/flang/test/Lower/OpenMP/master.f90 index 7db1be4f005b57..9f98ac89fb1fd9 100644 --- a/flang/test/Lower/OpenMP/master.f90 +++ b/flang/test/Lower/OpenMP/master.f90 @@ -91,7 +91,7 @@ subroutine omp_master_parallel() !CHECK: hlfir.assign %{{.*}} to %{{.*}}#0 : i32, !fir.ref beta = alpha + gama end if - !CHECK: else + !CHECK: } !CHECK: omp.terminator !$omp end master diff --git a/flang/test/Lower/OpenMP/unstructured.f90 b/flang/test/Lower/OpenMP/unstructured.f90 index 9c3527eda5bb43..bd030b918033e6 100644 --- a/flang/test/Lower/OpenMP/unstructured.f90 +++ b/flang/test/Lower/OpenMP/unstructured.f90 @@ -141,7 +141,6 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! CHECK: @_FortranAioBeginExternalListOutput ! CHECK: %[[LOAD:.*]] = fir.load %[[OMP_LOOP_J_DECL]]#0 : !fir.ref ! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]]) -! CHECK: } else { ! CHECK: } ! CHECK-NEXT: omp.yield ! CHECK-NEXT: } diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90 index 7e4890dd00fea3..56a43abca42a76 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90 @@ -118,7 +118,6 @@ ! CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]]) : (!fir.box>, i64) -> !fir.ref ! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref ! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref -! CHECK: } else { ! CHECK: } ! CHECK: omp.yield ! CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 index 9a93c75f5bd1a8..775554fd3dcca1 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 @@ -108,7 +108,6 @@ ! CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]]) : (!fir.box>, i64) -> !fir.ref ! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref ! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref -! CHECK: } else { ! CHECK: } ! CHECK: omp.yield ! CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90 index 41fcc979cdc9d9..d16de4a867a24c 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90 @@ -120,7 +120,6 @@ ! CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]]) : (!fir.box>, i64) -> !fir.ref ! 
CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref ! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref -! CHECK: } else { ! CHECK: } ! CHECK: omp.yield ! CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 index 50b2db9463d23d..04957c7287eae4 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 @@ -110,7 +110,6 @@ ! CHECK: %[[VAL_46:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_45]]) : (!fir.box>, i64) -> !fir.ref ! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref ! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_37]]#0 : f32, !fir.ref -! CHECK: } else { ! CHECK: } ! CHECK: omp.yield ! CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index dc2acf881f482a..7bfb9274f389a3 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -190,7 +190,6 @@ subroutine wsloop_variable_sub !CHECK: %[[VAL_56:.*]] = fir.load %[[VAL_19]]#0 : !fir.ref !CHECK: %[[VAL_57:.*]] = arith.cmpi eq, %[[VAL_55]], %[[VAL_56]] : i8 !CHECK: fir.if %[[VAL_57]] { -!CHECK: } else { !CHECK: } !CHECK: omp.yield !CHECK: } diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv index 9c65ff9a536407..e5d2498473ecde 100644 --- a/libcxx/docs/Status/Cxx20Issues.csv +++ b/libcxx/docs/Status/Cxx20Issues.csv @@ -172,7 +172,7 @@ "`LWG3221 `__","Result of ``year_month``\ arithmetic with ``months``\ is ambiguous","2019-11 (Belfast)","|Complete|","8.0","" "`LWG3235 `__","``parse``\ manipulator without abbreviation is not callable","2019-11 (Belfast)","","","" "`LWG3246 `__","LWG3246: What are the constraints on the template parameter of `basic_format_arg`?","2019-11 (Belfast)","|Nothing To Do|","","" -"`LWG3253 `__","``basic_syncbuf::basic_syncbuf()``\ should not be explicit","2019-11 (Belfast)","","","" +"`LWG3253 `__","``basic_syncbuf::basic_syncbuf()``\ should not be explicit","2019-11 (Belfast)","|Complete|","20.0","" "`LWG3245 `__","Unnecessary restriction on ``'%p'``\ parse specifier","2019-11 (Belfast)","","","" "`LWG3244 `__","Constraints for ``Source``\ in |sect|\ [fs.path.req] insufficiently constrainty","2019-11 (Belfast)","","","" "`LWG3241 `__","``chrono-spec``\ grammar ambiguity in |sect|\ [time.format]","2019-11 (Belfast)","|Complete|","16.0","" diff --git a/libcxx/include/__chrono/leap_second.h b/libcxx/include/__chrono/leap_second.h index d79111ed8eecfc..be3ab4235da3ca 100644 --- a/libcxx/include/__chrono/leap_second.h +++ b/libcxx/include/__chrono/leap_second.h @@ -122,7 +122,7 @@ class leap_second { } // namespace chrono -# endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__chrono/parser_std_format_spec.h b/libcxx/include/__chrono/parser_std_format_spec.h index 785bbae198e464..6803d03ad882fd 100644 --- a/libcxx/include/__chrono/parser_std_format_spec.h +++ b/libcxx/include/__chrono/parser_std_format_spec.h @@ -409,7 +409,7 @@ class _LIBCPP_TEMPLATE_VIS __parser_chrono { } // namespace __format_spec -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__chrono/statically_widen.h b/libcxx/include/__chrono/statically_widen.h index a18c46f057a819..680483a59ac2c4 100644 --- a/libcxx/include/__chrono/statically_widen.h +++ b/libcxx/include/__chrono/statically_widen.h @@ -45,7 
+45,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* __statically_widen(const char* __s # define _LIBCPP_STATICALLY_WIDEN(_CharT, __str) ::std::__statically_widen<_CharT>(__str) # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__chrono/time_zone_link.h b/libcxx/include/__chrono/time_zone_link.h index b2d365c5fd0820..7b15f6ae39278e 100644 --- a/libcxx/include/__chrono/time_zone_link.h +++ b/libcxx/include/__chrono/time_zone_link.h @@ -68,7 +68,7 @@ operator<=>(const time_zone_link& __x, const time_zone_link& __y) noexcept { } // namespace chrono -# endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__expected/expected.h b/libcxx/include/__expected/expected.h index e47ec2f28844b6..8661d5d6e9b939 100644 --- a/libcxx/include/__expected/expected.h +++ b/libcxx/include/__expected/expected.h @@ -503,25 +503,24 @@ class expected : private __expected_base<_Tp, _Err> { private: template - using __can_convert = - _And< is_constructible<_Tp, _UfQual>, - is_constructible<_Err, _OtherErrQual>, - _If<_Not, bool>>::value, - _And< - _Not<_And, is_same<_Err, _OtherErr>>>, // use the copy constructor instead, see #92676 - _Not&>>, - _Not>>, - _Not&>>, - _Not>>, - _Not&, _Tp>>, - _Not&&, _Tp>>, - _Not&, _Tp>>, - _Not&&, _Tp>>>, - true_type>, - _Not, expected<_Up, _OtherErr>&>>, - _Not, expected<_Up, _OtherErr>>>, - _Not, const expected<_Up, _OtherErr>&>>, - _Not, const expected<_Up, _OtherErr>>> >; + using __can_convert = _And< + is_constructible<_Tp, _UfQual>, + is_constructible<_Err, _OtherErrQual>, + _If<_Not, bool>>::value, + _And< _Not<_And, is_same<_Err, _OtherErr>>>, // use the copy constructor instead, see #92676 + _Not&>>, + _Not>>, + _Not&>>, + _Not>>, + _Not&, _Tp>>, + _Not&&, _Tp>>, + _Not&, _Tp>>, + _Not&&, _Tp>>>, + true_type>, + _Not, expected<_Up, _OtherErr>&>>, + _Not, expected<_Up, _OtherErr>>>, + _Not, const expected<_Up, _OtherErr>&>>, + _Not, const expected<_Up, _OtherErr>>> >; template _LIBCPP_HIDE_FROM_ABI constexpr explicit expected( diff --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h index 8598f0a1c03957..ce9ac0c81e315a 100644 --- a/libcxx/include/__format/buffer.h +++ b/libcxx/include/__format/buffer.h @@ -646,7 +646,7 @@ class _LIBCPP_TEMPLATE_VIS __retarget_buffer { } // namespace __format -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/concepts.h b/libcxx/include/__format/concepts.h index 13380e9b91aff8..737783ed4bdeca 100644 --- a/libcxx/include/__format/concepts.h +++ b/libcxx/include/__format/concepts.h @@ -75,8 +75,8 @@ template concept __fmt_pair_like = __is_specialization_v<_Tp, pair> || (__is_specialization_v<_Tp, tuple> && tuple_size_v<_Tp> == 2); -# endif //_LIBCPP_STD_VER >= 23 -#endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/container_adaptor.h b/libcxx/include/__format/container_adaptor.h index 9f49ca03bf4f50..d3be2e18956046 100644 --- a/libcxx/include/__format/container_adaptor.h +++ b/libcxx/include/__format/container_adaptor.h @@ -66,7 +66,7 @@ template _Container> struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git 
a/libcxx/include/__format/enable_insertable.h b/libcxx/include/__format/enable_insertable.h index 86ef94a325b192..29fe566ff06a3f 100644 --- a/libcxx/include/__format/enable_insertable.h +++ b/libcxx/include/__format/enable_insertable.h @@ -28,7 +28,7 @@ inline constexpr bool __enable_insertable = false; } // namespace __format -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/escaped_output_table.h b/libcxx/include/__format/escaped_output_table.h index f7be2dc61f21a3..bdf86cb6f99ccb 100644 --- a/libcxx/include/__format/escaped_output_table.h +++ b/libcxx/include/__format/escaped_output_table.h @@ -856,7 +856,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { // clang-format on } // namespace __escaped_output_table -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/extended_grapheme_cluster_table.h b/libcxx/include/__format/extended_grapheme_cluster_table.h index 48581d8a5dde3d..7dbc239f5f5cd6 100644 --- a/libcxx/include/__format/extended_grapheme_cluster_table.h +++ b/libcxx/include/__format/extended_grapheme_cluster_table.h @@ -1656,7 +1656,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { } // namespace __extended_grapheme_custer_property_boundary -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h index aa02f81dc40e2d..d1ce055874413e 100644 --- a/libcxx/include/__format/format_arg.h +++ b/libcxx/include/__format/format_arg.h @@ -392,7 +392,7 @@ _LIBCPP_DEPRECATED_IN_CXX26 } } -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/format_arg_store.h b/libcxx/include/__format/format_arg_store.h index 23a599e9957599..00de1c30b8733b 100644 --- a/libcxx/include/__format/format_arg_store.h +++ b/libcxx/include/__format/format_arg_store.h @@ -259,7 +259,7 @@ struct _LIBCPP_TEMPLATE_VIS __format_arg_store { _Storage __storage; }; -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/format_args.h b/libcxx/include/__format/format_args.h index 07923570f38930..e19b4458e41a5b 100644 --- a/libcxx/include/__format/format_args.h +++ b/libcxx/include/__format/format_args.h @@ -71,7 +71,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_args { template basic_format_args(__format_arg_store<_Context, _Args...>) -> basic_format_args<_Context>; -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h index 71783c55d72540..a9be17b855837d 100644 --- a/libcxx/include/__format/format_context.h +++ b/libcxx/include/__format/format_context.h @@ -212,7 +212,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_context= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/format_error.h b/libcxx/include/__format/format_error.h index ed40e395d6af72..35a39ee82f3daf 100644 --- a/libcxx/include/__format/format_error.h +++ b/libcxx/include/__format/format_error.h @@ -43,7 +43,7 @@ _LIBCPP_NORETURN inline _LIBCPP_HIDE_FROM_ABI void __throw_format_error(const ch # endif } -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git 
a/libcxx/include/__format/format_functions.h b/libcxx/include/__format/format_functions.h index d14b49aff14957..1518ab5768d243 100644 --- a/libcxx/include/__format/format_functions.h +++ b/libcxx/include/__format/format_functions.h @@ -360,7 +360,7 @@ _LIBCPP_HIDE_FROM_ABI inline __runtime_format_string runtime_format(wst return __fmt; } # endif -# endif //_LIBCPP_STD_VER >= 26 +# endif // _LIBCPP_STD_VER >= 26 template struct _LIBCPP_TEMPLATE_VIS basic_format_string { @@ -671,7 +671,7 @@ formatted_size(locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) # endif // _LIBCPP_HAS_NO_LOCALIZATION -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/format_parse_context.h b/libcxx/include/__format/format_parse_context.h index aefcd5497f3b9b..54c23014e7dc60 100644 --- a/libcxx/include/__format/format_parse_context.h +++ b/libcxx/include/__format/format_parse_context.h @@ -98,7 +98,7 @@ using format_parse_context = basic_format_parse_context; using wformat_parse_context = basic_format_parse_context; # endif -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/format_string.h b/libcxx/include/__format/format_string.h index bdf3cff7f49b18..a499afee8874a5 100644 --- a/libcxx/include/__format/format_string.h +++ b/libcxx/include/__format/format_string.h @@ -153,7 +153,7 @@ __parse_arg_id(_Iterator __begin, _Iterator __end, auto& __parse_ctx) { } // namespace __format -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/format_to_n_result.h b/libcxx/include/__format/format_to_n_result.h index 6f30546dec081c..344299e32f0ee6 100644 --- a/libcxx/include/__format/format_to_n_result.h +++ b/libcxx/include/__format/format_to_n_result.h @@ -28,7 +28,7 @@ struct _LIBCPP_TEMPLATE_VIS format_to_n_result { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(format_to_n_result); -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h index 63aa815efbe9b3..a43eba53c93701 100644 --- a/libcxx/include/__format/formatter_bool.h +++ b/libcxx/include/__format/formatter_bool.h @@ -72,8 +72,8 @@ struct _LIBCPP_TEMPLATE_VIS formatter { # if _LIBCPP_STD_VER >= 23 template <> inline constexpr bool enable_nonlocking_formatter_optimization = true; -# endif //_LIBCPP_STD_VER >= 23 -#endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/formatter_char.h b/libcxx/include/__format/formatter_char.h index abfd65a4282989..a96acba08d5ca5 100644 --- a/libcxx/include/__format/formatter_char.h +++ b/libcxx/include/__format/formatter_char.h @@ -92,9 +92,9 @@ inline constexpr bool enable_nonlocking_formatter_optimization = true; template <> inline constexpr bool enable_nonlocking_formatter_optimization = true; # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS -# endif //_LIBCPP_STD_VER >= 23 +# endif // _LIBCPP_STD_VER >= 23 -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h index 334755f4e8143b..fc95dd3f22bbe7 100644 --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -781,8 
+781,8 @@ template <> inline constexpr bool enable_nonlocking_formatter_optimization = true; template <> inline constexpr bool enable_nonlocking_formatter_optimization = true; -# endif //_LIBCPP_STD_VER >= 23 -#endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/formatter_integer.h b/libcxx/include/__format/formatter_integer.h index 2c2e7995053671..b7f46014c57231 100644 --- a/libcxx/include/__format/formatter_integer.h +++ b/libcxx/include/__format/formatter_integer.h @@ -118,8 +118,8 @@ inline constexpr bool enable_nonlocking_formatter_optimization inline constexpr bool enable_nonlocking_formatter_optimization<__uint128_t> = true; # endif -# endif //_LIBCPP_STD_VER >= 23 -#endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h index eca966f8886f84..beed3ab8d93df1 100644 --- a/libcxx/include/__format/formatter_integral.h +++ b/libcxx/include/__format/formatter_integral.h @@ -436,7 +436,7 @@ __format_bool(bool __value, _FormatContext& __ctx, __format_spec::__parsed_speci } // namespace __formatter -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h index 1498f64c4aeff7..34c4c87313a450 100644 --- a/libcxx/include/__format/formatter_output.h +++ b/libcxx/include/__format/formatter_output.h @@ -326,7 +326,7 @@ _LIBCPP_HIDE_FROM_ABI int __truncate(basic_string_view<_CharT>& __str, int __pre } // namespace __formatter -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/formatter_pointer.h b/libcxx/include/__format/formatter_pointer.h index e1c062cec6ed2b..6e0fa9a1b4f196 100644 --- a/libcxx/include/__format/formatter_pointer.h +++ b/libcxx/include/__format/formatter_pointer.h @@ -72,8 +72,8 @@ template <> inline constexpr bool enable_nonlocking_formatter_optimization = true; template <> inline constexpr bool enable_nonlocking_formatter_optimization = true; -# endif //_LIBCPP_STD_VER >= 23 -#endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h index dee2b3ad073a51..b29e97847f0ba1 100644 --- a/libcxx/include/__format/formatter_string.h +++ b/libcxx/include/__format/formatter_string.h @@ -167,8 +167,8 @@ inline constexpr bool enable_nonlocking_formatter_optimization inline constexpr bool enable_nonlocking_formatter_optimization> = true; # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS -# endif //_LIBCPP_STD_VER >= 23 -#endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/formatter_tuple.h b/libcxx/include/__format/formatter_tuple.h index 030097a8797dae..bb841ef11440dd 100644 --- a/libcxx/include/__format/formatter_tuple.h +++ b/libcxx/include/__format/formatter_tuple.h @@ -143,7 +143,7 @@ template <__fmt_char_type _CharT, formattable<_CharT>... 
_Args> struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_tuple<_CharT, tuple<_Args...>, _Args...> {}; -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/indic_conjunct_break_table.h b/libcxx/include/__format/indic_conjunct_break_table.h index 44521d27498c3c..39dd45da771fc2 100644 --- a/libcxx/include/__format/indic_conjunct_break_table.h +++ b/libcxx/include/__format/indic_conjunct_break_table.h @@ -343,7 +343,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = { } // namespace __indic_conjunct_break -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h index 150bdde89f3b39..28891e5d2876cd 100644 --- a/libcxx/include/__format/parser_std_format_spec.h +++ b/libcxx/include/__format/parser_std_format_spec.h @@ -1163,7 +1163,7 @@ __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __col } // namespace __format_spec -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/range_default_formatter.h b/libcxx/include/__format/range_default_formatter.h index b35223ae933291..fb21b0f8beb3a1 100644 --- a/libcxx/include/__format/range_default_formatter.h +++ b/libcxx/include/__format/range_default_formatter.h @@ -207,7 +207,7 @@ template requires(format_kind<_Rp> != range_format::disabled && formattable, _CharT>) struct _LIBCPP_TEMPLATE_VIS formatter<_Rp, _CharT> : __range_default_formatter, _Rp, _CharT> {}; -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/range_formatter.h b/libcxx/include/__format/range_formatter.h index 69156307434937..def55c86ce51cd 100644 --- a/libcxx/include/__format/range_formatter.h +++ b/libcxx/include/__format/range_formatter.h @@ -257,7 +257,7 @@ struct _LIBCPP_TEMPLATE_VIS range_formatter { basic_string_view<_CharT> __closing_bracket_ = _LIBCPP_STATICALLY_WIDEN(_CharT, "]"); }; -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/unicode.h b/libcxx/include/__format/unicode.h index de7d0fea1df56a..ce6d55ae346a3f 100644 --- a/libcxx/include/__format/unicode.h +++ b/libcxx/include/__format/unicode.h @@ -595,7 +595,7 @@ class __code_point_view { } // namespace __unicode -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__format/width_estimation_table.h b/libcxx/include/__format/width_estimation_table.h index 11f61dea18d696..23a08746b91031 100644 --- a/libcxx/include/__format/width_estimation_table.h +++ b/libcxx/include/__format/width_estimation_table.h @@ -263,7 +263,7 @@ inline constexpr uint32_t __table_upper_bound = 0x0003fffd; } // namespace __width_estimation_table -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__fwd/format.h b/libcxx/include/__fwd/format.h index b30c220f8a0435..815e3e1922c62d 100644 --- a/libcxx/include/__fwd/format.h +++ b/libcxx/include/__fwd/format.h @@ -31,7 +31,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_context; template struct _LIBCPP_TEMPLATE_VIS formatter; -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__memory/allocator.h 
b/libcxx/include/__memory/allocator.h index 0dbdc41d3c3d14..6a9eed926e05f4 100644 --- a/libcxx/include/__memory/allocator.h +++ b/libcxx/include/__memory/allocator.h @@ -47,7 +47,7 @@ class _LIBCPP_TEMPLATE_VIS allocator { typedef allocator<_Up> other; }; }; -#endif // _LIBCPP_STD_VER <= 17 +#endif // _LIBCPP_STD_VER <= 17 // This class provides a non-trivial default constructor to the class that derives from it // if the condition is satisfied. diff --git a/libcxx/include/__type_traits/is_member_pointer.h b/libcxx/include/__type_traits/is_member_pointer.h index cc125e318cf919..3e2753ac4228c2 100644 --- a/libcxx/include/__type_traits/is_member_pointer.h +++ b/libcxx/include/__type_traits/is_member_pointer.h @@ -27,7 +27,7 @@ struct _LIBCPP_TEMPLATE_VIS is_member_object_pointer : _BoolConstant<__is_member template struct _LIBCPP_TEMPLATE_VIS is_member_function_pointer : _BoolConstant<__is_member_function_pointer(_Tp)> {}; -# if _LIBCPP_STD_VER >= 17 +#if _LIBCPP_STD_VER >= 17 template inline constexpr bool is_member_pointer_v = __is_member_pointer(_Tp); @@ -36,7 +36,7 @@ inline constexpr bool is_member_object_pointer_v = __is_member_object_pointer(_T template inline constexpr bool is_member_function_pointer_v = __is_member_function_pointer(_Tp); -# endif +#endif _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/is_void.h b/libcxx/include/__type_traits/is_void.h index 46316b0d3a534e..562faae9fba2cd 100644 --- a/libcxx/include/__type_traits/is_void.h +++ b/libcxx/include/__type_traits/is_void.h @@ -21,10 +21,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template struct _LIBCPP_TEMPLATE_VIS is_void : _BoolConstant<__is_same(__remove_cv(_Tp), void)> {}; -# if _LIBCPP_STD_VER >= 17 +#if _LIBCPP_STD_VER >= 17 template inline constexpr bool is_void_v = __is_same(__remove_cv(_Tp), void); -# endif +#endif _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/array b/libcxx/include/array index 4db0cb7bd7e3b5..588664ace0162a 100644 --- a/libcxx/include/array +++ b/libcxx/include/array @@ -427,8 +427,7 @@ inline _LIBCPP_HIDE_FROM_ABI bool operator>=(const array<_Tp, _Size>& __x, const template _LIBCPP_HIDE_FROM_ABI constexpr __synth_three_way_result<_Tp> operator<=>(const array<_Tp, _Size>& __x, const array<_Tp, _Size>& __y) { - return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); + return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/cstddef b/libcxx/include/cstddef index 1a4049e4d34f2d..592f6261a6de3f 100644 --- a/libcxx/include/cstddef +++ b/libcxx/include/cstddef @@ -66,8 +66,8 @@ using ::max_align_t _LIBCPP_USING_IF_EXISTS; _LIBCPP_END_NAMESPACE_STD #if _LIBCPP_STD_VER >= 17 -namespace std // purposefully not versioned -{ +namespace std { // purposefully not versioned + enum class byte : unsigned char {}; _LIBCPP_HIDE_FROM_ABI inline constexpr byte operator|(byte __lhs, byte __rhs) noexcept { @@ -127,7 +127,6 @@ template ::value, int> = 0> } } // namespace std - -#endif +#endif // _LIBCPP_STD_VER >= 17 #endif // _LIBCPP_CSTDDEF diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list index b8e3d05588f96e..6c0dc5f96a5d5e 100644 --- a/libcxx/include/forward_list +++ b/libcxx/include/forward_list @@ -1517,8 +1517,7 @@ operator<=(const forward_list<_Tp, _Alloc>& __x, const forward_list<_Tp, _Alloc> template _LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Tp> operator<=>(const 
forward_list<_Tp, _Allocator>& __x, const forward_list<_Tp, _Allocator>& __y) { - return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); + return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // #if _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/iosfwd b/libcxx/include/iosfwd index 051c73995e98b4..eeafcc37c598ef 100644 --- a/libcxx/include/iosfwd +++ b/libcxx/include/iosfwd @@ -170,8 +170,8 @@ class __save_flags { _CharT __fill_; public: - __save_flags(const __save_flags&) = delete; - __save_flags& operator=(const __save_flags&) = delete; + __save_flags(const __save_flags&) = delete; + __save_flags& operator=(const __save_flags&) = delete; _LIBCPP_HIDE_FROM_ABI explicit __save_flags(__stream_type& __stream) : __stream_(__stream), __fmtflags_(__stream.flags()), __fill_(__stream.fill()) {} diff --git a/libcxx/include/list b/libcxx/include/list index 929c84de7be449..76b1d9241b41ca 100644 --- a/libcxx/include/list +++ b/libcxx/include/list @@ -466,7 +466,7 @@ public: template class __list_imp { public: - __list_imp(const __list_imp&) = delete; + __list_imp(const __list_imp&) = delete; __list_imp& operator=(const __list_imp&) = delete; typedef _Alloc allocator_type; @@ -1679,8 +1679,7 @@ inline _LIBCPP_HIDE_FROM_ABI bool operator<=(const list<_Tp, _Alloc>& __x, const template _LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Tp> operator<=>(const list<_Tp, _Allocator>& __x, const list<_Tp, _Allocator>& __y) { - return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); + return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 13d0dce34d97e3..f193b5d95f49f5 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -245,8 +245,15 @@ module std_stdexcept [system] { header "stdexcept" export * } -module std_stop_token { +module std_stop_token [system] { header "stop_token" + private header "__stop_token/atomic_unique_lock.h" + private header "__stop_token/intrusive_list_view.h" + private header "__stop_token/intrusive_shared_ptr.h" + private header "__stop_token/stop_callback.h" + private header "__stop_token/stop_source.h" + private header "__stop_token/stop_state.h" + private header "__stop_token/stop_token.h" export * } module std_streambuf [system] { @@ -1592,41 +1599,25 @@ module std_private_numeric_transform_exclusive_scan [system] { header "__numeric module std_private_numeric_transform_inclusive_scan [system] { header "__numeric/transform_inclusive_scan.h" } module std_private_numeric_transform_reduce [system] { header "__numeric/transform_reduce.h" } -module std_private_pstl_backend [system] { +module std_private_pstl [system] { header "__pstl/backend.h" - export * -} -module std_private_pstl_backend_fwd [system] { header "__pstl/backend_fwd.h" - export * -} -module std_private_pstl_backends_default [system] { header "__pstl/backends/default.h" - export * -} -module std_private_pstl_backends_libdispatch [system] { header "__pstl/backends/libdispatch.h" - export * -} -module std_private_pstl_backends_serial [system] { header "__pstl/backends/serial.h" - export * -} -module std_private_pstl_backends_std_thread [system] { header "__pstl/backends/std_thread.h" - export * + header 
"__pstl/cpu_algos/any_of.h" + header "__pstl/cpu_algos/cpu_traits.h" + header "__pstl/cpu_algos/fill.h" + header "__pstl/cpu_algos/find_if.h" + header "__pstl/cpu_algos/for_each.h" + header "__pstl/cpu_algos/merge.h" + header "__pstl/cpu_algos/stable_sort.h" + header "__pstl/cpu_algos/transform.h" + header "__pstl/cpu_algos/transform_reduce.h" + header "__pstl/dispatch.h" + header "__pstl/handle_exception.h" } -module std_private_pstl_cpu_algos_any_of [system] { header "__pstl/cpu_algos/any_of.h" } -module std_private_pstl_cpu_algos_cpu_traits [system] { header "__pstl/cpu_algos/cpu_traits.h" } -module std_private_pstl_cpu_algos_fill [system] { header "__pstl/cpu_algos/fill.h" } -module std_private_pstl_cpu_algos_find_if [system] { header "__pstl/cpu_algos/find_if.h" } -module std_private_pstl_cpu_algos_for_each [system] { header "__pstl/cpu_algos/for_each.h" } -module std_private_pstl_cpu_algos_merge [system] { header "__pstl/cpu_algos/merge.h" } -module std_private_pstl_cpu_algos_stable_sort [system] { header "__pstl/cpu_algos/stable_sort.h" } -module std_private_pstl_cpu_algos_transform [system] { header "__pstl/cpu_algos/transform.h" } -module std_private_pstl_cpu_algos_transform_reduce [system] { header "__pstl/cpu_algos/transform_reduce.h" } -module std_private_pstl_dispatch [system] { header "__pstl/dispatch.h" } -module std_private_pstl_handle_exception [system] { header "__pstl/handle_exception.h" } module std_private_queue_fwd [system] { header "__fwd/queue.h" } @@ -1781,23 +1772,6 @@ module std_private_span_span_fwd [system] { header "__fwd/span.h" } module std_private_stack_fwd [system] { header "__fwd/stack.h" } -module std_private_stop_token_atomic_unique_lock [system] { header "__stop_token/atomic_unique_lock.h" } -module std_private_stop_token_intrusive_list_view [system] { header "__stop_token/intrusive_list_view.h" } -module std_private_stop_token_intrusive_shared_ptr [system] { header "__stop_token/intrusive_shared_ptr.h" } -module std_private_stop_token_stop_callback [system] { header "__stop_token/stop_callback.h" } -module std_private_stop_token_stop_source [system] { - header "__stop_token/stop_source.h" - export * -} -module std_private_stop_token_stop_state [system] { - header "__stop_token/stop_state.h" - export * -} -module std_private_stop_token_stop_token [system] { - header "__stop_token/stop_token.h" - export * -} - module std_private_string_char_traits [system] { header "__string/char_traits.h" export * diff --git a/libcxx/include/set b/libcxx/include/set index 94533583798699..7e9661a0149ab9 100644 --- a/libcxx/include/set +++ b/libcxx/include/set @@ -1452,8 +1452,7 @@ operator<=(const multiset<_Key, _Compare, _Allocator>& __x, const multiset<_Key, template _LIBCPP_HIDE_FROM_ABI __synth_three_way_result<_Key> operator<=>(const multiset<_Key, _Allocator>& __x, const multiset<_Key, _Allocator>& __y) { - return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), __synth_three_way); + return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), __synth_three_way); } #endif // _LIBCPP_STD_VER <= 17 diff --git a/libcxx/include/string b/libcxx/include/string index 45be4050304125..15c7a2f6b988b4 100644 --- a/libcxx/include/string +++ b/libcxx/include/string @@ -2014,11 +2014,11 @@ private: (void)__old_mid; (void)__new_mid; #if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN) - #if defined(__APPLE__) +# if defined(__APPLE__) // TODO: remove after addressing issue #96099 
(https://github.com/llvm/llvm-project/issues/96099) - if(!__is_long()) + if (!__is_long()) return; - #endif +# endif std::__annotate_contiguous_container<_Allocator>(data(), data() + capacity() + 1, __old_mid, __new_mid); #endif } diff --git a/libcxx/include/syncstream b/libcxx/include/syncstream index e6f35b6f428eda..fea4c66b8e118f 100644 --- a/libcxx/include/syncstream +++ b/libcxx/include/syncstream @@ -46,7 +46,9 @@ namespace std { using streambuf_type = basic_streambuf; // [syncstream.syncbuf.cons], construction and destruction - explicit basic_syncbuf(streambuf_type* obuf = nullptr) + basic_syncbuf() + : basic_syncbuf(nullptr) {} + explicit basic_syncbuf(streambuf_type* obuf) : basic_syncbuf(obuf, Allocator()) {} basic_syncbuf(streambuf_type*, const Allocator&); basic_syncbuf(basic_syncbuf&&); @@ -253,8 +255,9 @@ public: // [syncstream.syncbuf.cons], construction and destruction - _LIBCPP_HIDE_FROM_ABI explicit basic_syncbuf(streambuf_type* __obuf = nullptr) - : basic_syncbuf(__obuf, _Allocator()) {} + _LIBCPP_HIDE_FROM_ABI basic_syncbuf() : basic_syncbuf(nullptr) {} + + _LIBCPP_HIDE_FROM_ABI explicit basic_syncbuf(streambuf_type* __obuf) : basic_syncbuf(__obuf, _Allocator()) {} _LIBCPP_HIDE_FROM_ABI basic_syncbuf(streambuf_type* __obuf, _Allocator const& __alloc) : __wrapped_(__obuf), __str_(__alloc) { diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 081b90c7bbec54..5161c2aa97c2ba 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -833,8 +833,8 @@ public: // [tuple.assign] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& - operator=(_If<_And...>::value, tuple, __nat> const& __tuple) - noexcept(_And...>::value) { + operator=(_If<_And...>::value, tuple, __nat> const& __tuple) noexcept( + _And...>::value) { std::__memberwise_copy_assign(*this, __tuple, typename __make_tuple_indices::type()); return *this; } @@ -857,8 +857,8 @@ public: # endif // _LIBCPP_STD_VER >= 23 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& - operator=(_If<_And...>::value, tuple, __nat>&& __tuple) - noexcept(_And...>::value) { + operator=(_If<_And...>::value, tuple, __nat>&& __tuple) noexcept( + _And...>::value) { std::__memberwise_forward_assign( *this, std::move(__tuple), __tuple_types<_Tp...>(), typename __make_tuple_indices::type()); return *this; @@ -868,8 +868,8 @@ public: class... _Up, __enable_if_t< _And< _BoolConstant, is_assignable<_Tp&, _Up const&>... >::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& operator=(tuple<_Up...> const& __tuple) - noexcept(_And...>::value) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& + operator=(tuple<_Up...> const& __tuple) noexcept(_And...>::value) { std::__memberwise_copy_assign(*this, __tuple, typename __make_tuple_indices::type()); return *this; } @@ -877,8 +877,8 @@ public: template , is_assignable<_Tp&, _Up>... 
>::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& operator=(tuple<_Up...>&& __tuple) - noexcept(_And...>::value) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& + operator=(tuple<_Up...>&& __tuple) noexcept(_And...>::value) { std::__memberwise_forward_assign( *this, std::move(__tuple), __tuple_types<_Up...>(), typename __make_tuple_indices::type()); return *this; @@ -942,16 +942,16 @@ public: template const&>::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& operator=(pair<_Up1, _Up2> const& __pair) - noexcept(_NothrowAssignFromPair const&>::value) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& + operator=(pair<_Up1, _Up2> const& __pair) noexcept(_NothrowAssignFromPair const&>::value) { std::get<0>(*this) = __pair.first; std::get<1>(*this) = __pair.second; return *this; } template &&>::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& operator=(pair<_Up1, _Up2>&& __pair) - noexcept(_NothrowAssignFromPair&&>::value) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& + operator=(pair<_Up1, _Up2>&& __pair) noexcept(_NothrowAssignFromPair&&>::value) { std::get<0>(*this) = std::forward<_Up1>(__pair.first); std::get<1>(*this) = std::forward<_Up2>(__pair.second); return *this; @@ -962,8 +962,8 @@ public: class _Up, size_t _Np, __enable_if_t< _And< _BoolConstant<_Np == sizeof...(_Tp)>, is_assignable<_Tp&, _Up const&>... >::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& operator=(array<_Up, _Np> const& __array) - noexcept(_And...>::value) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& + operator=(array<_Up, _Np> const& __array) noexcept(_And...>::value) { std::__memberwise_copy_assign(*this, __array, typename __make_tuple_indices::type()); return *this; } @@ -973,8 +973,8 @@ public: size_t _Np, class = void, __enable_if_t< _And< _BoolConstant<_Np == sizeof...(_Tp)>, is_assignable<_Tp&, _Up>... 
>::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& operator=(array<_Up, _Np>&& __array) - noexcept(_And...>::value) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 tuple& + operator=(array<_Up, _Np>&& __array) noexcept(_And...>::value) { std::__memberwise_forward_assign( *this, std::move(__array), @@ -984,8 +984,8 @@ public: } // [tuple.swap] - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void swap(tuple& __t) - noexcept(__all<__is_nothrow_swappable_v<_Tp>...>::value) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void + swap(tuple& __t) noexcept(__all<__is_nothrow_swappable_v<_Tp>...>::value) { __base_.swap(__t.__base_); } @@ -1043,8 +1043,8 @@ tuple(allocator_arg_t, _Alloc, tuple<_Tp...>) -> tuple<_Tp...>; # endif template ...>::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void swap(tuple<_Tp...>& __t, tuple<_Tp...>& __u) - noexcept(__all<__is_nothrow_swappable_v<_Tp>...>::value) { +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void +swap(tuple<_Tp...>& __t, tuple<_Tp...>& __u) noexcept(__all<__is_nothrow_swappable_v<_Tp>...>::value) { __t.swap(__u); } diff --git a/libcxx/include/vector b/libcxx/include/vector index 81aab9407714cc..0f852e7f36c29c 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -2938,8 +2938,7 @@ inline _LIBCPP_HIDE_FROM_ABI bool operator<=(const vector<_Tp, _Allocator>& __x, template _LIBCPP_HIDE_FROM_ABI constexpr __synth_three_way_result<_Tp> operator<=>(const vector<_Tp, _Allocator>& __x, const vector<_Tp, _Allocator>& __y) { - return std::lexicographical_compare_three_way( - __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); + return std::lexicographical_compare_three_way(__x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); } #endif // _LIBCPP_STD_VER <= 17 diff --git a/libcxx/modules/std/format.inc b/libcxx/modules/std/format.inc index 09aa03ad73e388..8daf0de85cc412 100644 --- a/libcxx/modules/std/format.inc +++ b/libcxx/modules/std/format.inc @@ -30,7 +30,7 @@ export namespace std { #endif #if _LIBCPP_STD_VER >= 26 using std::runtime_format; -#endif //_LIBCPP_STD_VER >= 26 +#endif // _LIBCPP_STD_VER >= 26 // [format.functions], formatting functions using std::format; diff --git a/libcxx/src/include/refstring.h b/libcxx/src/include/refstring.h index 78452249f4fecf..3e0ec7a97c7bec 100644 --- a/libcxx/src/include/refstring.h +++ b/libcxx/src/include/refstring.h @@ -124,4 +124,4 @@ inline bool __libcpp_refstring::__uses_refcount() const { _LIBCPP_END_NAMESPACE_STD -#endif //_LIBCPP_REFSTRING_H +#endif // _LIBCPP_REFSTRING_H diff --git a/libcxx/test/libcxx/thread/thread.stoptoken/atomic_unique_lock.pass.cpp b/libcxx/test/libcxx/thread/thread.stoptoken/atomic_unique_lock.pass.cpp index 2a9b828f4389ce..44d51921ac74ad 100644 --- a/libcxx/test/libcxx/thread/thread.stoptoken/atomic_unique_lock.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.stoptoken/atomic_unique_lock.pass.cpp @@ -5,12 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// UNSUPPORTED: no-threads - -// XFAIL: availability-synchronization_library-missing +// UNSUPPORTED: no-threads // UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: availability-synchronization_library-missing +// ADDITIONAL_COMPILE_FLAGS: -Wno-private-header #include <__stop_token/atomic_unique_lock.h> #include diff --git 
a/libcxx/test/libcxx/thread/thread.stoptoken/intrusive_list_view.pass.cpp b/libcxx/test/libcxx/thread/thread.stoptoken/intrusive_list_view.pass.cpp index 85cd9786258955..d8cd2fb68e132e 100644 --- a/libcxx/test/libcxx/thread/thread.stoptoken/intrusive_list_view.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.stoptoken/intrusive_list_view.pass.cpp @@ -8,6 +8,7 @@ // // UNSUPPORTED: c++03, c++11, c++14, c++17 +// ADDITIONAL_COMPILE_FLAGS: -Wno-private-header #include <__stop_token/intrusive_list_view.h> #include diff --git a/libcxx/test/libcxx/thread/thread.stoptoken/intrusive_shared_ptr.pass.cpp b/libcxx/test/libcxx/thread/thread.stoptoken/intrusive_shared_ptr.pass.cpp index 47440015f2c50c..99d4226662a0b7 100644 --- a/libcxx/test/libcxx/thread/thread.stoptoken/intrusive_shared_ptr.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.stoptoken/intrusive_shared_ptr.pass.cpp @@ -8,6 +8,7 @@ // // UNSUPPORTED: c++03, c++11, c++14, c++17 +// ADDITIONAL_COMPILE_FLAGS: -Wno-private-header #include <__stop_token/intrusive_shared_ptr.h> #include diff --git a/libcxx/test/std/input.output/syncstream/syncbuf/syncstream.syncbuf.cons/cons.default.pass.cpp b/libcxx/test/std/input.output/syncstream/syncbuf/syncstream.syncbuf.cons/cons.default.pass.cpp index aa0eb2d41e0f01..beebc36c76758e 100644 --- a/libcxx/test/std/input.output/syncstream/syncbuf/syncstream.syncbuf.cons/cons.default.pass.cpp +++ b/libcxx/test/std/input.output/syncstream/syncbuf/syncstream.syncbuf.cons/cons.default.pass.cpp @@ -25,8 +25,15 @@ #include "constexpr_char_traits.h" #include "test_allocator.h" +template +std::basic_syncbuf lwg3253_default_constructor_is_not_explicit() { + return {}; +} + template void test() { + lwg3253_default_constructor_is_not_explicit(); + { using Buf = std::basic_syncbuf; static_assert(std::default_initializable); diff --git a/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp b/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp index c9c2ba20021491..cd032d48648953 100644 --- a/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp +++ b/libcxx/test/std/language.support/cmp/cmp.alg/strong_order_long_double.verify.cpp @@ -8,21 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// The following platforms have sizeof(long double) == sizeof(double), so this test doesn't apply to them. -// This test does apply to aarch64 where Arm's AAPCS64 is followed. There they are different sizes. -// XFAIL: target={{arm64|arm64e|armv(7|8)(l|m)?|powerpc|powerpc64}}-{{.+}} - -// MSVC configurations have long double equal to regular double on all -// architectures. -// XFAIL: target={{.+}}-pc-windows-msvc - -// ARM/AArch64 MinGW also has got long double equal to regular double, just -// like MSVC (thus match both MinGW and MSVC here, for those architectures). -// XFAIL: target={{aarch64|armv7}}-{{.*}}-windows-{{.+}} - -// Android's 32-bit x86 target has long double equal to regular double. 
-// XFAIL: target=i686-{{.+}}-android{{.*}} - // // template constexpr strong_ordering strong_order(const T& a, const T& b); @@ -37,5 +22,9 @@ void f() { long double ld = 3.14; +#ifdef TEST_LONG_DOUBLE_IS_DOUBLE + (void)ld; // expected-no-diagnostics +#else (void)std::strong_order(ld, ld); // expected-error@*:* {{std::strong_order is unimplemented for this floating-point type}} +#endif } diff --git a/libcxx/test/std/numerics/bit/bit.cast/bit_cast.pass.cpp b/libcxx/test/std/numerics/bit/bit.cast/bit_cast.pass.cpp index f73877416a7170..044589298439c1 100644 --- a/libcxx/test/std/numerics/bit/bit.cast/bit_cast.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.cast/bit_cast.pass.cpp @@ -229,7 +229,7 @@ bool tests() { test_roundtrip_through_nested_T(i); test_roundtrip_through_buffer(i); -#if __SIZEOF_LONG_DOUBLE__ == __SIZEOF_DOUBLE__ +#ifdef TEST_LONG_DOUBLE_IS_DOUBLE test_roundtrip_through(i); #endif #if defined(__SIZEOF_INT128__) && __SIZEOF_LONG_DOUBLE__ == __SIZEOF_INT128__ && \ diff --git a/libcxx/test/support/test_macros.h b/libcxx/test/support/test_macros.h index 6f7ec3aa0c1f9f..5d4c1a65cfafb2 100644 --- a/libcxx/test/support/test_macros.h +++ b/libcxx/test/support/test_macros.h @@ -511,4 +511,8 @@ inline Tp const& DoNotOptimize(Tp const& value) { # define TEST_CONSTEXPR_OPERATOR_NEW #endif +#if __SIZEOF_LONG_DOUBLE__ == __SIZEOF_DOUBLE__ +# define TEST_LONG_DOUBLE_IS_DOUBLE +#endif + #endif // SUPPORT_TEST_MACROS_HPP diff --git a/libcxx/utils/generate_escaped_output_table.py b/libcxx/utils/generate_escaped_output_table.py index 9dcecaa5575cdd..41524e8fe7186c 100755 --- a/libcxx/utils/generate_escaped_output_table.py +++ b/libcxx/utils/generate_escaped_output_table.py @@ -235,7 +235,7 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: // clang-format on }} // namespace __escaped_output_table -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/utils/generate_extended_grapheme_cluster_table.py b/libcxx/utils/generate_extended_grapheme_cluster_table.py index 76d1e78e9239c6..558b606186130f 100755 --- a/libcxx/utils/generate_extended_grapheme_cluster_table.py +++ b/libcxx/utils/generate_extended_grapheme_cluster_table.py @@ -230,7 +230,7 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: {content} }} // namespace __extended_grapheme_custer_property_boundary -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/utils/generate_indic_conjunct_break_table.py b/libcxx/utils/generate_indic_conjunct_break_table.py index 762dfa73b51f7b..e41f6e9be233d7 100755 --- a/libcxx/utils/generate_indic_conjunct_break_table.py +++ b/libcxx/utils/generate_indic_conjunct_break_table.py @@ -223,7 +223,7 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: {content} }} // namespace __indic_conjunct_break -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/utils/generate_width_estimation_table.py b/libcxx/utils/generate_width_estimation_table.py index f4cce1071d1f15..d8c036f34e8353 100644 --- a/libcxx/utils/generate_width_estimation_table.py +++ b/libcxx/utils/generate_width_estimation_table.py @@ -261,7 +261,7 @@ def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]: {content} }} // namespace __width_estimation_table -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git 
a/lld/test/ELF/avr-reloc.s b/lld/test/ELF/avr-reloc.s index ec088eaa149d01..41c32580f63a1c 100644 --- a/lld/test/ELF/avr-reloc.s +++ b/lld/test/ELF/avr-reloc.s @@ -76,32 +76,6 @@ adiw r24, b ; R_AVR_6_ADIW in r20, b ; R_AVR_PORT6 sbic b, 1 ; R_AVR_PORT5 -.section .PCREL,"ax",@progbits -; CHECK-LABEL: section .PCREL -; CHECK: rjmp .+30 -; CHECK-NEXT: rjmp .-36 -; CHECK-NEXT: breq .+26 -; CHECK-NEXT: breq .-40 -; CHECK-NEXT: rjmp .-4096 -; CHECK-NEXT: rjmp .+4094 -; CHECK-NEXT: rjmp .+4094 -; CHECK-NEXT: rjmp .-4096 -; CHECK-NEXT: breq .-128 -; CHECK-NEXT: breq .+126 -; HEX-LABEL: section .PCREL: -; HEX-NEXT: 0fc0eecf 69f061f3 -foo: -rjmp foo + 32 ; R_AVR_13_PCREL -rjmp foo - 32 ; R_AVR_13_PCREL -breq foo + 32 ; R_AVR_7_PCREL -breq foo - 32 ; R_AVR_7_PCREL -rjmp 1f - 4096 $ 1: ; R_AVR_13_PCREL -rjmp 1f + 4094 $ 1: ; R_AVR_13_PCREL -rjmp 1f - 4098 $ 1: ; R_AVR_13_PCREL (overflow) -rjmp 1f + 4096 $ 1: ; R_AVR_13_PCREL (overflow) -breq 1f - 128 $ 1: ; R_AVR_7_PCREL -breq 1f + 126 $ 1: ; R_AVR_7_PCREL - .section .LDSSTS,"ax",@progbits ; CHECK-LABEL: section .LDSSTS: ; CHECK: lds r20, 0x1e diff --git a/lldb/bindings/interface/SBErrorDocstrings.i b/lldb/bindings/interface/SBErrorDocstrings.i index b64c3d64c6c77b..c272ffb7605ffb 100644 --- a/lldb/bindings/interface/SBErrorDocstrings.i +++ b/lldb/bindings/interface/SBErrorDocstrings.i @@ -10,8 +10,10 @@ For example (from test/python_api/hello_world/TestHelloWorld.py), :: # Spawn a new process and don't display the stdout if not in TraceOn() mode. import subprocess - popen = subprocess.Popen([self.exe, 'abc', 'xyz'], - stdout = open(os.devnull, 'w') if not self.TraceOn() else None) + popen = subprocess.Popen( + [self.exe, 'abc', 'xyz'], + stdout=subprocess.DEVNULL if not self.TraceOn() else None, + ) listener = lldb.SBListener('my.attach.listener') error = lldb.SBError() diff --git a/lldb/include/lldb/Core/SourceManager.h b/lldb/include/lldb/Core/SourceManager.h index 5239ac6f4055f5..172824dc78a6bc 100644 --- a/lldb/include/lldb/Core/SourceManager.h +++ b/lldb/include/lldb/Core/SourceManager.h @@ -9,6 +9,7 @@ #ifndef LLDB_CORE_SOURCEMANAGER_H #define LLDB_CORE_SOURCEMANAGER_H +#include "lldb/Utility/Checksum.h" #include "lldb/Utility/FileSpec.h" #include "lldb/lldb-defines.h" #include "lldb/lldb-forward.h" @@ -37,8 +38,8 @@ class SourceManager { const SourceManager::File &rhs); public: - File(const FileSpec &file_spec, lldb::TargetSP target_sp); - File(const FileSpec &file_spec, lldb::DebuggerSP debugger_sp); + File(lldb::SupportFileSP support_file_sp, lldb::TargetSP target_sp); + File(lldb::SupportFileSP support_file_sp, lldb::DebuggerSP debugger_sp); bool ModificationTimeIsStale() const; bool PathRemappingIsStale() const; @@ -56,7 +57,10 @@ class SourceManager { bool LineIsValid(uint32_t line); - const FileSpec &GetFileSpec() { return m_file_spec; } + lldb::SupportFileSP GetSupportFile() const { + assert(m_support_file_sp && "SupportFileSP must always be valid"); + return m_support_file_sp; + } uint32_t GetSourceMapModificationID() const { return m_source_map_mod_id; } @@ -68,17 +72,20 @@ class SourceManager { llvm::sys::TimePoint<> GetTimestamp() const { return m_mod_time; } + const Checksum &GetChecksum() const { return m_checksum; } + protected: /// Set file and update modification time. 
- void SetFileSpec(FileSpec file_spec); + void SetSupportFile(lldb::SupportFileSP support_file_sp); bool CalculateLineOffsets(uint32_t line = UINT32_MAX); - FileSpec m_file_spec_orig; // The original file spec that was used (can be - // different from m_file_spec) - FileSpec m_file_spec; // The actually file spec being used (if the target - // has source mappings, this might be different from - // m_file_spec_orig) + /// The support file. If the target has source mappings, this might be + /// different from the original support file passed to the constructor. + lldb::SupportFileSP m_support_file_sp; + + /// Keep track of the on-disk checksum. + Checksum m_checksum; // Keep the modification time that this file data is valid for llvm::sys::TimePoint<> m_mod_time; @@ -93,7 +100,8 @@ class SourceManager { lldb::TargetWP m_target_wp; private: - void CommonInitializer(const FileSpec &file_spec, lldb::TargetSP target_sp); + void CommonInitializer(lldb::SupportFileSP support_file_sp, + lldb::TargetSP target_sp); }; typedef std::shared_ptr FileSP; @@ -139,14 +147,13 @@ class SourceManager { ~SourceManager(); - FileSP GetLastFile() { return GetFile(m_last_file_spec); } + FileSP GetLastFile() { return GetFile(m_last_support_file_sp); } - size_t - DisplaySourceLinesWithLineNumbers(const FileSpec &file, uint32_t line, - uint32_t column, uint32_t context_before, - uint32_t context_after, - const char *current_line_cstr, Stream *s, - const SymbolContextList *bp_locs = nullptr); + size_t DisplaySourceLinesWithLineNumbers( + lldb::SupportFileSP support_file_sp, uint32_t line, uint32_t column, + uint32_t context_before, uint32_t context_after, + const char *current_line_cstr, Stream *s, + const SymbolContextList *bp_locs = nullptr); // This variant uses the last file we visited. 
size_t DisplaySourceLinesWithLineNumbersUsingLastFile( @@ -157,22 +164,31 @@ class SourceManager { size_t DisplayMoreWithLineNumbers(Stream *s, uint32_t count, bool reverse, const SymbolContextList *bp_locs = nullptr); - bool SetDefaultFileAndLine(const FileSpec &file_spec, uint32_t line); + bool SetDefaultFileAndLine(lldb::SupportFileSP support_file_sp, + uint32_t line); + + struct SupportFileAndLine { + lldb::SupportFileSP support_file_sp; + uint32_t line; + SupportFileAndLine(lldb::SupportFileSP support_file_sp, uint32_t line) + : support_file_sp(support_file_sp), line(line) {} + }; - bool GetDefaultFileAndLine(FileSpec &file_spec, uint32_t &line); + std::optional<SupportFileAndLine> GetDefaultFileAndLine(); bool DefaultFileAndLineSet() { - return (GetFile(m_last_file_spec).get() != nullptr); + return (GetFile(m_last_support_file_sp).get() != nullptr); } - void FindLinesMatchingRegex(FileSpec &file_spec, RegularExpression &regex, - uint32_t start_line, uint32_t end_line, + void FindLinesMatchingRegex(lldb::SupportFileSP support_file_sp, + RegularExpression &regex, uint32_t start_line, + uint32_t end_line, std::vector<uint32_t> &match_lines); - FileSP GetFile(const FileSpec &file_spec); + FileSP GetFile(lldb::SupportFileSP support_file_sp); protected: - FileSpec m_last_file_spec; + lldb::SupportFileSP m_last_support_file_sp; uint32_t m_last_line; uint32_t m_last_count; bool m_default_set; diff --git a/lldb/include/lldb/Utility/SupportFile.h b/lldb/include/lldb/Utility/SupportFile.h index 334a0aaac2c27e..6a091bb84ada35 100644 --- a/lldb/include/lldb/Utility/SupportFile.h +++ b/lldb/include/lldb/Utility/SupportFile.h @@ -14,10 +14,10 @@ namespace lldb_private { -/// Wraps either a FileSpec that represents a local file or a source -/// file whose contents is known (for example because it can be -/// reconstructed from debug info), but that hasn't been written to a -/// file yet. This also stores an optional checksum of the on-disk content. +/// Wraps a FileSpec and an optional Checksum. The FileSpec represents either a +/// path to a file or a source file whose contents are known (for example +/// because they can be reconstructed from debug info), but that hasn't been +/// written to a file yet. class SupportFile { public: SupportFile() : m_file_spec(), m_checksum() {} diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 0e8ca159efd55d..834f01aaa61e6b 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -467,9 +467,8 @@ def should_skip_simulator_test(): if lldbplatformutil.getHostPlatform() not in ["darwin", "macosx"]: return "simulator tests are run only on darwin hosts." try: - DEVNULL = open(os.devnull, "w") output = subprocess.check_output( - ["xcodebuild", "-showsdks"], stderr=DEVNULL + ["xcodebuild", "-showsdks"], stderr=subprocess.DEVNULL ).decode("utf-8") if re.search("%ssimulator" % platform, output): return None @@ -1094,9 +1093,8 @@ def skipUnlessFeature(feature): def is_feature_enabled(): if platform.system() == "Darwin": try: - DEVNULL = open(os.devnull, "w") output = subprocess.check_output( - ["/usr/sbin/sysctl", feature], stderr=DEVNULL + ["/usr/sbin/sysctl", feature], stderr=subprocess.DEVNULL ).decode("utf-8") # If 'feature: 1' was output, then this feature is available and # the test should not be skipped.
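The lldb hunks above and below all follow one pattern: APIs that used to pass a bare FileSpec (just a path) now pass lldb::SupportFileSP, a std::shared_ptr<SupportFile> bundling the path with an optional checksum of the on-disk content. The sketch below illustrates the shape of that pattern in isolation; Path, Checksum, and the member functions here are simplified stand-ins invented for the example, not the real lldb classes.

#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <utility>

// Hypothetical stand-ins for lldb_private::FileSpec and lldb_private::Checksum.
using Path = std::string;
using Checksum = std::optional<std::string>; // e.g. an MD5 digest, if known

// Minimal analogue of lldb_private::SupportFile: a path plus an optional
// checksum that travels with it through every API that handles the file.
class SupportFile {
public:
  explicit SupportFile(Path path, Checksum checksum = std::nullopt)
      : m_path(std::move(path)), m_checksum(std::move(checksum)) {}

  const Path &GetSpecOnly() const { return m_path; }
  const Checksum &GetChecksum() const { return m_checksum; }

private:
  Path m_path;
  Checksum m_checksum;
};

// Analogue of lldb::SupportFileSP: shared ownership lets the source manager,
// its cached File objects, and line entries all point at the same instance.
using SupportFileSP = std::shared_ptr<SupportFile>;

// Before the patch an API like this took `const Path &`; afterwards it takes
// the shared wrapper, so the checksum is never dropped at a hand-off.
void DisplayFile(SupportFileSP support_file_sp) {
  std::cout << "file: " << support_file_sp->GetSpecOnly() << '\n';
  if (support_file_sp->GetChecksum())
    std::cout << "checksum: " << *support_file_sp->GetChecksum() << '\n';
}

int main() {
  auto file = std::make_shared<SupportFile>("main.cpp", "d41d8cd98f00b204");
  DisplayFile(file);
}

Shared ownership is the point of the wrapper: several components can reference the same source file, and passing a shared pointer lets the checksum accompany the path instead of being recomputed or lost at each API boundary.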
diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index b57c3bdd87c83c..e0da7cbd1ddd6e 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -31,7 +31,6 @@ import abc from functools import wraps import gc -import glob import io import json import os.path @@ -416,7 +415,7 @@ def launch(self, executable, args, extra_env): self._proc = Popen( [executable] + args, - stdout=open(os.devnull) if not self._trace_on else None, + stdout=DEVNULL if not self._trace_on else None, stdin=PIPE, env=env, ) diff --git a/lldb/source/API/SBSourceManager.cpp b/lldb/source/API/SBSourceManager.cpp index e46f990698d826..4b96f1222bc88f 100644 --- a/lldb/source/API/SBSourceManager.cpp +++ b/lldb/source/API/SBSourceManager.cpp @@ -46,15 +46,15 @@ class SourceManagerImpl { lldb::TargetSP target_sp(m_target_wp.lock()); if (target_sp) { return target_sp->GetSourceManager().DisplaySourceLinesWithLineNumbers( - file, line, column, context_before, context_after, current_line_cstr, - s); + std::make_shared(file), line, column, context_before, + context_after, current_line_cstr, s); } else { lldb::DebuggerSP debugger_sp(m_debugger_wp.lock()); if (debugger_sp) { return debugger_sp->GetSourceManager() - .DisplaySourceLinesWithLineNumbers(file, line, column, - context_before, context_after, - current_line_cstr, s); + .DisplaySourceLinesWithLineNumbers( + std::make_shared(file), line, column, + context_before, context_after, current_line_cstr, s); } } return 0; diff --git a/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp b/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp index 0509924e6300be..05fa7b93096889 100644 --- a/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverFileRegex.cpp @@ -102,7 +102,8 @@ Searcher::CallbackReturn BreakpointResolverFileRegex::SearchCallback( FileSpec cu_file_spec = cu->GetPrimaryFile(); std::vector line_matches; context.target_sp->GetSourceManager().FindLinesMatchingRegex( - cu_file_spec, m_regex, 1, UINT32_MAX, line_matches); + std::make_shared(cu_file_spec), m_regex, 1, UINT32_MAX, + line_matches); uint32_t num_matches = line_matches.size(); for (uint32_t i = 0; i < num_matches; i++) { diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp index abde27b2b53ad8..ede3dd2f2a864c 100644 --- a/lldb/source/Commands/CommandObjectBreakpoint.cpp +++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp @@ -769,20 +769,26 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { private: bool GetDefaultFile(Target &target, FileSpec &file, CommandReturnObject &result) { - uint32_t default_line; // First use the Source Manager's default file. Then use the current stack // frame's file. 
- if (!target.GetSourceManager().GetDefaultFileAndLine(file, default_line)) { + if (auto maybe_file_and_line = + target.GetSourceManager().GetDefaultFileAndLine()) { + file = maybe_file_and_line->support_file_sp->GetSpecOnly(); + return true; + } + StackFrame *cur_frame = m_exe_ctx.GetFramePtr(); if (cur_frame == nullptr) { result.AppendError( "No selected frame to use to find the default file."); return false; - } else if (!cur_frame->HasDebugInformation()) { + } + if (!cur_frame->HasDebugInformation()) { result.AppendError("Cannot use the selected frame to find the default " "file, it has no debug info."); return false; - } else { + } + const SymbolContext &sc = cur_frame->GetSymbolContext(eSymbolContextLineEntry); if (sc.line_entry.GetFile()) { @@ -791,8 +797,6 @@ class CommandObjectBreakpointSet : public CommandObjectParsed { result.AppendError("Can't find the file for the selected frame to " "use as the default file."); return false; - } - } } return true; } diff --git a/lldb/source/Commands/CommandObjectSource.cpp b/lldb/source/Commands/CommandObjectSource.cpp index 5ddd46ac5fdc07..1fc122420388d8 100644 --- a/lldb/source/Commands/CommandObjectSource.cpp +++ b/lldb/source/Commands/CommandObjectSource.cpp @@ -777,14 +777,16 @@ class CommandObjectSourceList : public CommandObjectParsed { if (sc.function) { Target &target = GetTarget(); - FileSpec start_file; + SupportFileSP start_file = std::make_shared(); uint32_t start_line; uint32_t end_line; FileSpec end_file; if (sc.block == nullptr) { // Not an inlined function - sc.function->GetStartLineSourceInfo(start_file, start_line); + FileSpec function_file_spec; + sc.function->GetStartLineSourceInfo(function_file_spec, start_line); + start_file = std::make_shared(function_file_spec); if (start_line == 0) { result.AppendErrorWithFormat("Could not find line information for " "start of function: \"%s\".\n", @@ -794,7 +796,7 @@ class CommandObjectSourceList : public CommandObjectParsed { sc.function->GetEndLineSourceInfo(end_file, end_line); } else { // We have an inlined function - start_file = source_info.line_entry.GetFile(); + start_file = source_info.line_entry.file_sp; start_line = source_info.line_entry.line; end_line = start_line + m_options.num_lines; } @@ -825,14 +827,15 @@ class CommandObjectSourceList : public CommandObjectParsed { if (m_options.show_bp_locs) { const bool show_inlines = true; - m_breakpoint_locations.Reset(start_file, 0, show_inlines); + m_breakpoint_locations.Reset(start_file->GetSpecOnly(), 0, + show_inlines); SearchFilterForUnconstrainedSearches target_search_filter( m_exe_ctx.GetTargetSP()); target_search_filter.Search(m_breakpoint_locations); } - result.AppendMessageWithFormat("File: %s\n", - start_file.GetPath().c_str()); + result.AppendMessageWithFormat( + "File: %s\n", start_file->GetSpecOnly().GetPath().c_str()); // We don't care about the column here. const uint32_t column = 0; return target.GetSourceManager().DisplaySourceLinesWithLineNumbers( @@ -1050,8 +1053,9 @@ class CommandObjectSourceList : public CommandObjectParsed { ? 
sc.line_entry.column : 0; target.GetSourceManager().DisplaySourceLinesWithLineNumbers( - sc.comp_unit->GetPrimaryFile(), sc.line_entry.line, column, - lines_to_back_up, m_options.num_lines - lines_to_back_up, "->", + std::make_shared(sc.comp_unit->GetPrimaryFile()), + sc.line_entry.line, column, lines_to_back_up, + m_options.num_lines - lines_to_back_up, "->", &result.GetOutputStream(), GetBreakpointLocations()); result.SetStatus(eReturnStatusSuccessFinishResult); } @@ -1076,8 +1080,8 @@ class CommandObjectSourceList : public CommandObjectParsed { target.GetSourceManager().GetLastFile()); if (last_file_sp) { const bool show_inlines = true; - m_breakpoint_locations.Reset(last_file_sp->GetFileSpec(), 0, - show_inlines); + m_breakpoint_locations.Reset( + last_file_sp->GetSupportFile()->GetSpecOnly(), 0, show_inlines); SearchFilterForUnconstrainedSearches target_search_filter( target.shared_from_this()); target_search_filter.Search(m_breakpoint_locations); @@ -1170,9 +1174,9 @@ class CommandObjectSourceList : public CommandObjectParsed { m_options.num_lines = 10; const uint32_t column = 0; target.GetSourceManager().DisplaySourceLinesWithLineNumbers( - sc.comp_unit->GetPrimaryFile(), m_options.start_line, column, 0, - m_options.num_lines, "", &result.GetOutputStream(), - GetBreakpointLocations()); + std::make_shared(sc.comp_unit->GetPrimaryFile()), + m_options.start_line, column, 0, m_options.num_lines, "", + &result.GetOutputStream(), GetBreakpointLocations()); result.SetStatus(eReturnStatusSuccessFinishResult); } else { diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp index 9286f62058bc8d..d071e3bfe4f77d 100644 --- a/lldb/source/Core/Disassembler.cpp +++ b/lldb/source/Core/Disassembler.cpp @@ -517,7 +517,8 @@ void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, line_highlight = "**"; } source_manager.DisplaySourceLinesWithLineNumbers( - ln.file, ln.line, ln.column, 0, 0, line_highlight, &strm); + std::make_shared(ln.file), ln.line, ln.column, 0, 0, + line_highlight, &strm); } if (source_lines_to_display.print_source_context_end_eol) strm.EOL(); diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index d922d32f910583..3d69aedb6b13ee 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -6894,8 +6894,8 @@ class SourceFileWindowDelegate : public WindowDelegate { if (context_changed) m_selected_line = m_pc_line; - if (m_file_sp && - m_file_sp->GetFileSpec() == m_sc.line_entry.GetFile()) { + if (m_file_sp && m_file_sp->GetSupportFile()->GetSpecOnly() == + m_sc.line_entry.GetFile()) { // Same file, nothing to do, we should either have the lines or // not (source file missing) if (m_selected_line >= static_cast(m_first_visible_line)) { @@ -6910,8 +6910,8 @@ class SourceFileWindowDelegate : public WindowDelegate { } else { // File changed, set selected line to the line with the PC m_selected_line = m_pc_line; - m_file_sp = m_debugger.GetSourceManager().GetFile( - m_sc.line_entry.GetFile()); + m_file_sp = + m_debugger.GetSourceManager().GetFile(m_sc.line_entry.file_sp); if (m_file_sp) { const size_t num_lines = m_file_sp->GetNumLines(); m_line_width = 1; @@ -7001,7 +7001,8 @@ class SourceFileWindowDelegate : public WindowDelegate { LineEntry bp_loc_line_entry; if (bp_loc_sp->GetAddress().CalculateSymbolContextLineEntry( bp_loc_line_entry)) { - if (m_file_sp->GetFileSpec() == bp_loc_line_entry.GetFile()) { + if (m_file_sp->GetSupportFile()->GetSpecOnly() 
== + bp_loc_line_entry.GetFile()) { bp_lines.insert(bp_loc_line_entry.line); } } @@ -7332,7 +7333,7 @@ class SourceFileWindowDelegate : public WindowDelegate { if (exe_ctx.HasProcessScope() && exe_ctx.GetProcessRef().IsAlive()) { BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( nullptr, // Don't limit the breakpoint to certain modules - m_file_sp->GetFileSpec(), // Source file + m_file_sp->GetSupportFile()->GetSpecOnly(), // Source file m_selected_line + 1, // Source line number (m_selected_line is zero based) 0, // Unspecified column. @@ -7478,7 +7479,8 @@ class SourceFileWindowDelegate : public WindowDelegate { LineEntry bp_loc_line_entry; if (bp_loc_sp->GetAddress().CalculateSymbolContextLineEntry( bp_loc_line_entry)) { - if (m_file_sp->GetFileSpec() == bp_loc_line_entry.GetFile() && + if (m_file_sp->GetSupportFile()->GetSpecOnly() == + bp_loc_line_entry.GetFile() && m_selected_line + 1 == bp_loc_line_entry.line) { bool removed = exe_ctx.GetTargetRef().RemoveBreakpointByID(bp_sp->GetID()); @@ -7492,7 +7494,7 @@ class SourceFileWindowDelegate : public WindowDelegate { // No breakpoint found on the location, add it. BreakpointSP bp_sp = exe_ctx.GetTargetRef().CreateBreakpoint( nullptr, // Don't limit the breakpoint to certain modules - m_file_sp->GetFileSpec(), // Source file + m_file_sp->GetSupportFile()->GetSpecOnly(), // Source file m_selected_line + 1, // Source line number (m_selected_line is zero based) 0, // No column specified. diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp index 0d70c554e5342b..f6e59ce731a573 100644 --- a/lldb/source/Core/SourceManager.cpp +++ b/lldb/source/Core/SourceManager.cpp @@ -63,18 +63,22 @@ static void resolve_tilde(FileSpec &file_spec) { // SourceManager constructor SourceManager::SourceManager(const TargetSP &target_sp) - : m_last_line(0), m_last_count(0), m_default_set(false), - m_target_wp(target_sp), + : m_last_support_file_sp(std::make_shared()), m_last_line(0), + m_last_count(0), m_default_set(false), m_target_wp(target_sp), m_debugger_wp(target_sp->GetDebugger().shared_from_this()) {} SourceManager::SourceManager(const DebuggerSP &debugger_sp) - : m_last_line(0), m_last_count(0), m_default_set(false), m_target_wp(), + : m_last_support_file_sp(std::make_shared()), m_last_line(0), + m_last_count(0), m_default_set(false), m_target_wp(), m_debugger_wp(debugger_sp) {} // Destructor SourceManager::~SourceManager() = default; -SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { +SourceManager::FileSP SourceManager::GetFile(SupportFileSP support_file_sp) { + assert(support_file_sp && "SupportFileSP must be valid"); + + FileSpec file_spec = support_file_sp->GetSpecOnly(); if (!file_spec) return {}; @@ -87,8 +91,8 @@ SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { LLDB_LOG(log, "Source file caching disabled: creating new source file: {0}", file_spec); if (target_sp) - return std::make_shared(file_spec, target_sp); - return std::make_shared(file_spec, debugger_sp); + return std::make_shared(support_file_sp, target_sp); + return std::make_shared(support_file_sp, debugger_sp); } ProcessSP process_sp = target_sp ? target_sp->GetProcessSP() : ProcessSP(); @@ -136,7 +140,8 @@ SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { } // Check if the file exists on disk. 
- if (file_sp && !FileSystem::Instance().Exists(file_sp->GetFileSpec())) { + if (file_sp && !FileSystem::Instance().Exists( + file_sp->GetSupportFile()->GetSpecOnly())) { LLDB_LOG(log, "File doesn't exist on disk: {0}", file_spec); file_sp.reset(); } @@ -148,9 +153,9 @@ SourceManager::FileSP SourceManager::GetFile(const FileSpec &file_spec) { // (Re)create the file. if (target_sp) - file_sp = std::make_shared(file_spec, target_sp); + file_sp = std::make_shared(support_file_sp, target_sp); else - file_sp = std::make_shared(file_spec, debugger_sp); + file_sp = std::make_shared(support_file_sp, debugger_sp); // Add the file to the debugger and process cache. If the file was // invalidated, this will overwrite it. @@ -230,11 +235,8 @@ size_t SourceManager::DisplaySourceLinesWithLineNumbersUsingLastFile( start_line = 1; } - if (!m_default_set) { - FileSpec tmp_spec; - uint32_t tmp_line; - GetDefaultFileAndLine(tmp_spec, tmp_line); - } + if (!m_default_set) + GetDefaultFileAndLine(); m_last_line = start_line; m_last_count = count; @@ -305,11 +307,12 @@ size_t SourceManager::DisplaySourceLinesWithLineNumbersUsingLastFile( } size_t SourceManager::DisplaySourceLinesWithLineNumbers( - const FileSpec &file_spec, uint32_t line, uint32_t column, + lldb::SupportFileSP support_file_sp, uint32_t line, uint32_t column, uint32_t context_before, uint32_t context_after, const char *current_line_cstr, Stream *s, const SymbolContextList *bp_locs) { - FileSP file_sp(GetFile(file_spec)); + assert(support_file_sp && "SupportFile must be valid"); + FileSP file_sp(GetFile(support_file_sp)); uint32_t start_line; uint32_t count = context_before + context_after + 1; @@ -322,8 +325,9 @@ size_t SourceManager::DisplaySourceLinesWithLineNumbers( if (last_file_sp.get() != file_sp.get()) { if (line == 0) m_last_line = 0; - m_last_file_spec = file_spec; + m_last_support_file_sp = support_file_sp; } + return DisplaySourceLinesWithLineNumbersUsingLastFile( start_line, count, line, column, current_line_cstr, s, bp_locs); } @@ -334,11 +338,8 @@ size_t SourceManager::DisplayMoreWithLineNumbers( // to figure it out here. 
FileSP last_file_sp(GetLastFile()); const bool have_default_file_line = last_file_sp && m_last_line > 0; - if (!m_default_set) { - FileSpec tmp_spec; - uint32_t tmp_line; - GetDefaultFileAndLine(tmp_spec, tmp_line); - } + if (!m_default_set) + GetDefaultFileAndLine(); if (last_file_sp) { if (m_last_line == UINT32_MAX) @@ -373,26 +374,27 @@ size_t SourceManager::DisplayMoreWithLineNumbers( return 0; } -bool SourceManager::SetDefaultFileAndLine(const FileSpec &file_spec, +bool SourceManager::SetDefaultFileAndLine(lldb::SupportFileSP support_file_sp, uint32_t line) { + assert(support_file_sp && "SupportFile must be valid"); + m_default_set = true; - FileSP file_sp(GetFile(file_spec)); - if (file_sp) { + if (FileSP file_sp = GetFile(support_file_sp)) { m_last_line = line; - m_last_file_spec = file_spec; + m_last_support_file_sp = support_file_sp; return true; - } else { - return false; } + + return false; } -bool SourceManager::GetDefaultFileAndLine(FileSpec &file_spec, uint32_t &line) { - if (FileSP last_file_sp = GetLastFile()) { - file_spec = m_last_file_spec; - line = m_last_line; - return true; - } else if (!m_default_set) { +std::optional<SourceManager::SupportFileAndLine> +SourceManager::GetDefaultFileAndLine() { + if (FileSP last_file_sp = GetLastFile()) + return SupportFileAndLine(m_last_support_file_sp, m_last_line); + + if (!m_default_set) { TargetSP target_sp(m_target_wp.lock()); if (target_sp) { @@ -418,51 +420,51 @@ bool SourceManager::GetDefaultFileAndLine(FileSpec &file_spec, uint32_t &line) { if (sc.function->GetAddressRange() .GetBaseAddress() .CalculateSymbolContextLineEntry(line_entry)) { - SetDefaultFileAndLine(line_entry.GetFile(), line_entry.line); - file_spec = m_last_file_spec; - line = m_last_line; - return true; + SetDefaultFileAndLine(line_entry.file_sp, line_entry.line); + return SupportFileAndLine(line_entry.file_sp, m_last_line); } } } } } } - return false; + + return std::nullopt; } -void SourceManager::FindLinesMatchingRegex(FileSpec &file_spec, +void SourceManager::FindLinesMatchingRegex(SupportFileSP support_file_sp, RegularExpression &regex, uint32_t start_line, uint32_t end_line, std::vector<uint32_t> &match_lines) { match_lines.clear(); - FileSP file_sp = GetFile(file_spec); + FileSP file_sp = GetFile(support_file_sp); if (!file_sp) return; return file_sp->FindLinesMatchingRegex(regex, start_line, end_line, match_lines); } -SourceManager::File::File(const FileSpec &file_spec, +SourceManager::File::File(SupportFileSP support_file_sp, lldb::DebuggerSP debugger_sp) - : m_file_spec_orig(file_spec), m_file_spec(), m_mod_time(), - m_debugger_wp(debugger_sp), m_target_wp(TargetSP()) { - CommonInitializer(file_spec, {}); + : m_support_file_sp(std::make_shared<SupportFile>()), m_checksum(), + m_mod_time(), m_debugger_wp(debugger_sp), m_target_wp(TargetSP()) { + CommonInitializer(support_file_sp, {}); } -SourceManager::File::File(const FileSpec &file_spec, TargetSP target_sp) - : m_file_spec_orig(file_spec), m_file_spec(), m_mod_time(), +SourceManager::File::File(SupportFileSP support_file_sp, TargetSP target_sp) + : m_support_file_sp(std::make_shared<SupportFile>()), m_checksum(), + m_mod_time(), m_debugger_wp(target_sp ? target_sp->GetDebugger().shared_from_this() : DebuggerSP()), m_target_wp(target_sp) { - CommonInitializer(file_spec, target_sp); + CommonInitializer(support_file_sp, target_sp); } -void SourceManager::File::CommonInitializer(const FileSpec &file_spec, +void SourceManager::File::CommonInitializer(SupportFileSP support_file_sp, TargetSP target_sp) { // Set the file and update the modification time.
- SetFileSpec(file_spec); + SetSupportFile(support_file_sp); // Always update the source map modification ID if we have a target. if (target_sp) @@ -472,65 +474,78 @@ void SourceManager::File::CommonInitializer(const FileSpec &file_spec, if (m_mod_time == llvm::sys::TimePoint<>()) { if (target_sp) { // If this is just a file name, try finding it in the target. - if (!file_spec.GetDirectory() && file_spec.GetFilename()) { - bool check_inlines = false; - SymbolContextList sc_list; - size_t num_matches = - target_sp->GetImages().ResolveSymbolContextForFilePath( - file_spec.GetFilename().AsCString(), 0, check_inlines, - SymbolContextItem(eSymbolContextModule | - eSymbolContextCompUnit), - sc_list); - bool got_multiple = false; - if (num_matches != 0) { - if (num_matches > 1) { - CompileUnit *test_cu = nullptr; - for (const SymbolContext &sc : sc_list) { - if (sc.comp_unit) { - if (test_cu) { - if (test_cu != sc.comp_unit) - got_multiple = true; - break; - } else - test_cu = sc.comp_unit; + { + FileSpec file_spec = support_file_sp->GetSpecOnly(); + if (!file_spec.GetDirectory() && file_spec.GetFilename()) { + bool check_inlines = false; + SymbolContextList sc_list; + size_t num_matches = + target_sp->GetImages().ResolveSymbolContextForFilePath( + file_spec.GetFilename().AsCString(), 0, check_inlines, + SymbolContextItem(eSymbolContextModule | + eSymbolContextCompUnit), + sc_list); + bool got_multiple = false; + if (num_matches != 0) { + if (num_matches > 1) { + CompileUnit *test_cu = nullptr; + for (const SymbolContext &sc : sc_list) { + if (sc.comp_unit) { + if (test_cu) { + if (test_cu != sc.comp_unit) + got_multiple = true; + break; + } else + test_cu = sc.comp_unit; + } } } - } - if (!got_multiple) { - SymbolContext sc; - sc_list.GetContextAtIndex(0, sc); - if (sc.comp_unit) - SetFileSpec(sc.comp_unit->GetPrimaryFile()); + if (!got_multiple) { + SymbolContext sc; + sc_list.GetContextAtIndex(0, sc); + if (sc.comp_unit) + SetSupportFile(std::make_shared( + sc.comp_unit->GetPrimaryFile())); + } } } } // Try remapping the file if it doesn't exist. - if (!FileSystem::Instance().Exists(m_file_spec)) { - // Check target specific source remappings (i.e., the - // target.source-map setting), then fall back to the module - // specific remapping (i.e., the .dSYM remapping dictionary). - auto remapped = target_sp->GetSourcePathMap().FindFile(m_file_spec); - if (!remapped) { - FileSpec new_spec; - if (target_sp->GetImages().FindSourceFile(m_file_spec, new_spec)) - remapped = new_spec; + { + FileSpec file_spec = support_file_sp->GetSpecOnly(); + if (!FileSystem::Instance().Exists(file_spec)) { + // Check target specific source remappings (i.e., the + // target.source-map setting), then fall back to the module + // specific remapping (i.e., the .dSYM remapping dictionary). + auto remapped = target_sp->GetSourcePathMap().FindFile(file_spec); + if (!remapped) { + FileSpec new_spec; + if (target_sp->GetImages().FindSourceFile(file_spec, new_spec)) + remapped = new_spec; + } + if (remapped) + SetSupportFile(std::make_shared( + *remapped, support_file_sp->GetChecksum())); } - if (remapped) - SetFileSpec(*remapped); } } } // If the file exists, read in the data. 
- if (m_mod_time != llvm::sys::TimePoint<>()) - m_data_sp = FileSystem::Instance().CreateDataBuffer(m_file_spec); + if (m_mod_time != llvm::sys::TimePoint<>()) { + m_data_sp = FileSystem::Instance().CreateDataBuffer( + m_support_file_sp->GetSpecOnly()); + m_checksum = llvm::MD5::hash(m_data_sp->GetData()); + } } -void SourceManager::File::SetFileSpec(FileSpec file_spec) { +void SourceManager::File::SetSupportFile(lldb::SupportFileSP support_file_sp) { + FileSpec file_spec = support_file_sp->GetSpecOnly(); resolve_tilde(file_spec); - m_file_spec = std::move(file_spec); - m_mod_time = FileSystem::Instance().GetModificationTime(m_file_spec); + m_support_file_sp = + std::make_shared(file_spec, support_file_sp->GetChecksum()); + m_mod_time = FileSystem::Instance().GetModificationTime(file_spec); } uint32_t SourceManager::File::GetLineOffset(uint32_t line) { @@ -603,7 +618,8 @@ bool SourceManager::File::ModificationTimeIsStale() const { // TODO: use host API to sign up for file modifications to anything in our // source cache and only update when we determine a file has been updated. // For now we check each time we want to display info for the file. - auto curr_mod_time = FileSystem::Instance().GetModificationTime(m_file_spec); + auto curr_mod_time = FileSystem::Instance().GetModificationTime( + m_support_file_sp->GetSpecOnly()); return curr_mod_time != llvm::sys::TimePoint<>() && m_mod_time != curr_mod_time; } @@ -644,7 +660,8 @@ size_t SourceManager::File::DisplaySourceLines(uint32_t line, debugger_sp->GetStopShowColumnAnsiSuffix()); HighlighterManager mgr; - std::string path = GetFileSpec().GetPath(/*denormalize*/ false); + std::string path = + GetSupportFile()->GetSpecOnly().GetPath(/*denormalize*/ false); // FIXME: Find a way to get the definitive language this file was written in // and pass it to the highlighter. 
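// Editor's sketch (not part of this patch): the checksum step introduced in
// CommonInitializer above uses LLVM's one-shot MD5 helper directly. A
// standalone equivalent, assuming only llvm/Support headers; hashSourceBuffer
// is a hypothetical name.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/MD5.h"

static llvm::MD5::MD5Result hashSourceBuffer(llvm::ArrayRef<uint8_t> bytes) {
  // Same call the patch applies to the freshly created data buffer,
  // m_data_sp->GetData(); MD5::hash digests an arbitrary byte buffer.
  return llvm::MD5::hash(bytes);
}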
 
 uint32_t SourceManager::File::GetLineOffset(uint32_t line) {
@@ -603,7 +618,8 @@ bool SourceManager::File::ModificationTimeIsStale() const {
   // TODO: use host API to sign up for file modifications to anything in our
   // source cache and only update when we determine a file has been updated.
   // For now we check each time we want to display info for the file.
-  auto curr_mod_time = FileSystem::Instance().GetModificationTime(m_file_spec);
+  auto curr_mod_time = FileSystem::Instance().GetModificationTime(
+      m_support_file_sp->GetSpecOnly());
   return curr_mod_time != llvm::sys::TimePoint<>() &&
          m_mod_time != curr_mod_time;
 }
@@ -644,7 +660,8 @@ size_t SourceManager::File::DisplaySourceLines(uint32_t line,
                                  debugger_sp->GetStopShowColumnAnsiSuffix());
 
   HighlighterManager mgr;
-  std::string path = GetFileSpec().GetPath(/*denormalize*/ false);
+  std::string path =
+      GetSupportFile()->GetSpecOnly().GetPath(/*denormalize*/ false);
 
   // FIXME: Find a way to get the definitive language this file was written in
   // and pass it to the highlighter.
   const auto &h = mgr.getHighlighterFor(lldb::eLanguageTypeUnknown, path);
@@ -698,7 +715,8 @@ void SourceManager::File::FindLinesMatchingRegex(
 
 bool lldb_private::operator==(const SourceManager::File &lhs,
                               const SourceManager::File &rhs) {
-  if (lhs.m_file_spec != rhs.m_file_spec)
+  if (!lhs.GetSupportFile()->Equal(*rhs.GetSupportFile(),
+                                   SupportFile::eEqualChecksumIfSet))
     return false;
   return lhs.m_mod_time == rhs.m_mod_time;
 }
@@ -778,9 +796,9 @@ void SourceManager::SourceFileCache::AddSourceFile(const FileSpec &file_spec,
   assert(file_sp && "invalid FileSP");
 
   AddSourceFileImpl(file_spec, file_sp);
-  const FileSpec &resolved_file_spec = file_sp->GetFileSpec();
+  const FileSpec &resolved_file_spec = file_sp->GetSupportFile()->GetSpecOnly();
   if (file_spec != resolved_file_spec)
-    AddSourceFileImpl(file_sp->GetFileSpec(), file_sp);
+    AddSourceFileImpl(file_sp->GetSupportFile()->GetSpecOnly(), file_sp);
 }
 
 void SourceManager::SourceFileCache::RemoveSourceFile(const FileSP &file_sp) {
@@ -820,14 +838,24 @@ SourceManager::FileSP SourceManager::SourceFileCache::FindSourceFile(
   return {};
 }
 
+static std::string toString(const Checksum &checksum) {
+  if (!checksum)
+    return "";
+  return std::string(llvm::formatv("{0}", checksum.digest()));
+}
+
 void SourceManager::SourceFileCache::Dump(Stream &stream) const {
-  stream << "Modification time   Lines    Path\n";
-  stream << "------------------- -------- --------------------------------\n";
+  // clang-format off
+  stream << "Modification time   MD5 Checksum (on-disk)           MD5 Checksum (line table)        Lines    Path\n";
+  stream << "------------------- -------------------------------- -------------------------------- -------- --------------------------------\n";
+  // clang-format on
   for (auto &entry : m_file_cache) {
     if (!entry.second)
       continue;
     FileSP file = entry.second;
-    stream.Format("{0:%Y-%m-%d %H:%M:%S} {1,8:d} {2}\n", file->GetTimestamp(),
+    stream.Format("{0:%Y-%m-%d %H:%M:%S} {1,32} {2,32} {3,8:d} {4}\n",
+                  file->GetTimestamp(), toString(file->GetChecksum()),
+                  toString(file->GetSupportFile()->GetChecksum()),
                   file->GetNumLines(), entry.first.GetPath());
   }
 }
diff --git a/lldb/source/Expression/REPL.cpp b/lldb/source/Expression/REPL.cpp
index a6a4ffb5e0af9e..56c50e346b39b8 100644
--- a/lldb/source/Expression/REPL.cpp
+++ b/lldb/source/Expression/REPL.cpp
@@ -473,7 +473,8 @@ void REPL::IOHandlerInputComplete(IOHandler &io_handler, std::string &code) {
 
           // Now set the default file and line to the REPL source file
           m_target.GetSourceManager().SetDefaultFileAndLine(
-              FileSpec(m_repl_source_path), new_default_line);
+              std::make_shared<SupportFile>(FileSpec(m_repl_source_path)),
+              new_default_line);
         }
         static_cast<IOHandlerEditline &>(io_handler)
             .SetBaseLineNumber(m_code.GetSize() + 1);
@@ -570,13 +571,11 @@ Status REPL::RunLoop() {
 
   lldb::IOHandlerSP io_handler_sp(GetIOHandler());
 
-  FileSpec save_default_file;
-  uint32_t save_default_line = 0;
+  std::optional<SourceManager::SupportFileAndLine> default_file_line;
 
   if (!m_repl_source_path.empty()) {
     // Save the current default file and line
-    m_target.GetSourceManager().GetDefaultFileAndLine(save_default_file,
-                                                      save_default_line);
+    default_file_line = m_target.GetSourceManager().GetDefaultFileAndLine();
   }
 
   debugger.RunIOHandlerAsync(io_handler_sp);
@@ -615,8 +614,8 @@ Status REPL::RunLoop() {
   }
 
   // Restore the default file and line
-  if (save_default_file && save_default_line != 0)
-    m_target.GetSourceManager().SetDefaultFileAndLine(save_default_file,
-                                                      save_default_line);
+  if (default_file_line)
+    m_target.GetSourceManager().SetDefaultFileAndLine(
+        default_file_line->support_file_sp, default_file_line->line);
 
   return error;
 }
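// Editor's sketch: SourceManager::SupportFileAndLine is declared in
// SourceManager.h, which this diff does not show. Judging from the usage
// above (constructed from a SupportFileSP plus a line number, accessed via
// ->support_file_sp and ->line), it is a small aggregate along these lines.
// A hypothetical reconstruction, not the actual header:
struct SupportFileAndLine {
  lldb::SupportFileSP support_file_sp;
  uint32_t line;
  SupportFileAndLine(lldb::SupportFileSP support_file_sp, uint32_t line)
      : support_file_sp(support_file_sp), line(line) {}
};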
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index 695801da9da69a..b0f49ebf2d2cbb 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -4241,6 +4241,9 @@ TypeSystemClang::GetTypeClass(lldb::opaque_compiler_type_t type) {
   // We don't handle pack indexing yet
   case clang::Type::PackIndexing:
     break;
+
+  case clang::Type::HLSLAttributedResource:
+    break;
   }
   // We don't know hot to display this type...
   return lldb::eTypeClassOther;
@@ -5148,6 +5151,9 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type,
   // We don't handle pack indexing yet
   case clang::Type::PackIndexing:
     break;
+
+  case clang::Type::HLSLAttributedResource:
+    break;
   }
   count = 0;
   return lldb::eEncodingInvalid;
@@ -5309,6 +5315,9 @@ lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) {
   // We don't handle pack indexing yet
   case clang::Type::PackIndexing:
     break;
+
+  case clang::Type::HLSLAttributedResource:
+    break;
   }
   // We don't know hot to display this type...
   return lldb::eFormatBytes;
diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp
index 5d90ed90b3d3fd..e35a4c318d358f 100644
--- a/lldb/source/Target/StackFrame.cpp
+++ b/lldb/source/Target/StackFrame.cpp
@@ -1923,7 +1923,7 @@ bool StackFrame::GetStatus(Stream &strm, bool show_frame_info, bool show_source,
         size_t num_lines =
             target->GetSourceManager().DisplaySourceLinesWithLineNumbers(
-                m_sc.line_entry.GetFile(), start_line, m_sc.line_entry.column,
+                m_sc.line_entry.file_sp, start_line, m_sc.line_entry.column,
                 source_lines_before, source_lines_after, "->", &strm);
         if (num_lines != 0)
           have_source = true;
diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp
index 7808bd3674ab19..3849ec5ed178d9 100644
--- a/lldb/source/Target/StackFrameList.cpp
+++ b/lldb/source/Target/StackFrameList.cpp
@@ -886,7 +886,7 @@ void StackFrameList::SetDefaultFileAndLineToSelectedFrame() {
       SymbolContext sc = frame_sp->GetSymbolContext(eSymbolContextLineEntry);
       if (sc.line_entry.GetFile())
         m_thread.CalculateTarget()->GetSourceManager().SetDefaultFileAndLine(
-            sc.line_entry.GetFile(), sc.line_entry.line);
+            sc.line_entry.file_sp, sc.line_entry.line);
     }
   }
 }
diff --git a/lldb/unittests/Core/SourceManagerTest.cpp b/lldb/unittests/Core/SourceManagerTest.cpp
index 58d6f6cb3f8503..26ab0edffb398d 100644
--- a/lldb/unittests/Core/SourceManagerTest.cpp
+++ b/lldb/unittests/Core/SourceManagerTest.cpp
@@ -8,6 +8,7 @@
 
 #include "lldb/Core/SourceManager.h"
 #include "lldb/Host/FileSystem.h"
+#include "lldb/Utility/SupportFile.h"
 #include "gtest/gtest.h"
 
 #include "TestingSupport/MockTildeExpressionResolver.h"
@@ -29,8 +30,8 @@ TEST_F(SourceFileCache, FindSourceFileFound) {
 
   // Insert: foo
   FileSpec foo_file_spec("foo");
-  auto foo_file_sp =
-      std::make_shared<SourceManager::File>(foo_file_spec, lldb::DebuggerSP());
+  auto foo_file_sp = std::make_shared<SourceManager::File>(
+      std::make_shared<SupportFile>(foo_file_spec), lldb::DebuggerSP());
   cache.AddSourceFile(foo_file_spec, foo_file_sp);
 
   // Query: foo, expect found.
@@ -43,8 +44,8 @@ TEST_F(SourceFileCache, FindSourceFileNotFound) {
 
   // Insert: foo
   FileSpec foo_file_spec("foo");
-  auto foo_file_sp =
-      std::make_shared<SourceManager::File>(foo_file_spec, lldb::DebuggerSP());
+  auto foo_file_sp = std::make_shared<SourceManager::File>(
+      std::make_shared<SupportFile>(foo_file_spec), lldb::DebuggerSP());
   cache.AddSourceFile(foo_file_spec, foo_file_sp);
 
   // Query: bar, expect not found.
@@ -63,7 +64,8 @@ TEST_F(SourceFileCache, FindSourceFileByUnresolvedPath) {
 
   // Create the file with the resolved file spec.
   auto foo_file_sp = std::make_shared<SourceManager::File>(
-      resolved_foo_file_spec, lldb::DebuggerSP());
+      std::make_shared<SupportFile>(resolved_foo_file_spec),
+      lldb::DebuggerSP());
 
   // Cache the result with the unresolved file spec.
   cache.AddSourceFile(foo_file_spec, foo_file_sp);
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index cf0a6f96fb012e..c75b75edaf2ca0 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -2189,10 +2189,6 @@ example:
 ``nosanitize_coverage``
     This attribute indicates that SanitizerCoverage instrumentation is disabled
     for this function.
-``nosanitize_realtime``
-    This attribute indicates that the Realtime Sanitizer instrumentation is
-    disabled for this function.
-    This attribute is incompatible with the ``sanitize_realtime`` attribute.
 ``null_pointer_is_valid``
     If ``null_pointer_is_valid`` is set, then the ``null`` address in
     address-space 0 is considered to be a valid address for memory loads and
@@ -2319,7 +2315,6 @@ example:
     This attribute indicates that RealtimeSanitizer checks
     (realtime safety analysis - no allocations, syscalls or exceptions) are enabled
     for this function.
-    This attribute is incompatible with the ``nosanitize_realtime`` attribute.
 ``speculative_load_hardening``
     This attribute indicates that `Speculative Load Hardening <https://llvm.org/docs/SpeculativeLoadHardening.html>`_
diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst
index 9bd2b1d435fd0a..2b5b5139858e7f 100644
--- a/llvm/docs/Security.rst
+++ b/llvm/docs/Security.rst
@@ -46,9 +46,9 @@ username for an individual isn't available, the brackets will be empty.
 * Josh Stone (Red Hat; Rust) [@cuviper]
 * Kristof Beyls (ARM) [@kbeyls]
 * Matthew Riley (Google) [@mmdriley]
+* Matthew Voss (Sony) [@ormris]
 * Nikhil Gupta (Nvidia) []
 * Oliver Hunt (Apple) [@ojhunt]
-* Paul Robinson (Sony) [@pogo59]
 * Peter Smith (ARM) [@smithp35]
 * Pietro Albini (Ferrous Systems; Rust) [@pietroalbini]
 * Serge Guelton (Mozilla) [@serge-sans-paille]
diff --git a/llvm/docs/TestSuiteGuide.md b/llvm/docs/TestSuiteGuide.md
index 9552cd89aa1c1b..19db0ee7d01b82 100644
--- a/llvm/docs/TestSuiteGuide.md
+++ b/llvm/docs/TestSuiteGuide.md
@@ -134,6 +134,44 @@ Every program can work as a correctness test. Some programs are unsuitable for
 performance measurements. Setting the `TEST_SUITE_BENCHMARKING_ONLY` CMake
 option to `ON` will disable them.
 
+The MultiSource benchmarks consist of the following apps and benchmarks:
+
+| MultiSource          | Language  | Application Area              | Remark               |
+|----------------------|-----------|-------------------------------|----------------------|
+| 7zip | C/C++ | Compression/Decompression | |
+| ASCI_Purple | C | SMG2000 benchmark and solver | Memory intensive app |
+| ASC_Sequoia | C | Simulation and solver | |
+| BitBench | C | uudecode/uuencode utility | Bit Stream benchmark for functional compilers |
+| Bullet | C++ | Bullet 2.75 physics engine | |
+| DOE-ProxyApps-C++ | C++ | HPC/scientific apps | Small applications, representative of our larger DOE workloads |
+| DOE-ProxyApps-C | C | HPC/scientific apps | " |
+| Fhourstones | C | Game/solver | Integer benchmark that efficiently solves positions in the game of Connect-4 |
+| Fhourstones-3.1 | C | Game/solver | " |
+| FreeBench | C | Benchmark suite | Raytracer, four in a row, neural network, file compressor, Fast Fourier/Cosine/Sine Transform |
+| llubenchmark | C | Linked-list micro-benchmark | |
+| mafft | C | Bioinformatics | A multiple sequence alignment program |
+| MallocBench | C | Benchmark suite | cfrac, espresso, gawk, gs, make, p2c, perl |
+| McCat | C | Benchmark suite | Quicksort, bubblesort, eigenvalues |
+| mediabench | C | Benchmark suite | adpcm, g721, gsm, jpeg, mpeg2 |
+| MiBench | C | Embedded benchmark suite | Automotive, consumer, office, security, telecom apps |
+| nbench | C | | BYTE Magazine's BYTEmark benchmark program |
+| NPB-serial | C | Parallel computing | Serial version of the NPB IS code |
+| Olden | C | Data Structures | SGI version of the Olden benchmark |
+| OptimizerEval | C | Solver | Preston Briggs' optimizer evaluation framework |
+| PAQ8p | C++ | Data compression | |
+| Prolangs-C++ | C++ | Benchmark suite | city, employ, life, NP, ocean, primes, simul, vcirc |
+| Prolangs-C | C | Benchmark suite | agrep, archie-client, bison, gnugo, unix-smail |
+| Ptrdist | C | Pointer-Intensive Benchmark Suite | |
+| Rodinia | C | Scientific apps | backprop, pathfinder, srad |
+| SciMark2-C | C | Scientific apps | FFT, LU, Montecarlo, sparse matmul |
+| sim | C | Dynamic programming | A Time-Efficient, Linear-Space Local Similarity Algorithm |
+| tramp3d-v4 | C++ | Numerical analysis | Template-intensive numerical program based on FreePOOMA |
+| Trimaran | C | Encryption | 3des, md5, crc |
+| TSVC | C | Vectorization benchmark | Test Suite for Vectorizing Compilers (TSVC) |
+| VersaBench | C | Benchmark suite | 8b10b, beamformer, bmm, dbms, ecbdes |
+
+All MultiSource applications are suitable for performance measurements
+and will still run when the `TEST_SUITE_BENCHMARKING_ONLY` CMake option is
+set to `ON`.
 
 Configuration
 -------------
diff --git a/llvm/include/llvm/Analysis/PtrUseVisitor.h b/llvm/include/llvm/Analysis/PtrUseVisitor.h
index f5c23b1b4e014d..237d328721609b 100644
--- a/llvm/include/llvm/Analysis/PtrUseVisitor.h
+++ b/llvm/include/llvm/Analysis/PtrUseVisitor.h
@@ -157,7 +157,7 @@ class PtrUseVisitorBase {
   ///
   /// This will visit the users with the same offset of the current visit
   /// (including an unknown offset if that is the current state).
-  void enqueueUsers(Instruction &I);
+  void enqueueUsers(Value &I);
 
   /// Walk the operands of a GEP and adjust the offset as appropriate.
   ///
@@ -208,11 +208,14 @@ class PtrUseVisitor : protected InstVisitor<DerivedT>,
   /// Recursively visit the uses of the given pointer.
   /// \returns An info struct about the pointer. See \c PtrInfo for details.
-  PtrInfo visitPtr(Instruction &I) {
+  /// We may also need to process Argument pointers, so the input is typed as
+  /// the common Value base class.
+  PtrInfo visitPtr(Value &I) {
     // This must be a pointer type. Get an integer type suitable to hold
     // offsets on this pointer.
     // FIXME: Support a vector of pointers.
     assert(I.getType()->isPointerTy());
+    assert(isa<Instruction>(I) || isa<Argument>(I));
     IntegerType *IntIdxTy = cast<IntegerType>(DL.getIndexType(I.getType()));
     IsOffsetKnown = true;
     Offset = APInt(IntIdxTy->getBitWidth(), 0);
diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h
index a4cc814549c95b..21e28d546286ee 100644
--- a/llvm/include/llvm/BinaryFormat/DXContainer.h
+++ b/llvm/include/llvm/BinaryFormat/DXContainer.h
@@ -313,13 +313,19 @@ enum class ResourceKind : uint32_t {
 
 ArrayRef<EnumEntry<ResourceKind>> getResourceKinds();
 
-#define RESOURCE_FLAG(Val, Enum) Enum = Val,
-enum class ResourceFlag : uint32_t {
-#include "DXContainerConstants.def"
+#define RESOURCE_FLAG(Index, Enum) bool Enum = false;
+struct ResourceFlags {
+  ResourceFlags() {};
+  struct FlagsBits {
+#include "llvm/BinaryFormat/DXContainerConstants.def"
+  };
+  union {
+    uint32_t Flags;
+    FlagsBits Bits;
+  };
+  bool operator==(const uint32_t RFlags) const { return Flags == RFlags; }
 };
 
-ArrayRef<EnumEntry<ResourceFlag>> getResourceFlags();
-
 namespace v0 {
 struct RuntimeInfo {
   PipelinePSVInfo StageInfo;
@@ -439,12 +445,12 @@ struct RuntimeInfo : public v1::RuntimeInfo {
 
 struct ResourceBindInfo : public v0::ResourceBindInfo {
   ResourceKind Kind;
-  uint32_t Flags;
+  ResourceFlags Flags;
 
   void swapBytes() {
     v0::ResourceBindInfo::swapBytes();
     sys::swapByteOrder(Kind);
-    sys::swapByteOrder(Flags);
+    sys::swapByteOrder(Flags.Flags);
   }
 };
diff --git a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def
index 4111cecb018bb3..1aacbb2f65b27f 100644
--- a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def
+++ b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def
@@ -190,8 +190,7 @@ RESOURCE_KIND(18, FeedbackTexture2DArray)
 #endif // RESOURCE_KIND
 
 #ifdef RESOURCE_FLAG
-RESOURCE_FLAG(0, None)
-RESOURCE_FLAG(1, UsedByAtomic64)
+RESOURCE_FLAG(0, UsedByAtomic64)
 
 #undef RESOURCE_FLAG
 #endif // RESOURCE_FLAG
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 8a2e6583af87c5..4beac37a583445 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -759,7 +759,6 @@ enum AttributeKindCodes {
   ATTR_KIND_INITIALIZES = 94,
   ATTR_KIND_HYBRID_PATCHABLE = 95,
   ATTR_KIND_SANITIZE_REALTIME = 96,
-  ATTR_KIND_NO_SANITIZE_REALTIME = 97,
 };
 
 enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index 80936c0ee83355..891e34fec0c798 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -212,9 +212,6 @@ def NoSanitizeBounds : EnumAttr<"nosanitize_bounds", [FnAttr]>;
 /// No SanitizeCoverage instrumentation.
 def NoSanitizeCoverage : EnumAttr<"nosanitize_coverage", [FnAttr]>;
 
-/// No SanitizeRealtime instrumentation.
-def NoSanitizeRealtime : EnumAttr<"nosanitize_realtime", [FnAttr]>;
-
 /// Null pointer in address space zero is valid.
 def NullPointerIsValid : EnumAttr<"null_pointer_is_valid", [FnAttr]>;
diff --git a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
index e432359b7bbd07..66ad057ab0e30f 100644
--- a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
+++ b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
@@ -72,6 +72,7 @@ struct ShaderHash {
   std::vector<llvm::yaml::Hex8> Digest;
 };
 
+using ResourceFlags = dxbc::PSV::ResourceFlags;
 using ResourceBindInfo = dxbc::PSV::v2::ResourceBindInfo;
 
 struct SignatureElement {
@@ -178,7 +179,6 @@ LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::PSV::ComponentType)
 LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::PSV::InterpolationMode)
 LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::PSV::ResourceType)
 LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::PSV::ResourceKind)
-LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::PSV::ResourceFlag)
 LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::D3DSystemValue)
 LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::SigComponentType)
 LLVM_YAML_DECLARE_ENUM_TRAITS(llvm::dxbc::SigMinPrecision)
@@ -221,6 +221,10 @@ template <> struct MappingTraits<DXContainerYAML::Object> {
   static void mapping(IO &IO, DXContainerYAML::Object &Obj);
 };
 
+template <> struct MappingTraits<DXContainerYAML::ResourceFlags> {
+  static void mapping(IO &IO, DXContainerYAML::ResourceFlags &Flags);
+};
+
 template <> struct MappingTraits<DXContainerYAML::ResourceBindInfo> {
   static void mapping(IO &IO, DXContainerYAML::ResourceBindInfo &Res);
 };
diff --git a/llvm/include/llvm/SandboxIR/SandboxIR.h b/llvm/include/llvm/SandboxIR/SandboxIR.h
index 0f7752eda6d66f..2ed7243fa612f4 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIR.h
+++ b/llvm/include/llvm/SandboxIR/SandboxIR.h
@@ -113,6 +113,7 @@ namespace sandboxir {
 
 class BasicBlock;
 class ConstantInt;
+class ConstantFP;
 class Context;
 class Function;
 class Instruction;
@@ -597,6 +598,94 @@ class ConstantInt : public Constant {
 #endif
 };
 
+// TODO: This should inherit from ConstantData.
+class ConstantFP final : public Constant {
+  ConstantFP(llvm::ConstantFP *C, Context &Ctx)
+      : Constant(ClassID::ConstantFP, C, Ctx) {}
+  friend class Context; // For constructor.
+
+public:
+  /// This returns a ConstantFP, or a vector containing a splat of a ConstantFP,
+  /// for the specified value in the specified type. This should only be used
+  /// for simple constant values like 2.0/1.0 etc, that are known-valid both as
+  /// host double and as the target format.
+  static Constant *get(Type *Ty, double V);
+
+  /// If Ty is a vector type, return a Constant with a splat of the given
+  /// value. Otherwise return a ConstantFP for the given value.
+  static Constant *get(Type *Ty, const APFloat &V);
+
+  static Constant *get(Type *Ty, StringRef Str);
+
+  static ConstantFP *get(const APFloat &V, Context &Ctx);
+
+  static Constant *getNaN(Type *Ty, bool Negative = false,
+                          uint64_t Payload = 0);
+  static Constant *getQNaN(Type *Ty, bool Negative = false,
+                           APInt *Payload = nullptr);
+  static Constant *getSNaN(Type *Ty, bool Negative = false,
+                           APInt *Payload = nullptr);
+  static Constant *getZero(Type *Ty, bool Negative = false);
+
+  static Constant *getNegativeZero(Type *Ty);
+  static Constant *getInfinity(Type *Ty, bool Negative = false);
+
+  /// Return true if Ty is big enough to represent V.
+  static bool isValueValidForType(Type *Ty, const APFloat &V);
+
+  inline const APFloat &getValueAPF() const {
+    return cast<llvm::ConstantFP>(Val)->getValueAPF();
+  }
+  inline const APFloat &getValue() const {
+    return cast<llvm::ConstantFP>(Val)->getValue();
+  }
+
+  /// Return true if the value is positive or negative zero.
+  bool isZero() const { return cast<llvm::ConstantFP>(Val)->isZero(); }
+
+  /// Return true if the sign bit is set.
+  bool isNegative() const { return cast<llvm::ConstantFP>(Val)->isNegative(); }
+
+  /// Return true if the value is infinity
+  bool isInfinity() const { return cast<llvm::ConstantFP>(Val)->isInfinity(); }
+
+  /// Return true if the value is a NaN.
+  bool isNaN() const { return cast<llvm::ConstantFP>(Val)->isNaN(); }
+
+  /// We don't rely on operator== working on double values, as it returns true
+  /// for things that are clearly not equal, like -0.0 and 0.0.
+  /// As such, this method can be used to do an exact bit-for-bit comparison of
+  /// two floating point values. The version with a double operand is retained
+  /// because it's so convenient to write isExactlyValue(2.0), but please use
+  /// it only for simple constants.
+  bool isExactlyValue(const APFloat &V) const {
+    return cast<llvm::ConstantFP>(Val)->isExactlyValue(V);
+  }
+
+  bool isExactlyValue(double V) const {
+    return cast<llvm::ConstantFP>(Val)->isExactlyValue(V);
+  }
+
+  /// For isa/dyn_cast.
+  static bool classof(const sandboxir::Value *From) {
+    return From->getSubclassID() == ClassID::ConstantFP;
+  }
+
+  // TODO: Better name: getOperandNo(const Use&). Should be private.
+  unsigned getUseOperandNo(const Use &Use) const final {
+    llvm_unreachable("ConstantFP has no operands!");
+  }
+#ifndef NDEBUG
+  void verify() const override {
+    assert(isa<llvm::ConstantFP>(Val) && "Expected a ConstantFP!");
+  }
+  void dumpOS(raw_ostream &OS) const override {
+    dumpCommonPrefix(OS);
+    dumpCommonSuffix(OS);
+  }
+#endif
+};
+
 /// Iterator for `Instruction`s in a `BasicBlock.
 /// \Returns an sandboxir::Instruction & when derereferenced.
 class BBIterator {
@@ -3156,7 +3245,10 @@ class Context {
   Constant *getOrCreateConstant(llvm::Constant *LLVMC) {
     return cast<Constant>(getOrCreateValueInternal(LLVMC, 0));
   }
-  friend class ConstantInt; // For getOrCreateConstant().
+  // Friends for getOrCreateConstant().
+#define DEF_CONST(ID, CLASS) friend class CLASS;
+#include "llvm/SandboxIR/SandboxIRValues.def"
+
   /// Create a sandboxir::BasicBlock for an existing LLVM IR \p BB. This will
   /// also create all contents of the block.
   BasicBlock *createBasicBlock(llvm::BasicBlock *BB);
diff --git a/llvm/include/llvm/SandboxIR/SandboxIRValues.def b/llvm/include/llvm/SandboxIR/SandboxIRValues.def
index d29fc3b5e95871..2fc24ed71c4cf6 100644
--- a/llvm/include/llvm/SandboxIR/SandboxIRValues.def
+++ b/llvm/include/llvm/SandboxIR/SandboxIRValues.def
@@ -26,6 +26,7 @@ DEF_USER(User, User)
 DEF_VALUE(Block, BasicBlock)
 DEF_CONST(Constant, Constant)
 DEF_CONST(ConstantInt, ConstantInt)
+DEF_CONST(ConstantFP, ConstantFP)
 
 #ifndef DEF_INSTR
 #define DEF_INSTR(ID, OPCODE, CLASS)
diff --git a/llvm/include/llvm/SandboxIR/Type.h b/llvm/include/llvm/SandboxIR/Type.h
index 4588cd2f738876..89e787f5f5d4b2 100644
--- a/llvm/include/llvm/SandboxIR/Type.h
+++ b/llvm/include/llvm/SandboxIR/Type.h
@@ -27,6 +27,7 @@ class PointerType;
 class VectorType;
 class FunctionType;
 #define DEF_INSTR(ID, OPCODE, CLASS) class CLASS;
+#define DEF_CONST(ID, CLASS) class CLASS;
 #include "llvm/SandboxIR/SandboxIRValues.def"
 
 /// Just like llvm::Type these are immutable, unique, never get freed and can
@@ -42,7 +43,7 @@ class Type {
   friend class ConstantInt; // For LLVMTy.
   // Friend all instruction classes because `create()` functions use LLVMTy.
 #define DEF_INSTR(ID, OPCODE, CLASS) friend class CLASS;
-  // TODO: Friend DEF_CONST()
+#define DEF_CONST(ID, CLASS) friend class CLASS;
 #include "llvm/SandboxIR/SandboxIRValues.def"
 
   Context &Ctx;
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index f5258601fd5d49..ba3619417114c7 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1040,28 +1040,13 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
   case RecurKind::Xor:
   case RecurKind::Add:
   case RecurKind::Or:
-    // Adding, Xoring, Oring zero to a number does not change it.
-    return ConstantInt::get(Tp, 0);
   case RecurKind::Mul:
-    // Multiplying a number by 1 does not change it.
-    return ConstantInt::get(Tp, 1);
   case RecurKind::And:
-    // AND-ing a number with an all-1 value does not change it.
-    return ConstantInt::get(Tp, -1, true);
   case RecurKind::FMul:
-    // Multiplying a number by 1 does not change it.
-    return ConstantFP::get(Tp, 1.0L);
-  case RecurKind::FMulAdd:
   case RecurKind::FAdd:
-    // Adding zero to a number does not change it.
-    // FIXME: Ideally we should not need to check FMF for FAdd and should always
-    // use -0.0. However, this will currently result in mixed vectors of 0.0/-0.0.
-    // Instead, we should ensure that 1) the FMF from FAdd are propagated to the PHI
-    // nodes where possible, and 2) PHIs with the nsz flag + -0.0 use 0.0. This would
-    // mean we can then remove the check for noSignedZeros() below (see D98963).
-    if (FMF.noSignedZeros())
-      return ConstantFP::get(Tp, 0.0L);
-    return ConstantFP::get(Tp, -0.0L);
+    return ConstantExpr::getBinOpIdentity(getOpcode(K), Tp, false,
+                                          FMF.noSignedZeros());
+  case RecurKind::FMulAdd:
+    return ConstantExpr::getBinOpIdentity(Instruction::FAdd, Tp, false,
+                                          FMF.noSignedZeros());
   case RecurKind::UMin:
     return ConstantInt::get(Tp, -1, true);
   case RecurKind::UMax:
diff --git a/llvm/lib/Analysis/PtrUseVisitor.cpp b/llvm/lib/Analysis/PtrUseVisitor.cpp
index 49304818d7efed..9c79546f491eff 100644
--- a/llvm/lib/Analysis/PtrUseVisitor.cpp
+++ b/llvm/lib/Analysis/PtrUseVisitor.cpp
@@ -17,7 +17,7 @@
 
 using namespace llvm;
 
-void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) {
+void detail::PtrUseVisitorBase::enqueueUsers(Value &I) {
   for (Use &U : I.uses()) {
     if (VisitedUses.insert(&U).second) {
       UseToVisit NewU = {
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 173faa32a3878d..533fe62fb8cdd6 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5921,6 +5921,61 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
     break;
   }
+  case Instruction::BitCast: {
+    const Value *Src;
+    if (!match(Op, m_ElementWiseBitCast(m_Value(Src))) ||
+        !Src->getType()->isIntOrIntVectorTy())
+      break;
+
+    const Type *Ty = Op->getType()->getScalarType();
+    KnownBits Bits(Ty->getScalarSizeInBits());
+    computeKnownBits(Src, DemandedElts, Bits, Depth + 1, Q);
+
+    // Transfer information from the sign bit.
+    if (Bits.isNonNegative())
+      Known.signBitMustBeZero();
+    else if (Bits.isNegative())
+      Known.signBitMustBeOne();
+
+    if (Ty->isIEEE()) {
+      // IEEE floats are NaN when all bits of the exponent plus at least one of
+      // the fraction bits are 1. This means:
+      //   - If we assume unknown bits are 0 and the value is NaN, it will
+      //     always be NaN
+      //   - If we assume unknown bits are 1 and the value is not NaN, it can
+      //     never be NaN
+      if (APFloat(Ty->getFltSemantics(), Bits.One).isNaN())
+        Known.KnownFPClasses = fcNan;
+      else if (!APFloat(Ty->getFltSemantics(), ~Bits.Zero).isNaN())
+        Known.knownNot(fcNan);
+
+      // Build KnownBits representing Inf and check if it must be equal or
+      // unequal to this value.
+      auto InfKB = KnownBits::makeConstant(
+          APFloat::getInf(Ty->getFltSemantics()).bitcastToAPInt());
+      InfKB.Zero.clearSignBit();
+      if (const auto InfResult = KnownBits::eq(Bits, InfKB)) {
+        assert(!InfResult.value());
+        Known.knownNot(fcInf);
+      } else if (Bits == InfKB) {
+        Known.KnownFPClasses = fcInf;
+      }
+
+      // Build KnownBits representing Zero and check if it must be equal or
+      // unequal to this value.
+      auto ZeroKB = KnownBits::makeConstant(
+          APFloat::getZero(Ty->getFltSemantics()).bitcastToAPInt());
+      ZeroKB.Zero.clearSignBit();
+      if (const auto ZeroResult = KnownBits::eq(Bits, ZeroKB)) {
+        assert(!ZeroResult.value());
+        Known.knownNot(fcZero);
+      } else if (Bits == ZeroKB) {
+        Known.KnownFPClasses = fcZero;
+      }
+    }
+
+    break;
+  }
   default:
     break;
   }
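// Editor's sketch (not part of this patch): the NaN reasoning above is easy
// to verify on concrete float32 bit patterns
// (sign[31] | exponent[30:23] | fraction[22:0]). A minimal standalone C++20
// illustration, independent of the APFloat/KnownBits machinery:
#include <bit>
#include <cassert>
#include <cmath>
#include <cstdint>

int main() {
  // If even one exponent bit is known 0, the all-ones completion of the
  // unknown bits still cannot be NaN (the exponent is never all ones).
  uint32_t allOnesCompletion = 0x7f000000u | 0x007fffffu;
  assert(!std::isnan(std::bit_cast<float>(allOnesCompletion)));

  // If the exponent is known all ones and some fraction bit is known 1, the
  // all-zeros completion is already NaN, so the value is always NaN.
  uint32_t allZerosCompletion = 0x7f800000u | 0x00400000u;
  assert(std::isnan(std::bit_cast<float>(allZerosCompletion)));
  return 0;
}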
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index cc742ab35f4498..32ce34114b2f50 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -66,9 +66,15 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::umul_fix:
   case Intrinsic::umul_fix_sat:
   case Intrinsic::sqrt: // Begin floating-point.
+  case Intrinsic::asin:
+  case Intrinsic::acos:
+  case Intrinsic::atan:
   case Intrinsic::sin:
   case Intrinsic::cos:
   case Intrinsic::tan:
+  case Intrinsic::sinh:
+  case Intrinsic::cosh:
+  case Intrinsic::tanh:
   case Intrinsic::exp:
   case Intrinsic::exp2:
   case Intrinsic::log:
diff --git a/llvm/lib/BinaryFormat/DXContainer.cpp b/llvm/lib/BinaryFormat/DXContainer.cpp
index 790947cc729c0b..97ceb16ccf53f4 100644
--- a/llvm/lib/BinaryFormat/DXContainer.cpp
+++ b/llvm/lib/BinaryFormat/DXContainer.cpp
@@ -109,13 +109,3 @@ static const EnumEntry<PSV::ResourceKind> ResourceKindNames[] = {
 ArrayRef<EnumEntry<PSV::ResourceKind>> PSV::getResourceKinds() {
   return ArrayRef(ResourceKindNames);
 }
-
-#define RESOURCE_FLAG(Val, Enum) {#Enum, PSV::ResourceFlag::Enum},
-
-static const EnumEntry<PSV::ResourceFlag> ResourceFlagNames[] = {
-#include "llvm/BinaryFormat/DXContainerConstants.def"
-};
-
-ArrayRef<EnumEntry<PSV::ResourceFlag>> PSV::getResourceFlags() {
-  return ArrayRef(ResourceFlagNames);
-}
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 974a05023c72a5..654be985a3229c 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2093,8 +2093,6 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
     return Attribute::NoSanitizeBounds;
   case bitc::ATTR_KIND_NO_SANITIZE_COVERAGE:
     return Attribute::NoSanitizeCoverage;
-  case bitc::ATTR_KIND_NO_SANITIZE_REALTIME:
-    return Attribute::NoSanitizeRealtime;
   case bitc::ATTR_KIND_NULL_POINTER_IS_VALID:
     return Attribute::NullPointerIsValid;
   case bitc::ATTR_KIND_OPTIMIZE_FOR_DEBUGGING:
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 3c5097f4af7c56..26fd02b3e1a043 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -795,8 +795,6 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_NO_SANITIZE_BOUNDS;
   case Attribute::NoSanitizeCoverage:
     return bitc::ATTR_KIND_NO_SANITIZE_COVERAGE;
-  case llvm::Attribute::NoSanitizeRealtime:
-    return bitc::ATTR_KIND_NO_SANITIZE_REALTIME;
   case Attribute::NullPointerIsValid:
     return bitc::ATTR_KIND_NULL_POINTER_IS_VALID;
   case Attribute::OptimizeForDebugging:
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 675d88d6d38cd9..5140f5951d6d3f 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
 #include <cassert>
 
 using namespace llvm;
@@ -437,69 +438,33 @@ CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
   default:
     llvm_unreachable("Impossible reduction kind");
   case Intrinsic::vp_reduce_add:
-    Reduction = Builder.CreateAddReduce(RedOp);
-    Reduction = Builder.CreateAdd(Reduction, Start);
-    break;
   case Intrinsic::vp_reduce_mul:
-    Reduction = Builder.CreateMulReduce(RedOp);
-    Reduction = Builder.CreateMul(Reduction, Start);
-    break;
   case Intrinsic::vp_reduce_and:
-    Reduction = Builder.CreateAndReduce(RedOp);
-    Reduction = Builder.CreateAnd(Reduction, Start);
-    break;
   case Intrinsic::vp_reduce_or:
-    Reduction = Builder.CreateOrReduce(RedOp);
-    Reduction = Builder.CreateOr(Reduction, Start);
-    break;
-  case Intrinsic::vp_reduce_xor:
-    Reduction = Builder.CreateXorReduce(RedOp);
-    Reduction = Builder.CreateXor(Reduction, Start);
-    break;
-  case Intrinsic::vp_reduce_smax:
-    Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true);
+  case Intrinsic::vp_reduce_xor: {
+    Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID();
+    unsigned Opc = getArithmeticReductionInstruction(RedID);
+    assert(Instruction::isBinaryOp(Opc));
+    Reduction = Builder.CreateUnaryIntrinsic(RedID, RedOp);
     Reduction =
-        Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start);
+        Builder.CreateBinOp((Instruction::BinaryOps)Opc, Reduction, Start);
     break;
+  }
+  case Intrinsic::vp_reduce_smax:
   case Intrinsic::vp_reduce_smin:
-    Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true);
-    Reduction =
-        Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start);
-    break;
   case Intrinsic::vp_reduce_umax:
-    Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false);
-    Reduction =
-        Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start);
-    break;
   case Intrinsic::vp_reduce_umin:
-    Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false);
-    Reduction =
-        Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start);
-    break;
   case Intrinsic::vp_reduce_fmax:
-    Reduction = Builder.CreateFPMaxReduce(RedOp);
-    transferDecorations(*Reduction, VPI);
-    Reduction =
-        Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start);
-    break;
   case Intrinsic::vp_reduce_fmin:
-    Reduction = Builder.CreateFPMinReduce(RedOp);
-    transferDecorations(*Reduction, VPI);
-    Reduction =
-        Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
-    break;
   case Intrinsic::vp_reduce_fmaximum:
-    Reduction = Builder.CreateFPMaximumReduce(RedOp);
-    transferDecorations(*Reduction, VPI);
-    Reduction =
-        Builder.CreateBinaryIntrinsic(Intrinsic::maximum, Reduction, Start);
-    break;
-  case Intrinsic::vp_reduce_fminimum:
-    Reduction = Builder.CreateFPMinimumReduce(RedOp);
+  case Intrinsic::vp_reduce_fminimum: {
+    Intrinsic::ID RedID = *VPI.getFunctionalIntrinsicID();
+    Intrinsic::ID ScalarID = getMinMaxReductionIntrinsicOp(RedID);
+    Reduction = Builder.CreateUnaryIntrinsic(RedID, RedOp);
     transferDecorations(*Reduction, VPI);
-    Reduction =
-        Builder.CreateBinaryIntrinsic(Intrinsic::minimum, Reduction, Start);
+    Reduction = Builder.CreateBinaryIntrinsic(ScalarID, Reduction, Start);
     break;
+  }
   case Intrinsic::vp_reduce_fadd:
     Reduction = Builder.CreateFAddReduce(Start, RedOp);
     break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2557fa288606e7..87221c14433ab5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -135,6 +135,9 @@ class VectorLegalizer {
   SDValue ExpandVP_SELECT(SDNode *Node);
   SDValue ExpandVP_MERGE(SDNode *Node);
   SDValue ExpandVP_REM(SDNode *Node);
+  SDValue ExpandVP_FNEG(SDNode *Node);
+  SDValue ExpandVP_FABS(SDNode *Node);
+  SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
   SDValue ExpandSELECT(SDNode *Node);
   std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
   SDValue ExpandStore(SDNode *N);
@@ -699,6 +702,11 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
     // These operations are used to do promotion so they can't be promoted
     // themselves.
     llvm_unreachable("Don't know how to promote this operation!");
+  case ISD::VP_FABS:
+  case ISD::VP_FCOPYSIGN:
+  case ISD::VP_FNEG:
+    // Promoting fabs, fneg, and fcopysign changes their semantics.
+    llvm_unreachable("These operations should not be promoted");
   }
 
   // There are currently two cases of vector promotion:
@@ -887,6 +895,24 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
       return;
     }
     break;
+  case ISD::VP_FNEG:
+    if (SDValue Expanded = ExpandVP_FNEG(Node)) {
+      Results.push_back(Expanded);
+      return;
+    }
+    break;
+  case ISD::VP_FABS:
+    if (SDValue Expanded = ExpandVP_FABS(Node)) {
+      Results.push_back(Expanded);
+      return;
+    }
+    break;
+  case ISD::VP_FCOPYSIGN:
+    if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
+      Results.push_back(Expanded);
+      return;
+    }
+    break;
   case ISD::SELECT:
     Results.push_back(ExpandSELECT(Node));
     return;
@@ -1557,6 +1583,80 @@ SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
   return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
 }
 
+SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
+  EVT VT = Node->getValueType(0);
+  EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+  if (!TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
+    return SDValue();
+
+  SDValue Mask = Node->getOperand(1);
+  SDValue EVL = Node->getOperand(2);
+
+  SDLoc DL(Node);
+  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+  SDValue SignMask = DAG.getConstant(
+      APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+  SDValue Xor = DAG.getNode(ISD::VP_XOR, DL, IntVT, Cast, SignMask, Mask, EVL);
+  return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
+}
+
+SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
+  EVT VT = Node->getValueType(0);
+  EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+  if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT))
+    return SDValue();
+
+  SDValue Mask = Node->getOperand(1);
+  SDValue EVL = Node->getOperand(2);
+
+  SDLoc DL(Node);
+  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+  SDValue ClearSignMask = DAG.getConstant(
+      APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+  SDValue ClearSign =
+      DAG.getNode(ISD::VP_AND, DL, IntVT, Cast, ClearSignMask, Mask, EVL);
+  return DAG.getNode(ISD::BITCAST, DL, VT, ClearSign);
+}
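// Editor's sketch (not part of this patch): both expansions above lean on the
// same IEEE-754 property: negation and absolute value only touch the sign
// bit, so they reduce to integer bitwise ops once each lane is bitcast to an
// integer. The scalar analogue in standalone C++20 (function names are
// illustrative only):
#include <bit>
#include <cstdint>

float fnegViaXor(float x) { // per-lane effect of ExpandVP_FNEG's VP_XOR
  return std::bit_cast<float>(std::bit_cast<uint32_t>(x) ^ 0x80000000u);
}

float fabsViaAnd(float x) { // ExpandVP_FABS: AND with the signed-max mask
  return std::bit_cast<float>(std::bit_cast<uint32_t>(x) & 0x7fffffffu);
}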
+
+SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
+  EVT VT = Node->getValueType(0);
+
+  if (VT != Node->getOperand(1).getValueType())
+    return SDValue();
+
+  EVT IntVT = VT.changeVectorElementTypeToInteger();
+  if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT) ||
+      !TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
+    return SDValue();
+
+  SDValue Mask = Node->getOperand(2);
+  SDValue EVL = Node->getOperand(3);
+
+  SDLoc DL(Node);
+  SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+  SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
+
+  SDValue SignMask = DAG.getConstant(
+      APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+  SDValue SignBit =
+      DAG.getNode(ISD::VP_AND, DL, IntVT, Sign, SignMask, Mask, EVL);
+
+  SDValue ClearSignMask = DAG.getConstant(
+      APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+  SDValue ClearedSign =
+      DAG.getNode(ISD::VP_AND, DL, IntVT, Mag, ClearSignMask, Mask, EVL);
+
+  SDNodeFlags Flags;
+  Flags.setDisjoint(true);
+
+  SDValue CopiedSign = DAG.getNode(ISD::VP_OR, DL, IntVT, ClearedSign, SignBit,
+                                   Mask, EVL, Flags);
+
+  return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
+}
+
 void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
                                        SmallVectorImpl<SDValue> &Results) {
   // Attempt to expand using TargetLowering.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9efcd3f25797b5..7f57b6db40ef49 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13267,7 +13267,9 @@ SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
   case ISD::SMIN:
     return getConstant(APInt::getSignedMaxValue(VT.getSizeInBits()), DL, VT);
   case ISD::FADD:
-    return getConstantFP(-0.0, DL, VT);
+    // If flags allow, prefer positive zero since it's generally cheaper
+    // to materialize on most targets.
+    return getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, VT);
   case ISD::FMUL:
     return getConstantFP(1.0, DL, VT);
   case ISD::FMINNUM:
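// Editor's sketch (not part of this patch): the new FADD comment is about
// signed zero. -0.0 is the true additive identity, while +0.0 is only usable
// when -0.0 need not be preserved (the nsz case checked above). A small
// standalone demonstration:
#include <cassert>
#include <cmath>

int main() {
  double negZero = -0.0;
  // x + (-0.0) == x for every x, including x == -0.0 ...
  assert(std::signbit(negZero + -0.0)); // result is still -0.0
  // ... but x + (+0.0) rewrites -0.0 to +0.0, so +0.0 is only a valid
  // neutral element under the no-signed-zeros flag.
  assert(!std::signbit(negZero + 0.0)); // result is now +0.0
  return 0;
}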
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 9fcb850a147a8e..e9900adf2b3130 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2223,12 +2223,6 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
           "Attributes 'optdebug and optnone' are incompatible!", V);
   }
 
-  Check(!(Attrs.hasFnAttr(Attribute::SanitizeRealtime) &&
-          Attrs.hasFnAttr(Attribute::NoSanitizeRealtime)),
-        "Attributes "
-        "'sanitize_realtime and nosanitize_realtime' are incompatible!",
-        V);
-
   if (Attrs.hasFnAttr(Attribute::OptimizeForDebugging)) {
     Check(!Attrs.hasFnAttr(Attribute::OptimizeForSize),
           "Attributes 'optsize and optdebug' are incompatible!", V);
diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
index 21a966d5abd132..5dee1221b27c01 100644
--- a/llvm/lib/ObjectYAML/DXContainerYAML.cpp
+++ b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
@@ -206,6 +206,12 @@ void MappingTraits<DXContainerYAML::Object>::mapping(
   IO.mapRequired("Parts", Obj.Parts);
 }
 
+void MappingTraits<DXContainerYAML::ResourceFlags>::mapping(
+    IO &IO, DXContainerYAML::ResourceFlags &Flags) {
+#define RESOURCE_FLAG(FlagIndex, Enum) IO.mapRequired(#Enum, Flags.Bits.Enum);
+#include "llvm/BinaryFormat/DXContainerConstants.def"
+}
+
 void MappingTraits<DXContainerYAML::ResourceBindInfo>::mapping(
     IO &IO, DXContainerYAML::ResourceBindInfo &Res) {
   IO.mapRequired("Type", Res.Type);
@@ -266,12 +272,6 @@ void ScalarEnumerationTraits<dxbc::PSV::ResourceKind>::enumeration(
     IO.enumCase(Value, E.Name.str().c_str(), E.Value);
 }
 
-void ScalarEnumerationTraits<dxbc::PSV::ResourceFlag>::enumeration(
-    IO &IO, dxbc::PSV::ResourceFlag &Value) {
-  for (const auto &E : dxbc::PSV::getResourceFlags())
-    IO.enumCase(Value, E.Name.str().c_str(), E.Value);
-}
-
 void ScalarEnumerationTraits<dxbc::D3DSystemValue>::enumeration(
     IO &IO, dxbc::D3DSystemValue &Value) {
   for (const auto &E : dxbc::getD3DSystemValues())
diff --git a/llvm/lib/SandboxIR/SandboxIR.cpp b/llvm/lib/SandboxIR/SandboxIR.cpp
index bf224b73f3bad2..6bdc580f751d18 100644
--- a/llvm/lib/SandboxIR/SandboxIR.cpp
+++ b/llvm/lib/SandboxIR/SandboxIR.cpp
@@ -2248,6 +2248,54 @@ ConstantInt *ConstantInt::get(Type *Ty, uint64_t V, bool IsSigned) {
   return cast<ConstantInt>(Ty->getContext().getOrCreateConstant(LLVMC));
 }
 
+Constant *ConstantFP::get(Type *Ty, double V) {
+  auto *LLVMC = llvm::ConstantFP::get(Ty->LLVMTy, V);
+  return Ty->getContext().getOrCreateConstant(LLVMC);
+}
+
+Constant *ConstantFP::get(Type *Ty, const APFloat &V) {
+  auto *LLVMC = llvm::ConstantFP::get(Ty->LLVMTy, V);
+  return Ty->getContext().getOrCreateConstant(LLVMC);
+}
+
+Constant *ConstantFP::get(Type *Ty, StringRef Str) {
+  auto *LLVMC = llvm::ConstantFP::get(Ty->LLVMTy, Str);
+  return Ty->getContext().getOrCreateConstant(LLVMC);
+}
+
+ConstantFP *ConstantFP::get(const APFloat &V, Context &Ctx) {
+  auto *LLVMC = llvm::ConstantFP::get(Ctx.LLVMCtx, V);
+  return cast<ConstantFP>(Ctx.getOrCreateConstant(LLVMC));
+}
+
+Constant *ConstantFP::getNaN(Type *Ty, bool Negative, uint64_t Payload) {
+  auto *LLVMC = llvm::ConstantFP::getNaN(Ty->LLVMTy, Negative, Payload);
+  return cast<Constant>(Ty->getContext().getOrCreateConstant(LLVMC));
+}
+Constant *ConstantFP::getQNaN(Type *Ty, bool Negative, APInt *Payload) {
+  auto *LLVMC = llvm::ConstantFP::getQNaN(Ty->LLVMTy, Negative, Payload);
+  return cast<Constant>(Ty->getContext().getOrCreateConstant(LLVMC));
+}
+Constant *ConstantFP::getSNaN(Type *Ty, bool Negative, APInt *Payload) {
+  auto *LLVMC = llvm::ConstantFP::getSNaN(Ty->LLVMTy, Negative, Payload);
+  return cast<Constant>(Ty->getContext().getOrCreateConstant(LLVMC));
+}
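// Editor's sketch (not part of this patch): a hypothetical caller of the
// factories defined above, mirroring the llvm::ConstantFP API. `FloatTy`
// stands in for a float sandboxir::Type obtained from a live
// sandboxir::Context; SandboxIR.h and <cassert> are assumed.
void exampleConstantFPUse(sandboxir::Type *FloatTy) {
  sandboxir::Constant *One = sandboxir::ConstantFP::get(FloatTy, 1.0);
  sandboxir::Constant *NegZero =
      sandboxir::ConstantFP::getNegativeZero(FloatTy);
  if (auto *CF = llvm::dyn_cast<sandboxir::ConstantFP>(One))
    assert(CF->isExactlyValue(1.0) && !CF->isNegative());
  (void)NegZero;
}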
+Constant *ConstantFP::getZero(Type *Ty, bool Negative) {
+  auto *LLVMC = llvm::ConstantFP::getZero(Ty->LLVMTy, Negative);
+  return cast<Constant>(Ty->getContext().getOrCreateConstant(LLVMC));
+}
+Constant *ConstantFP::getNegativeZero(Type *Ty) {
+  auto *LLVMC = llvm::ConstantFP::getNegativeZero(Ty->LLVMTy);
+  return cast<Constant>(Ty->getContext().getOrCreateConstant(LLVMC));
+}
+Constant *ConstantFP::getInfinity(Type *Ty, bool Negative) {
+  auto *LLVMC = llvm::ConstantFP::getInfinity(Ty->LLVMTy, Negative);
+  return cast<Constant>(Ty->getContext().getOrCreateConstant(LLVMC));
+}
+bool ConstantFP::isValueValidForType(Type *Ty, const APFloat &V) {
+  return llvm::ConstantFP::isValueValidForType(Ty->LLVMTy, V);
+}
+
 FunctionType *Function::getFunctionType() const {
   return cast<FunctionType>(
       Ctx.getType(cast<llvm::Function>(Val)->getFunctionType()));
@@ -2339,6 +2387,10 @@ Value *Context::getOrCreateValueInternal(llvm::Value *LLVMV, llvm::User *U) {
       It->second = std::unique_ptr<ConstantInt>(new ConstantInt(CI, *this));
      return It->second.get();
     }
+    if (auto *CF = dyn_cast<llvm::ConstantFP>(C)) {
+      It->second = std::unique_ptr<ConstantFP>(new ConstantFP(CF, *this));
+      return It->second.get();
+    }
     if (auto *F = dyn_cast<llvm::Function>(LLVMV))
       It->second = std::unique_ptr<Function>(new Function(F, *this));
     else
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3296f63a9b8876..11aca69db0a148 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11463,7 +11463,9 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
     // movw+movk is fused). So we limit up to 2 instrdduction at most.
     SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
     AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(), Insn);
-    unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
+    assert(Insn.size() <= 4 &&
+           "Should be able to build any value with at most 4 moves");
+    unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 4 : 2));
     IsLegal = Insn.size() <= Limit;
   }
 
@@ -19852,7 +19854,6 @@ static SDValue performConcatVectorsCombine(SDNode *N,
   // This optimization reduces instruction count.
   if (N00Opc == AArch64ISD::VLSHR && N10Opc == AArch64ISD::VLSHR &&
       N00->getOperand(1) == N10->getOperand(1)) {
-
     SDValue N000 = N00->getOperand(0);
     SDValue N100 = N10->getOperand(0);
     uint64_t N001ConstVal = N00->getConstantOperandVal(1),
@@ -19860,7 +19861,8 @@ static SDValue performConcatVectorsCombine(SDNode *N,
              NScalarSize = N->getValueType(0).getScalarSizeInBits();
 
     if (N001ConstVal == N101ConstVal && N001ConstVal > NScalarSize) {
-
+      N000 = DAG.getNode(AArch64ISD::NVCAST, dl, VT, N000);
+      N100 = DAG.getNode(AArch64ISD::NVCAST, dl, VT, N100);
       SDValue Uzp = DAG.getNode(AArch64ISD::UZP2, dl, VT, N000, N100);
       SDValue NewShiftConstant =
           DAG.getConstant(N001ConstVal - NScalarSize, dl, MVT::i32);
@@ -29344,8 +29346,10 @@ void AArch64TargetLowering::verifyTargetSDNode(const SDNode *N) const {
     assert(OpVT.getSizeInBits() == VT.getSizeInBits() &&
            "Expected vectors of equal size!");
     // TODO: Enable assert once bogus creations have been fixed.
-    // assert(OpVT.getVectorElementCount() == VT.getVectorElementCount()*2 &&
-    //        "Expected result vector with half the lanes of its input!");
+    if (VT.isScalableVector())
+      break;
+    assert(OpVT.getVectorElementCount() == VT.getVectorElementCount() * 2 &&
+           "Expected result vector with half the lanes of its input!");
     break;
   }
   case AArch64ISD::TRN1:
@@ -29362,7 +29366,9 @@ void AArch64TargetLowering::verifyTargetSDNode(const SDNode *N) const {
     assert(VT.isVector() && Op0VT.isVector() && Op1VT.isVector() &&
            "Expected vectors!");
     // TODO: Enable assert once bogus creations have been fixed.
-    // assert(VT == Op0VT && VT == Op1VT && "Expected matching vectors!");
+    if (VT.isScalableVector())
+      break;
+    assert(VT == Op0VT && VT == Op1VT && "Expected matching vectors!");
     break;
   }
   }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index bd5684a287381a..9f96f6c5e83ec4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -98,7 +98,7 @@ static cl::opt<bool> EnableCollectLOH(
 static cl::opt<bool>
     EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden,
                                   cl::desc("Enable the pass that removes dead"
-                                           " definitons and replaces stores to"
+                                           " definitions and replaces stores to"
                                            " them with stores to the zero"
                                            " register"),
                                   cl::init(true));
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 37add682b150e7..34c0fad45fc499 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -6947,10 +6947,14 @@ static void ExpandCryptoAEK(const AArch64::ArchInfo &ArchInfo,
   }
 }
 
+static SMLoc incrementLoc(SMLoc L, int Offset) {
+  return SMLoc::getFromPointer(L.getPointer() + Offset);
+}
+
 /// parseDirectiveArch
 ///   ::= .arch token
 bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
-  SMLoc ArchLoc = getLoc();
+  SMLoc CurLoc = getLoc();
 
   StringRef Arch, ExtensionString;
   std::tie(Arch, ExtensionString) =
@@ -6958,7 +6962,7 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
 
   const AArch64::ArchInfo *ArchInfo = AArch64::parseArch(Arch);
   if (!ArchInfo)
-    return Error(ArchLoc, "unknown arch name");
+    return Error(CurLoc, "unknown arch name");
 
   if (parseToken(AsmToken::EndOfStatement))
     return true;
@@ -6978,27 +6982,30 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
     ExtensionString.split(RequestedExtensions, '+');
 
   ExpandCryptoAEK(*ArchInfo, RequestedExtensions);
+  CurLoc = incrementLoc(CurLoc, Arch.size());
 
-  FeatureBitset Features = STI.getFeatureBits();
-  setAvailableFeatures(ComputeAvailableFeatures(Features));
   for (auto Name : RequestedExtensions) {
+    // Advance source location past '+'.
+    CurLoc = incrementLoc(CurLoc, 1);
+
     bool EnableFeature = !Name.consume_front_insensitive("no");
 
-    for (const auto &Extension : ExtensionMap) {
-      if (Extension.Name != Name)
-        continue;
+    auto It = llvm::find_if(ExtensionMap, [&Name](const auto &Extension) {
+      return Extension.Name == Name;
+    });
 
-      if (Extension.Features.none())
-        report_fatal_error("unsupported architectural extension: " + Name);
+    if (It == std::end(ExtensionMap))
+      Error(CurLoc, "unsupported architectural extension: " + Name);
 
-      FeatureBitset ToggleFeatures =
-          EnableFeature
-              ? STI.SetFeatureBitsTransitively(~Features & Extension.Features)
-              : STI.ToggleFeature(Features & Extension.Features);
-      setAvailableFeatures(ComputeAvailableFeatures(ToggleFeatures));
-      break;
-    }
+    if (EnableFeature)
+      STI.SetFeatureBitsTransitively(It->Features);
+    else
+      STI.ClearFeatureBitsTransitively(It->Features);
+
+    CurLoc = incrementLoc(CurLoc, Name.size());
   }
+  FeatureBitset Features = ComputeAvailableFeatures(STI.getFeatureBits());
+  setAvailableFeatures(Features);
   return false;
 }
 
@@ -7018,28 +7025,21 @@ bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
     Name = Name.substr(2);
   }
 
-  MCSubtargetInfo &STI = copySTI();
-  FeatureBitset Features = STI.getFeatureBits();
-  for (const auto &Extension : ExtensionMap) {
-    if (Extension.Name != Name)
-      continue;
-
-    if (Extension.Features.none())
-      return Error(ExtLoc, "unsupported architectural extension: " + Name);
-
-    FeatureBitset ToggleFeatures =
-        EnableFeature
-            ? STI.SetFeatureBitsTransitively(~Features & Extension.Features)
-            : STI.ToggleFeature(Features & Extension.Features);
-    setAvailableFeatures(ComputeAvailableFeatures(ToggleFeatures));
-    return false;
-  }
+  auto It = llvm::find_if(ExtensionMap, [&Name](const auto &Extension) {
+    return Extension.Name == Name;
+  });
 
-  return Error(ExtLoc, "unknown architectural extension: " + Name);
-}
+  if (It == std::end(ExtensionMap))
+    return Error(ExtLoc, "unsupported architectural extension: " + Name);
 
-static SMLoc incrementLoc(SMLoc L, int Offset) {
-  return SMLoc::getFromPointer(L.getPointer() + Offset);
+  MCSubtargetInfo &STI = copySTI();
+  if (EnableFeature)
+    STI.SetFeatureBitsTransitively(It->Features);
+  else
+    STI.ClearFeatureBitsTransitively(It->Features);
+  FeatureBitset Features = ComputeAvailableFeatures(STI.getFeatureBits());
+  setAvailableFeatures(Features);
+  return false;
 }
 
 /// parseDirectiveCPU
@@ -7075,30 +7075,22 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
 
     bool EnableFeature = !Name.consume_front_insensitive("no");
 
-    bool FoundExtension = false;
-    for (const auto &Extension : ExtensionMap) {
-      if (Extension.Name != Name)
-        continue;
-
-      if (Extension.Features.none())
-        report_fatal_error("unsupported architectural extension: " + Name);
-
-      FeatureBitset Features = STI.getFeatureBits();
-      FeatureBitset ToggleFeatures =
-          EnableFeature
-              ? STI.SetFeatureBitsTransitively(~Features & Extension.Features)
-              : STI.ToggleFeature(Features & Extension.Features);
-      setAvailableFeatures(ComputeAvailableFeatures(ToggleFeatures));
-      FoundExtension = true;
+    auto It = llvm::find_if(ExtensionMap, [&Name](const auto &Extension) {
+      return Extension.Name == Name;
+    });
 
-      break;
-    }
+    if (It == std::end(ExtensionMap))
+      Error(CurLoc, "unsupported architectural extension: " + Name);
 
-    if (!FoundExtension)
-      Error(CurLoc, "unsupported architectural extension");
+    if (EnableFeature)
+      STI.SetFeatureBitsTransitively(It->Features);
+    else
+      STI.ClearFeatureBitsTransitively(It->Features);
 
     CurLoc = incrementLoc(CurLoc, Name.size());
   }
+  FeatureBitset Features = ComputeAvailableFeatures(STI.getFeatureBits());
+  setAvailableFeatures(Features);
   return false;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
index a5807a70582b39..df084cf41c4783 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
@@ -7,36 +7,33 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file Implements a module splitting algorithm designed to support the
-/// FullLTO --lto-partitions option for parallel codegen.
+/// FullLTO --lto-partitions option for parallel codegen. This is completely
+/// different from the common SplitModule pass, as this system is designed with
+/// AMDGPU in mind.
 ///
-/// The role of this module splitting pass is the same as
-/// lib/Transforms/Utils/SplitModule.cpp: load-balance the module's functions
-/// across a set of N partitions to allow for parallel codegen.
+/// The basic idea of this module splitting implementation is the same as
+/// SplitModule: load-balance the module's functions across a set of N
+/// partitions to allow parallel codegen. However, it does it very
+/// differently from the target-agnostic variant:
+///  - The module has "split roots", which are kernels in the vast
+///    majority of cases.
+///  - Each root has a set of dependencies, and when a root and its
+///    dependencies are considered "big", we try to put it in a partition where
+///    most dependencies are already imported, to avoid duplicating large
+///    amounts of code.
+///  - There's special care for indirect calls in order to ensure
+///    AMDGPUResourceUsageAnalysis can work correctly.
 ///
-/// The similarities mostly end here, as this pass achieves load-balancing in a
-/// more elaborate fashion which is targeted towards AMDGPU modules. It can take
-/// advantage of the structure of AMDGPU modules (which are mostly
-/// self-contained) to allow for more efficient splitting without affecting
-/// codegen negatively, or causing innaccurate resource usage analysis.
-///
-/// High-level pass overview:
-///   - SplitGraph & associated classes
-///     - Graph representation of the module and of the dependencies that
-///       matter for splitting.
-///   - RecursiveSearchSplitting
-///     - Core splitting algorithm.
-///   - SplitProposal
-///     - Represents a suggested solution for splitting the input module. These
-///       solutions can be scored to determine the best one when multiple
-///       solutions are available.
-///   - Driver/pass "run" function glues everything together.
+/// This file also includes a more elaborate logging system to enable
+/// users to easily generate logs that (if desired) do not include any value
+/// names, in order to not leak information about the source file.
+/// Such logs are very helpful to understand and fix potential issues with
+/// module splitting.
 
 #include "AMDGPUSplitModule.h"
 #include "AMDGPUTargetMachine.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/EquivalenceClasses.h"
-#include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -47,56 +44,44 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
-#include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileSystem.h"
-#include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/Timer.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/SHA256.h"
+#include "llvm/Support/Threading.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
 #include 
-#ifndef NDEBUG
-#include "llvm/Support/LockFileManager.h"
-#endif
+using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-split-module"
 
-namespace llvm {
 namespace {
 
-static cl::opt<unsigned> MaxDepth(
-    "amdgpu-module-splitting-max-depth",
-    cl::desc(
-        "maximum search depth. 0 forces a greedy approach. "
-        "warning: the algorithm is up to O(2^N), where N is the max depth."),
-    cl::init(8));
-
 static cl::opt<float> LargeFnFactor(
-    "amdgpu-module-splitting-large-threshold", cl::init(2.0f), cl::Hidden,
+    "amdgpu-module-splitting-large-function-threshold", cl::init(2.0f),
+    cl::Hidden,
     cl::desc(
-        "when max depth is reached and we can no longer branch out, this "
-        "value determines if a function is worth merging into an already "
-        "existing partition to reduce code duplication. This is a factor "
-        "of the ideal partition size, e.g. 2.0 means we consider the "
-        "function for merging if its cost (including its callees) is 2x the "
-        "size of an ideal partition."));
+        "consider a function as large and needing special treatment when the "
+        "cost of importing it into a partition "
+        "exceeds the average cost of a partition by this factor; e.g. 2.0 "
+        "means if the function and its dependencies are 2 times bigger than "
+        "an average partition; 0 disables large function handling entirely"));
 
 static cl::opt<float> LargeFnOverlapForMerge(
-    "amdgpu-module-splitting-merge-threshold", cl::init(0.7f), cl::Hidden,
-    cl::desc("when a function is considered for merging into a partition that "
-             "already contains some of its callees, do the merge if at least "
-             "n% of the code it can reach is already present inside the "
-             "partition; e.g. 0.7 means only merge >70%"));
+    "amdgpu-module-splitting-large-function-merge-overlap", cl::init(0.8f),
+    cl::Hidden,
+    cl::desc(
+        "defines how much overlap between two large functions' dependencies "
+        "is needed to put them in the same partition"));
 
 static cl::opt<bool> NoExternalizeGlobals(
     "amdgpu-module-splitting-no-externalize-globals", cl::Hidden,
@@ -104,92 +89,142 @@ static cl::opt<bool> NoExternalizeGlobals(
            "may cause globals to be duplicated which increases binary size"));
 
 static cl::opt<std::string>
-    ModuleDotCfgOutput("amdgpu-module-splitting-print-module-dotcfg",
-                       cl::Hidden,
-                       cl::desc("output file to write out the dotgraph "
-                                "representation of the input module"));
+    LogDirOpt("amdgpu-module-splitting-log-dir", cl::Hidden,
+              cl::desc("output directory for AMDGPU module splitting logs"));
 
-static cl::opt<std::string> PartitionSummariesOutput(
-    "amdgpu-module-splitting-print-partition-summaries", cl::Hidden,
-    cl::desc("output file to write out a summary of "
-             "the partitions created for each module"));
-
-#ifndef NDEBUG
 static cl::opt<bool>
-    UseLockFile("amdgpu-module-splitting-serial-execution", cl::Hidden,
-                cl::desc("use a lock file so only one process in the system "
-                         "can run this pass at once. useful to avoid mangled "
-                         "debug output in multithreaded environments."));
+    LogPrivate("amdgpu-module-splitting-log-private", cl::Hidden,
+               cl::desc("hash value names before printing them in the AMDGPU "
+                        "module splitting logs"));
 
-static cl::opt<bool>
-    DebugProposalSearch("amdgpu-module-splitting-debug-proposal-search",
-                        cl::Hidden,
-                        cl::desc("print all proposals received and whether "
-                                 "they were rejected or accepted"));
-#endif
+using CostType = InstructionCost::CostType;
+using PartitionID = unsigned;
+using GetTTIFn = function_ref<const TargetTransformInfo &(Function &)>;
 
-struct SplitModuleTimer : NamedRegionTimer {
-  SplitModuleTimer(StringRef Name, StringRef Desc)
-      : NamedRegionTimer(Name, Desc, DEBUG_TYPE, "AMDGPU Module Splitting",
-                         TimePassesIsEnabled) {}
-};
+static bool isEntryPoint(const Function *F) {
+  return AMDGPU::isEntryFunctionCC(F->getCallingConv());
+}
 
-//===----------------------------------------------------------------------===//
-// Utils
-//===----------------------------------------------------------------------===//
+static std::string getName(const Value &V) {
+  static bool HideNames;
 
-using CostType = InstructionCost::CostType;
-using FunctionsCostMap = DenseMap<const Function *, CostType>;
-using GetTTIFn = function_ref<const TargetTransformInfo &(Function &)>;
-static constexpr unsigned InvalidPID = -1;
+  static llvm::once_flag HideNameInitFlag;
+  llvm::call_once(HideNameInitFlag, [&]() {
+    if (LogPrivate.getNumOccurrences())
+      HideNames = LogPrivate;
+    else {
+      const auto EV = sys::Process::GetEnv("AMD_SPLIT_MODULE_LOG_PRIVATE");
+      HideNames = (EV.value_or("0") != "0");
+    }
+  });
 
-/// \param Num numerator
-/// \param Dem denominator
-/// \returns a printable object to print (Num/Dem) using "%0.2f".
-static auto formatRatioOf(CostType Num, CostType Dem) {
-  return format("%0.2f", (static_cast<double>(Num) / Dem) * 100);
+  if (!HideNames)
+    return V.getName().str();
+  return toHex(SHA256::hash(arrayRefFromStringRef(V.getName())),
+               /*LowerCase=*/true);
 }
 
-/// Checks whether a given function is non-copyable.
+/// Main logging helper.
 ///
-/// Non-copyable functions cannot be cloned into multiple partitions, and only
-/// one copy of the function can be present across all partitions.
+/// Logging can be configured by the following environment variables.
+/// AMD_SPLIT_MODULE_LOG_DIR=<dir>
+///   If set, uses <dir> as the directory to write logfiles to
+///   each time module splitting is used.
+/// AMD_SPLIT_MODULE_LOG_PRIVATE
+///   If set to anything other than zero, all names are hidden.
 ///
-/// External functions fall into this category. If we were to clone them, we
-/// would end up with multiple symbol definitions and a very unhappy linker.
-static bool isNonCopyable(const Function &F) {
-  assert(AMDGPU::isEntryFunctionCC(F.getCallingConv())
-             ? F.hasExternalLinkage()
-             : true && "Kernel w/o external linkage?");
-  return F.hasExternalLinkage() || !F.isDefinitionExact();
-}
+/// Both environment variables have corresponding CL options which
+/// take priority over them.
+///
+/// Any output printed to the log files is also printed to dbgs() when -debug is
+/// used and LLVM_DEBUG is defined.
+///
+/// This approach has a small disadvantage over LLVM_DEBUG though: logging logic
+/// cannot be removed from the code (by building without debug). This probably
+/// has a small performance cost because if some computation/formatting is
+/// needed for logging purposes, it may be done every time only to be ignored
+/// by the logger.
+///
+/// As this pass only runs once and is not doing anything computationally
+/// expensive, this is likely a reasonable trade-off.
+///
+/// If some computation should really be avoided when unused, users of the class
+/// can check whether any logging will occur by using the bool operator.
+///
+/// \code
+/// if (SML) {
+///   // Executes only if logging to a file or if -debug is available and
+///   // used.
+/// }
+/// \endcode
+class SplitModuleLogger {
+public:
+  SplitModuleLogger(const Module &M) {
+    std::string LogDir = LogDirOpt;
+    if (LogDir.empty())
+      LogDir = sys::Process::GetEnv("AMD_SPLIT_MODULE_LOG_DIR").value_or("");
+
+    // No log dir specified means we don't need to log to a file.
+    // We may still log to dbgs(), though.
+    if (LogDir.empty())
+      return;
+
+    // If a log directory is specified, create a new file with a unique name in
+    // that directory.
+    int Fd;
+    SmallString<0> PathTemplate;
+    SmallString<0> RealPath;
+    sys::path::append(PathTemplate, LogDir, "Module-%%-%%-%%-%%-%%-%%-%%.txt");
+    if (auto Err =
+            sys::fs::createUniqueFile(PathTemplate.str(), Fd, RealPath)) {
+      report_fatal_error("Failed to create log file at '" + Twine(LogDir) +
+                             "': " + Err.message(),
+                         /*CrashDiag=*/false);
+    }
 
-/// If \p GV has local linkage, make it external + hidden.
-static void externalize(GlobalValue &GV) {
-  if (GV.hasLocalLinkage()) {
-    GV.setLinkage(GlobalValue::ExternalLinkage);
-    GV.setVisibility(GlobalValue::HiddenVisibility);
+    FileOS = std::make_unique<raw_fd_ostream>(Fd, /*shouldClose=*/true);
   }
 
-  // Unnamed entities must be named consistently between modules. setName will
-  // give a distinct name to each such entity.
-  if (!GV.hasName())
-    GV.setName("__llvmsplit_unnamed");
+  bool hasLogFile() const { return FileOS != nullptr; }
+
+  raw_ostream &logfile() {
+    assert(FileOS && "no logfile!");
+    return *FileOS;
+  }
+
+  /// \returns true if this SML will log anything either to a file or dbgs().
+  /// Can be used to avoid expensive computations that are ignored when logging
+  /// is disabled.
+  operator bool() const {
+    return hasLogFile() || (DebugFlag && isCurrentDebugType(DEBUG_TYPE));
+  }
+
+private:
+  std::unique_ptr<raw_fd_ostream> FileOS;
+};
+
+template <typename Ty>
+static SplitModuleLogger &operator<<(SplitModuleLogger &SML, const Ty &Val) {
+  static_assert(
+      !std::is_same_v<Ty, Value>,
+      "do not print values to logs directly, use handleName instead!");
+  LLVM_DEBUG(dbgs() << Val);
+  if (SML.hasLogFile())
+    SML.logfile() << Val;
+  return SML;
 }
 
-/// Cost analysis function. Calculates the cost of each function in \p M
-///
+/// Calculate the cost of each function in \p M
+/// \param SML Log Helper
 /// \param GetTTI Abstract getter for TargetTransformInfo.
 /// \param M Module to analyze.
 /// \param CostMap[out] Resulting Function -> Cost map.
 /// \return The module's total cost.
-static CostType calculateFunctionCosts(GetTTIFn GetTTI, Module &M,
-                                       FunctionsCostMap &CostMap) {
-  SplitModuleTimer SMT("calculateFunctionCosts", "cost analysis");
-
-  LLVM_DEBUG(dbgs() << "[cost analysis] calculating function costs\n");
+static CostType
+calculateFunctionCosts(SplitModuleLogger &SML, GetTTIFn GetTTI, Module &M,
                        DenseMap<const Function *, CostType> &CostMap) {
   CostType ModuleCost = 0;
-  [[maybe_unused]] CostType KernelCost = 0;
+  CostType KernelCost = 0;
 
   for (auto &Fn : M) {
     if (Fn.isDeclaration())
@@ -216,30 +251,23 @@ static CostType calculateFunctionCosts(GetTTIFn GetTTI, Module &M,
     assert((ModuleCost + FnCost) >= ModuleCost && "Overflow!");
     ModuleCost += FnCost;
 
-    if (AMDGPU::isEntryFunctionCC(Fn.getCallingConv()))
+    if (isEntryPoint(&Fn))
       KernelCost += FnCost;
   }
 
-  if (CostMap.empty())
-    return 0;
-
-  assert(ModuleCost);
-  LLVM_DEBUG({
-    const CostType FnCost = ModuleCost - KernelCost;
-    dbgs() << " - total module cost is " << ModuleCost << ". kernels cost "
-           << "" << KernelCost << " ("
-           << format("%0.2f", (float(KernelCost) / ModuleCost) * 100)
-           << "% of the module), functions cost " << FnCost << " ("
-           << format("%0.2f", (float(FnCost) / ModuleCost) * 100)
-           << "% of the module)\n";
-  });
+  CostType FnCost = (ModuleCost - KernelCost);
+  CostType ModuleCostOr1 = ModuleCost ? ModuleCost : 1;
+  SML << "=> Total Module Cost: " << ModuleCost << '\n'
+      << "  => KernelCost: " << KernelCost << " ("
+      << format("%0.2f", (float(KernelCost) / ModuleCostOr1) * 100) << "%)\n"
+      << "  => FnsCost: " << FnCost << " ("
+      << format("%0.2f", (float(FnCost) / ModuleCostOr1) * 100) << "%)\n";
 
   return ModuleCost;
 }
 
-/// \return true if \p F can be indirectly called
 static bool canBeIndirectlyCalled(const Function &F) {
-  if (F.isDeclaration() || AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+  if (F.isDeclaration() || isEntryPoint(&F))
     return false;
   return !F.hasLocalLinkage() ||
          F.hasAddressTaken(/*PutOffender=*/nullptr,
@@ -250,1081 +278,351 @@ static bool canBeIndirectlyCalled(const Function &F) {
                            /*IgnoreCastedDirectCall=*/true);
 }
 
-//===----------------------------------------------------------------------===//
-// Graph-based Module Representation
-//===----------------------------------------------------------------------===//
-
-/// AMDGPUSplitModule's view of the source Module, as a graph of all components
-/// that can be split into different modules.
-///
-/// The most trivial instance of this graph is just the CallGraph of the module,
-/// but it is not guaranteed that the graph is strictly equal to the CG. It
-/// currently always is but it's designed in a way that would eventually allow
-/// us to create abstract nodes, or nodes for different entities such as global
-/// variables or any other meaningful constraint we must consider.
+/// When a function or any of its callees performs an indirect call, this
+/// takes over \ref addAllDependencies and adds all potentially callable
+/// functions to \p Fns so they can be counted as dependencies of the function.
 ///
-/// The graph is only mutable by this class, and is generally not modified
-/// after \ref SplitGraph::buildGraph runs. No consumers of the graph can
-/// mutate it.
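
The `getName()` helper added above gates name hiding behind a one-time initialization in which the `-amdgpu-module-splitting-log-private` flag, when given, overrides the `AMD_SPLIT_MODULE_LOG_PRIVATE` environment variable. A minimal standalone sketch of that pattern, using `std::call_once` and `std::getenv` in place of the LLVM equivalents; `readPrivacySetting` is a hypothetical name, not part of the patch:

```cpp
#include <cstdlib>
#include <mutex>
#include <optional>
#include <string>

// Hypothetical helper mirroring getName()'s initialization: an explicit
// command-line value wins; otherwise fall back to the environment, where
// any value other than "0" enables name hiding.
static bool readPrivacySetting(std::optional<bool> CommandLineValue) {
  static bool HideNames = false;
  static std::once_flag InitFlag;
  std::call_once(InitFlag, [&] {
    if (CommandLineValue)
      HideNames = *CommandLineValue;
    else if (const char *EV = std::getenv("AMD_SPLIT_MODULE_LOG_PRIVATE"))
      HideNames = std::string(EV) != "0";
  });
  return HideNames;
}
```

The once-flag matters because `getName` is called from the logging path on every value printed; the environment should only be consulted on the first call.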
-class SplitGraph { -public: - class Node; - - enum class EdgeKind : uint8_t { - /// The nodes are related through a direct call. This is a "strong" edge as - /// it means the Src will directly reference the Dst. - DirectCall, - /// The nodes are related through an indirect call. - /// This is a "weaker" edge and is only considered when traversing the graph - /// starting from a kernel. We need this edge for resource usage analysis. - /// - /// The reason why we have this edge in the first place is due to how - /// AMDGPUResourceUsageAnalysis works. In the presence of an indirect call, - /// the resource usage of the kernel containing the indirect call is the - /// max resource usage of all functions that can be indirectly called. - IndirectCall, - }; - - /// An edge between two nodes. Edges are directional, and tagged with a - /// "kind". - struct Edge { - Edge(Node *Src, Node *Dst, EdgeKind Kind) - : Src(Src), Dst(Dst), Kind(Kind) {} - - Node *Src; ///< Source - Node *Dst; ///< Destination - EdgeKind Kind; - }; - - using EdgesVec = SmallVector; - using edges_iterator = EdgesVec::const_iterator; - using nodes_iterator = const Node *const *; - - SplitGraph(const Module &M, const FunctionsCostMap &CostMap, - CostType ModuleCost) - : M(M), CostMap(CostMap), ModuleCost(ModuleCost) {} - - void buildGraph(CallGraph &CG); - -#ifndef NDEBUG - bool verifyGraph() const; -#endif - - bool empty() const { return Nodes.empty(); } - const iterator_range nodes() const { - return {Nodes.begin(), Nodes.end()}; +/// This is needed due to how AMDGPUResourceUsageAnalysis operates: in the +/// presence of an indirect call, the function's resource usage is the same as +/// the most expensive function in the module. +/// \param M The module. +/// \param Fns[out] Resulting list of functions. +static void addAllIndirectCallDependencies(const Module &M, + DenseSet &Fns) { + for (const auto &Fn : M) { + if (canBeIndirectlyCalled(Fn)) + Fns.insert(&Fn); } - const Node &getNode(unsigned ID) const { return *Nodes[ID]; } - - unsigned getNumNodes() const { return Nodes.size(); } - BitVector createNodesBitVector() const { return BitVector(Nodes.size()); } - - const Module &getModule() const { return M; } - - CostType getModuleCost() const { return ModuleCost; } - CostType getCost(const Function &F) const { return CostMap.at(&F); } - - /// \returns the aggregated cost of all nodes in \p BV (bits set to 1 = node - /// IDs). - CostType calculateCost(const BitVector &BV) const; - -private: - /// Retrieves the node for \p GV in \p Cache, or creates a new node for it and - /// updates \p Cache. - Node &getNode(DenseMap &Cache, - const GlobalValue &GV); - - // Create a new edge between two nodes and add it to both nodes. - const Edge &createEdge(Node &Src, Node &Dst, EdgeKind EK); - - const Module &M; - const FunctionsCostMap &CostMap; - CostType ModuleCost; - - // Final list of nodes with stable ordering. - SmallVector Nodes; - - SpecificBumpPtrAllocator NodesPool; - - // Edges are trivially destructible objects, so as a small optimization we - // use a BumpPtrAllocator which avoids destructor calls but also makes - // allocation faster. - static_assert( - std::is_trivially_destructible_v, - "Edge must be trivially destructible to use the BumpPtrAllocator"); - BumpPtrAllocator EdgesPool; -}; +} -/// Nodes in the SplitGraph contain both incoming, and outgoing edges. -/// Incoming edges have this node as their Dst, and Outgoing ones have this node -/// as their Src. 
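
The `addAllIndirectCallDependencies` helper added above is intentionally blunt: it does not try to resolve an indirect call, it collects every function that could possibly be its target. A rough standalone illustration of the same filter, with a hypothetical `FuncInfo` record standing in for the queries made on `llvm::Function`:

```cpp
#include <set>
#include <vector>

// Hypothetical stand-in for the llvm::Function properties consulted here.
struct FuncInfo {
  bool IsDeclaration;
  bool IsEntryPoint;    // kernel entry points cannot be indirect targets
  bool HasLocalLinkage;
  bool AddressTaken;
};

// Mirrors canBeIndirectlyCalled() feeding addAllIndirectCallDependencies():
// a function is a potential indirect target if it has a body, is not an
// entry point, and is either externally visible or address-taken.
static void addAllIndirectTargets(const std::vector<const FuncInfo *> &Module,
                                  std::set<const FuncInfo *> &Deps) {
  for (const FuncInfo *F : Module) {
    if (F->IsDeclaration || F->IsEntryPoint)
      continue;
    if (!F->HasLocalLinkage || F->AddressTaken)
      Deps.insert(F);
  }
}
```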
+/// Adds the functions that \p Fn may call to \p Fns, then recurses into each +/// callee until all reachable functions have been gathered. /// -/// Edge objects are shared by both nodes in Src/Dst. They provide immediate -/// feedback on how two nodes are related, and in which direction they are -/// related, which is valuable information to make splitting decisions. -/// -/// Nodes are fundamentally abstract, and any consumers of the graph should -/// treat them as such. While a node will be a function most of the time, we -/// could also create nodes for any other reason. In the future, we could have -/// single nodes for multiple functions, or nodes for GVs, etc. -class SplitGraph::Node { - friend class SplitGraph; - -public: - Node(unsigned ID, const GlobalValue &GV, CostType IndividualCost, - bool IsNonCopyable) - : ID(ID), GV(GV), IndividualCost(IndividualCost), - IsNonCopyable(IsNonCopyable), IsEntryFnCC(false), IsGraphEntry(false) { - if (auto *Fn = dyn_cast(&GV)) - IsEntryFnCC = AMDGPU::isEntryFunctionCC(Fn->getCallingConv()); - } - - /// An 0-indexed ID for the node. The maximum ID (exclusive) is the number of - /// nodes in the graph. This ID can be used as an index in a BitVector. - unsigned getID() const { return ID; } - - const Function &getFunction() const { return cast(GV); } - - /// \returns the cost to import this component into a given module, not - /// accounting for any dependencies that may need to be imported as well. - CostType getIndividualCost() const { return IndividualCost; } - - bool isNonCopyable() const { return IsNonCopyable; } - bool isEntryFunctionCC() const { return IsEntryFnCC; } - - /// \returns whether this is an entry point in the graph. Entry points are - /// defined as follows: if you take all entry points in the graph, and iterate - /// their dependencies, you are guaranteed to visit all nodes in the graph at - /// least once. - bool isGraphEntryPoint() const { return IsGraphEntry; } - - StringRef getName() const { return GV.getName(); } - - bool hasAnyIncomingEdges() const { return IncomingEdges.size(); } - bool hasAnyIncomingEdgesOfKind(EdgeKind EK) const { - return any_of(IncomingEdges, [&](const auto *E) { return E->Kind == EK; }); - } - - bool hasAnyOutgoingEdges() const { return OutgoingEdges.size(); } - bool hasAnyOutgoingEdgesOfKind(EdgeKind EK) const { - return any_of(OutgoingEdges, [&](const auto *E) { return E->Kind == EK; }); - } - - iterator_range incoming_edges() const { - return IncomingEdges; - } - - iterator_range outgoing_edges() const { - return OutgoingEdges; - } - - bool shouldFollowIndirectCalls() const { return isEntryFunctionCC(); } - - /// Visit all children of this node in a recursive fashion. Also visits Self. - /// If \ref shouldFollowIndirectCalls returns false, then this only follows - /// DirectCall edges. - /// - /// \param Visitor Visitor Function. - void visitAllDependencies(std::function Visitor) const; - - /// Adds the depedencies of this node in \p BV by setting the bit - /// corresponding to each node. - /// - /// Implemented using \ref visitAllDependencies, hence it follows the same - /// rules regarding dependencies traversal. - /// - /// \param[out] BV The bitvector where the bits should be set. - void getDependencies(BitVector &BV) const { - visitAllDependencies([&](const Node &N) { BV.set(N.getID()); }); - } - - /// Uses \ref visitAllDependencies to aggregate the individual cost of this - /// node and all of its dependencies. - /// - /// This is cached. 
- CostType getFullCost() const; - -private: - void markAsGraphEntry() { IsGraphEntry = true; } - - unsigned ID; - const GlobalValue &GV; - CostType IndividualCost; - bool IsNonCopyable : 1; - bool IsEntryFnCC : 1; - bool IsGraphEntry : 1; - - // TODO: Cache dependencies as well? - mutable CostType FullCost = 0; - - // TODO: Use a single sorted vector (with all incoming/outgoing edges grouped - // together) - EdgesVec IncomingEdges; - EdgesVec OutgoingEdges; -}; - -void SplitGraph::Node::visitAllDependencies( - std::function Visitor) const { - const bool FollowIndirect = shouldFollowIndirectCalls(); - // FIXME: If this can access SplitGraph in the future, use a BitVector - // instead. - DenseSet Seen; - SmallVector WorkList({this}); +/// \param SML Log Helper +/// \param CG Call graph for \p Fn's module. +/// \param Fn Current function to look at. +/// \param Fns[out] Resulting list of functions. +/// \param OnlyDirect Whether to only consider direct callees. +/// \param HadIndirectCall[out] Set to true if an indirect call was seen at some +/// point, either in \p Fn or in one of the function it calls. When that +/// happens, we fall back to adding all callable functions inside \p Fn's module +/// to \p Fns. +static void addAllDependencies(SplitModuleLogger &SML, const CallGraph &CG, + const Function &Fn, + DenseSet &Fns, bool OnlyDirect, + bool &HadIndirectCall) { + assert(!Fn.isDeclaration()); + + const Module &M = *Fn.getParent(); + SmallVector WorkList({&Fn}); while (!WorkList.empty()) { - const Node *CurN = WorkList.pop_back_val(); - if (auto [It, Inserted] = Seen.insert(CurN); !Inserted) - continue; - - Visitor(*CurN); - - for (const Edge *E : CurN->outgoing_edges()) { - if (!FollowIndirect && E->Kind == EdgeKind::IndirectCall) - continue; - WorkList.push_back(E->Dst); - } - } -} - -CostType SplitGraph::Node::getFullCost() const { - if (FullCost) - return FullCost; - - assert(FullCost == 0); - visitAllDependencies( - [&](const Node &N) { FullCost += N.getIndividualCost(); }); - return FullCost; -} + const auto &CurFn = *WorkList.pop_back_val(); + assert(!CurFn.isDeclaration()); -void SplitGraph::buildGraph(CallGraph &CG) { - SplitModuleTimer SMT("buildGraph", "graph construction"); - LLVM_DEBUG( - dbgs() - << "[build graph] constructing graph representation of the input\n"); - - // We build the graph by just iterating all functions in the module and - // working on their direct callees. At the end, all nodes should be linked - // together as expected. - DenseMap Cache; - SmallVector FnsWithIndirectCalls, IndirectlyCallableFns; - for (const Function &Fn : M) { - if (Fn.isDeclaration()) - continue; + // Scan for an indirect call. If such a call is found, we have to + // conservatively assume this can call all non-entrypoint functions in the + // module. - // Look at direct callees and create the necessary edges in the graph. - bool HasIndirectCall = false; - Node &N = getNode(Cache, Fn); - for (auto &CGEntry : *CG[&Fn]) { + for (auto &CGEntry : *CG[&CurFn]) { auto *CGNode = CGEntry.second; auto *Callee = CGNode->getFunction(); if (!Callee) { - // TODO: Don't consider inline assembly as indirect calls. - if (CGNode == CG.getCallsExternalNode()) - HasIndirectCall = true; + if (OnlyDirect) + continue; + + // Functions have an edge towards CallsExternalNode if they're external + // declarations, or if they do an indirect call. As we only process + // definitions here, we know this means the function has an indirect + // call. 
We then have to conservatively assume this can call all + // non-entrypoint functions in the module. + if (CGNode != CG.getCallsExternalNode()) + continue; // this is another function-less node we don't care about. + + SML << "Indirect call detected in " << getName(CurFn) + << " - treating all non-entrypoint functions as " + "potential dependencies\n"; + + // TODO: Print an ORE as well ? + addAllIndirectCallDependencies(M, Fns); + HadIndirectCall = true; continue; } - if (!Callee->isDeclaration()) - createEdge(N, getNode(Cache, *Callee), EdgeKind::DirectCall); - } - - // Keep track of this function if it contains an indirect call and/or if it - // can be indirectly called. - if (HasIndirectCall) { - LLVM_DEBUG(dbgs() << "indirect call found in " << Fn.getName() << "\n"); - FnsWithIndirectCalls.push_back(&Fn); - } - - if (canBeIndirectlyCalled(Fn)) - IndirectlyCallableFns.push_back(&Fn); - } + if (Callee->isDeclaration()) + continue; - // Post-process functions with indirect calls. - for (const Function *Fn : FnsWithIndirectCalls) { - for (const Function *Candidate : IndirectlyCallableFns) { - Node &Src = getNode(Cache, *Fn); - Node &Dst = getNode(Cache, *Candidate); - createEdge(Src, Dst, EdgeKind::IndirectCall); + auto [It, Inserted] = Fns.insert(Callee); + if (Inserted) + WorkList.push_back(Callee); } } - - // Now, find all entry points. - SmallVector CandidateEntryPoints; - BitVector NodesReachableByKernels = createNodesBitVector(); - for (Node *N : Nodes) { - // Functions with an Entry CC are always graph entry points too. - if (N->isEntryFunctionCC()) { - N->markAsGraphEntry(); - N->getDependencies(NodesReachableByKernels); - } else if (!N->hasAnyIncomingEdgesOfKind(EdgeKind::DirectCall)) - CandidateEntryPoints.push_back(N); - } - - for (Node *N : CandidateEntryPoints) { - // This can be another entry point if it's not reachable by a kernel - // TODO: We could sort all of the possible new entries in a stable order - // (e.g. by cost), then consume them one by one until - // NodesReachableByKernels is all 1s. It'd allow us to avoid - // considering some nodes as non-entries in some specific cases. - if (!NodesReachableByKernels.test(N->getID())) - N->markAsGraphEntry(); - } - -#ifndef NDEBUG - assert(verifyGraph()); -#endif } -#ifndef NDEBUG -bool SplitGraph::verifyGraph() const { - unsigned ExpectedID = 0; - // Exceptionally using a set here in case IDs are messed up. 
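
Stripped of the `CallGraph` plumbing and logging, `addAllDependencies` above is a standard worklist traversal: pop a function, scan its callees, push the ones not yet seen. The shape of it is roughly the following sketch; the string-keyed adjacency map is an assumption made for self-containment, not the structure the pass uses:

```cpp
#include <map>
#include <set>
#include <string>
#include <vector>

using CallGraphMap = std::map<std::string, std::vector<std::string>>;

// Gather every function transitively reachable from Root, visiting each
// one exactly once, as in the worklist loop of addAllDependencies.
static std::set<std::string> collectDependencies(const CallGraphMap &CG,
                                                 const std::string &Root) {
  std::set<std::string> Seen;
  std::vector<std::string> WorkList{Root};
  while (!WorkList.empty()) {
    std::string Cur = WorkList.back();
    WorkList.pop_back();
    auto It = CG.find(Cur);
    if (It == CG.end())
      continue; // treat unknown functions as leaves
    for (const std::string &Callee : It->second) {
      // insert() reports whether the callee was already visited, so each
      // function is pushed onto the worklist at most once.
      if (Seen.insert(Callee).second)
        WorkList.push_back(Callee);
    }
  }
  return Seen;
}
```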
- DenseSet SeenNodes; - DenseSet SeenFunctionNodes; - for (const Node *N : Nodes) { - if (N->getID() != (ExpectedID++)) { - errs() << "Node IDs are incorrect!\n"; - return false; - } - - if (!SeenNodes.insert(N).second) { - errs() << "Node seen more than once!\n"; - return false; - } - - if (&getNode(N->getID()) != N) { - errs() << "getNode doesn't return the right node\n"; - return false; - } - - for (const Edge *E : N->IncomingEdges) { - if (!E->Src || !E->Dst || (E->Dst != N) || - (find(E->Src->OutgoingEdges, E) == E->Src->OutgoingEdges.end())) { - errs() << "ill-formed incoming edges\n"; - return false; - } - } - - for (const Edge *E : N->OutgoingEdges) { - if (!E->Src || !E->Dst || (E->Src != N) || - (find(E->Dst->IncomingEdges, E) == E->Dst->IncomingEdges.end())) { - errs() << "ill-formed outgoing edges\n"; - return false; - } - } - - const Function &Fn = N->getFunction(); - if (AMDGPU::isEntryFunctionCC(Fn.getCallingConv())) { - if (N->hasAnyIncomingEdges()) { - errs() << "Kernels cannot have incoming edges\n"; - return false; - } - } - - if (Fn.isDeclaration()) { - errs() << "declarations shouldn't have nodes!\n"; - return false; - } - - auto [It, Inserted] = SeenFunctionNodes.insert(&Fn); - if (!Inserted) { - errs() << "one function has multiple nodes!\n"; - return false; +/// Contains information about a function and its dependencies. +/// This is a splitting root. The splitting algorithm works by +/// assigning these to partitions. +struct FunctionWithDependencies { + FunctionWithDependencies(SplitModuleLogger &SML, CallGraph &CG, + const DenseMap &FnCosts, + const Function *Fn) + : Fn(Fn) { + // When Fn is not a kernel, we don't need to collect indirect callees. + // Resource usage analysis is only performed on kernels, and we collect + // indirect callees for resource usage analysis. + addAllDependencies(SML, CG, *Fn, Dependencies, + /*OnlyDirect*/ !isEntryPoint(Fn), HasIndirectCall); + TotalCost = FnCosts.at(Fn); + for (const auto *Dep : Dependencies) { + TotalCost += FnCosts.at(Dep); + + // We cannot duplicate functions with external linkage, or functions that + // may be overriden at runtime. + HasNonDuplicatableDependecy |= + (Dep->hasExternalLinkage() || !Dep->isDefinitionExact()); } } - if (ExpectedID != Nodes.size()) { - errs() << "Node IDs out of sync!\n"; - return false; - } - - if (createNodesBitVector().size() != getNumNodes()) { - errs() << "nodes bit vector doesn't have the right size!\n"; - return false; - } - - // Check we respect the promise of Node::isKernel - BitVector BV = createNodesBitVector(); - for (const Node *N : nodes()) { - if (N->isGraphEntryPoint()) - N->getDependencies(BV); - } - - // Ensure each function in the module has an associated node. 
- for (const auto &Fn : M) { - if (!Fn.isDeclaration()) { - if (!SeenFunctionNodes.contains(&Fn)) { - errs() << "Fn has no associated node in the graph!\n"; - return false; - } - } - } - - if (!BV.all()) { - errs() << "not all nodes are reachable through the graph's entry points!\n"; - return false; - } - - return true; -} -#endif - -CostType SplitGraph::calculateCost(const BitVector &BV) const { - CostType Cost = 0; - for (unsigned NodeID : BV.set_bits()) - Cost += getNode(NodeID).getIndividualCost(); - return Cost; -} - -SplitGraph::Node & -SplitGraph::getNode(DenseMap &Cache, - const GlobalValue &GV) { - auto &N = Cache[&GV]; - if (N) - return *N; - - CostType Cost = 0; - bool NonCopyable = false; - if (const Function *Fn = dyn_cast(&GV)) { - NonCopyable = isNonCopyable(*Fn); - Cost = CostMap.at(Fn); - } - N = new (NodesPool.Allocate()) Node(Nodes.size(), GV, Cost, NonCopyable); - Nodes.push_back(N); - assert(&getNode(N->getID()) == N); - return *N; -} - -const SplitGraph::Edge &SplitGraph::createEdge(Node &Src, Node &Dst, - EdgeKind EK) { - const Edge *E = new (EdgesPool.Allocate(1)) Edge(&Src, &Dst, EK); - Src.OutgoingEdges.push_back(E); - Dst.IncomingEdges.push_back(E); - return *E; -} - -//===----------------------------------------------------------------------===// -// Split Proposals -//===----------------------------------------------------------------------===// - -/// Represents a module splitting proposal. -/// -/// Proposals are made of N BitVectors, one for each partition, where each bit -/// set indicates that the node is present and should be copied inside that -/// partition. -/// -/// Proposals have several metrics attached so they can be compared/sorted, -/// which the driver to try multiple strategies resultings in multiple proposals -/// and choose the best one out of them. -class SplitProposal { -public: - SplitProposal(const SplitGraph &SG, unsigned MaxPartitions) : SG(&SG) { - Partitions.resize(MaxPartitions, {0, SG.createNodesBitVector()}); - } + const Function *Fn = nullptr; + DenseSet Dependencies; + /// Whether \p Fn or any of its \ref Dependencies contains an indirect call. + bool HasIndirectCall = false; + /// Whether any of \p Fn's dependencies cannot be duplicated. + bool HasNonDuplicatableDependecy = false; - void setName(StringRef NewName) { Name = NewName; } - StringRef getName() const { return Name; } - - const BitVector &operator[](unsigned PID) const { - return Partitions[PID].second; - } - - void add(unsigned PID, const BitVector &BV) { - Partitions[PID].second |= BV; - updateScore(PID); - } - - void print(raw_ostream &OS) const; - LLVM_DUMP_METHOD void dump() const { print(dbgs()); } - - // Find the cheapest partition (lowest cost). In case of ties, always returns - // the highest partition number. - unsigned findCheapestPartition() const; - - /// Calculate the CodeSize and Bottleneck scores. - void calculateScores(); - -#ifndef NDEBUG - void verifyCompleteness() const; -#endif - - /// Only available after \ref calculateScores is called. - /// - /// A positive number indicating the % of code duplication that this proposal - /// creates. e.g. 0.2 means this proposal adds roughly 20% code size by - /// duplicating some functions across partitions. - /// - /// Value is always rounded up to 3 decimal places. - /// - /// A perfect score would be 0.0, and anything approaching 1.0 is very bad. - double getCodeSizeScore() const { return CodeSizeScore; } - - /// Only available after \ref calculateScores is called. 
- /// - /// A number between [0, 1] which indicates how big of a bottleneck is - /// expected from the largest partition. - /// - /// A score of 1.0 means the biggest partition is as big as the source module, - /// so build time will be equal to or greater than the build time of the - /// initial input. - /// - /// Value is always rounded up to 3 decimal places. - /// - /// This is one of the metrics used to estimate this proposal's build time. - double getBottleneckScore() const { return BottleneckScore; } - -private: - void updateScore(unsigned PID) { - assert(SG); - for (auto &[PCost, Nodes] : Partitions) { - TotalCost -= PCost; - PCost = SG->calculateCost(Nodes); - TotalCost += PCost; - } - } - - /// \see getCodeSizeScore - double CodeSizeScore = 0.0; - /// \see getBottleneckScore - double BottleneckScore = 0.0; - /// Aggregated cost of all partitions CostType TotalCost = 0; - const SplitGraph *SG = nullptr; - std::string Name; - - std::vector> Partitions; -}; - -void SplitProposal::print(raw_ostream &OS) const { - assert(SG); - - OS << "[proposal] " << Name << ", total cost:" << TotalCost - << ", code size score:" << format("%0.3f", CodeSizeScore) - << ", bottleneck score:" << format("%0.3f", BottleneckScore) << '\n'; - for (const auto &[PID, Part] : enumerate(Partitions)) { - const auto &[Cost, NodeIDs] = Part; - OS << " - P" << PID << " nodes:" << NodeIDs.count() << " cost: " << Cost - << '|' << formatRatioOf(Cost, SG->getModuleCost()) << "%\n"; - } -} - -unsigned SplitProposal::findCheapestPartition() const { - assert(!Partitions.empty()); - CostType CurCost = std::numeric_limits::max(); - unsigned CurPID = InvalidPID; - for (const auto &[Idx, Part] : enumerate(Partitions)) { - if (Part.first <= CurCost) { - CurPID = Idx; - CurCost = Part.first; - } - } - assert(CurPID != InvalidPID); - return CurPID; -} - -void SplitProposal::calculateScores() { - if (Partitions.empty()) - return; - - assert(SG); - CostType LargestPCost = 0; - for (auto &[PCost, Nodes] : Partitions) { - if (PCost > LargestPCost) - LargestPCost = PCost; + /// \returns true if this function and its dependencies can be considered + /// large according to \p Threshold. + bool isLarge(CostType Threshold) const { + return TotalCost > Threshold && !Dependencies.empty(); } - - CostType ModuleCost = SG->getModuleCost(); - CodeSizeScore = double(TotalCost) / ModuleCost; - assert(CodeSizeScore >= 0.0); - - BottleneckScore = double(LargestPCost) / ModuleCost; - - CodeSizeScore = std::ceil(CodeSizeScore * 100.0) / 100.0; - BottleneckScore = std::ceil(BottleneckScore * 100.0) / 100.0; -} - -#ifndef NDEBUG -void SplitProposal::verifyCompleteness() const { - if (Partitions.empty()) - return; - - BitVector Result = Partitions[0].second; - for (const auto &P : drop_begin(Partitions)) - Result |= P.second; - assert(Result.all() && "some nodes are missing from this proposal!"); -} -#endif - -//===-- RecursiveSearchStrategy -------------------------------------------===// - -/// Partitioning algorithm. -/// -/// This is a recursive search algorithm that can explore multiple possiblities. -/// -/// When a cluster of nodes can go into more than one partition, and we haven't -/// reached maximum search depth, we recurse and explore both options and their -/// consequences. Both branches will yield a proposal, and the driver will grade -/// both and choose the best one. -/// -/// If max depth is reached, we will use some heuristics to make a choice. 
Most -/// of the time we will just use the least-pressured (cheapest) partition, but -/// if a cluster is particularly big and there is a good amount of overlap with -/// an existing partition, we will choose that partition instead. -class RecursiveSearchSplitting { -public: - using SubmitProposalFn = function_ref; - - RecursiveSearchSplitting(const SplitGraph &SG, unsigned NumParts, - SubmitProposalFn SubmitProposal); - - void run(); - -private: - struct WorkListEntry { - WorkListEntry(const BitVector &BV) : Cluster(BV) {} - - unsigned NumNonEntryNodes = 0; - CostType TotalCost = 0; - CostType CostExcludingGraphEntryPoints = 0; - BitVector Cluster; - }; - - /// Collects all graph entry points's clusters and sort them so the most - /// expensive clusters are viewed first. This will merge clusters together if - /// they share a non-copyable dependency. - void setupWorkList(); - - /// Recursive function that assigns the worklist item at \p Idx into a - /// partition of \p SP. - /// - /// \p Depth is the current search depth. When this value is equal to - /// \ref MaxDepth, we can no longer recurse. - /// - /// This function only recurses if there is more than one possible assignment, - /// otherwise it is iterative to avoid creating a call stack that is as big as - /// \ref WorkList. - void pickPartition(unsigned Depth, unsigned Idx, SplitProposal SP); - - /// \return A pair: first element is the PID of the partition that has the - /// most similarities with \p Entry, or \ref InvalidPID if no partition was - /// found with at least one element in common. The second element is the - /// aggregated cost of all dependencies in common between \p Entry and that - /// partition. - std::pair - findMostSimilarPartition(const WorkListEntry &Entry, const SplitProposal &SP); - - const SplitGraph &SG; - unsigned NumParts; - SubmitProposalFn SubmitProposal; - - // A Cluster is considered large when its cost, excluding entry points, - // exceeds this value. - CostType LargeClusterThreshold = 0; - unsigned NumProposalsSubmitted = 0; - SmallVector WorkList; }; -RecursiveSearchSplitting::RecursiveSearchSplitting( - const SplitGraph &SG, unsigned NumParts, SubmitProposalFn SubmitProposal) - : SG(SG), NumParts(NumParts), SubmitProposal(SubmitProposal) { - // arbitrary max value as a safeguard. Anything above 10 will already be - // slow, this is just a max value to prevent extreme resource exhaustion or - // unbounded run time. - if (MaxDepth > 16) - report_fatal_error("[amdgpu-split-module] search depth of " + - Twine(MaxDepth) + " is too high!"); - LargeClusterThreshold = - (LargeFnFactor != 0.0) - ? CostType(((SG.getModuleCost() / NumParts) * LargeFnFactor)) - : std::numeric_limits::max(); - LLVM_DEBUG(dbgs() << "[recursive search] large cluster threshold set at " - << LargeClusterThreshold << "\n"); -} - -void RecursiveSearchSplitting::run() { - { - SplitModuleTimer SMT("recursive_search_prepare", "preparing worklist"); - setupWorkList(); +/// Calculates how much overlap there is between \p A and \p B. +/// \return A number between 0.0 and 1.0, where 1.0 means A == B and 0.0 means A +/// and B have no shared elements. Kernels do not count in overlap calculation. 
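
The overlap measure documented above (and implemented just below) is essentially a Jaccard index over the two dependency sets, shared elements divided by the union, with kernels filtered out. A self-contained sketch of the same computation; plain integers stand in for `Function` pointers and `isKernel` is a hypothetical predicate:

```cpp
#include <set>

// Shared non-kernel dependencies divided by the union of both sets;
// returns 0.0 when A contributes nothing, matching the early-out below.
static float overlap(const std::set<int> &A, const std::set<int> &B,
                     bool (*isKernel)(int)) {
  std::set<int> Union;
  unsigned Common = 0;
  for (int F : A)
    if (!isKernel(F))
      Union.insert(F);
  if (Union.empty())
    return 0.0f;
  for (int F : B) {
    if (isKernel(F))
      continue;
    // A failed insertion means F was already contributed by A, i.e. it is
    // a dependency the two sets share.
    if (!Union.insert(F).second)
      ++Common;
  }
  return static_cast<float>(Common) / Union.size();
}
```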
+static float calculateOverlap(const DenseSet &A, + const DenseSet &B) { + DenseSet Total; + for (const auto *F : A) { + if (!isEntryPoint(F)) + Total.insert(F); } - { - SplitModuleTimer SMT("recursive_search_pick", "partitioning"); - SplitProposal SP(SG, NumParts); - pickPartition(/*BranchDepth=*/0, /*Idx=*/0, SP); - } -} + if (Total.empty()) + return 0.0f; -void RecursiveSearchSplitting::setupWorkList() { - // e.g. if A and B are two worklist item, and they both call a non copyable - // dependency C, this does: - // A=C - // B=C - // => NodeEC will create a single group (A, B, C) and we create a new - // WorkList entry for that group. - - EquivalenceClasses NodeEC; - for (const SplitGraph::Node *N : SG.nodes()) { - if (!N->isGraphEntryPoint()) + unsigned NumCommon = 0; + for (const auto *F : B) { + if (isEntryPoint(F)) continue; - NodeEC.insert(N->getID()); - N->visitAllDependencies([&](const SplitGraph::Node &Dep) { - if (&Dep != N && Dep.isNonCopyable()) - NodeEC.unionSets(N->getID(), Dep.getID()); - }); + auto [It, Inserted] = Total.insert(F); + if (!Inserted) + ++NumCommon; } - for (auto I = NodeEC.begin(), E = NodeEC.end(); I != E; ++I) { - if (!I->isLeader()) - continue; + return static_cast(NumCommon) / Total.size(); +} - BitVector Cluster = SG.createNodesBitVector(); - for (auto MI = NodeEC.member_begin(I); MI != NodeEC.member_end(); ++MI) { - const SplitGraph::Node &N = SG.getNode(*MI); - if (N.isGraphEntryPoint()) - N.getDependencies(Cluster); - } - WorkList.emplace_back(std::move(Cluster)); - } +/// Performs all of the partitioning work on \p M. +/// \param SML Log Helper +/// \param M Module to partition. +/// \param NumParts Number of partitions to create. +/// \param ModuleCost Total cost of all functions in \p M. +/// \param FnCosts Map of Function -> Cost +/// \param WorkList Functions and their dependencies to process in order. +/// \returns The created partitions (a vector of size \p NumParts ) +static std::vector> +doPartitioning(SplitModuleLogger &SML, Module &M, unsigned NumParts, + CostType ModuleCost, + const DenseMap &FnCosts, + const SmallVector &WorkList) { + + SML << "\n--Partitioning Starts--\n"; + + // Calculate a "large function threshold". When more than one function's total + // import cost exceeds this value, we will try to assign it to an existing + // partition to reduce the amount of duplication needed. + // + // e.g. let two functions X and Y have a import cost of ~10% of the module, we + // assign X to a partition as usual, but when we get to Y, we check if it's + // worth also putting it in Y's partition. + const CostType LargeFnThreshold = + LargeFnFactor ? CostType(((ModuleCost / NumParts) * LargeFnFactor)) + : std::numeric_limits::max(); + + std::vector> Partitions; + Partitions.resize(NumParts); + + // Assign functions to partitions, and try to keep the partitions more or + // less balanced. We do that through a priority queue sorted in reverse, so we + // can always look at the partition with the least content. + // + // There are some cases where we will be deliberately unbalanced though. + // - Large functions: we try to merge with existing partitions to reduce code + // duplication. + // - Functions with indirect or external calls always go in the first + // partition (P0). + auto ComparePartitions = [](const std::pair &a, + const std::pair &b) { + // When two partitions have the same cost, assign to the one with the + // biggest ID first. This allows us to put things in P0 last, because P0 may + // have other stuff added later. 
+ if (a.second == b.second) + return a.first < b.first; + return a.second > b.second; + }; - // Calculate costs and other useful information. - for (WorkListEntry &Entry : WorkList) { - for (unsigned NodeID : Entry.Cluster.set_bits()) { - const SplitGraph::Node &N = SG.getNode(NodeID); - const CostType Cost = N.getIndividualCost(); + // We can't use priority_queue here because we need to be able to access any + // element. This makes this a bit inefficient as we need to sort it again + // everytime we change it, but it's a very small array anyway (likely under 64 + // partitions) so it's a cheap operation. + std::vector> BalancingQueue; + for (unsigned I = 0; I < NumParts; ++I) + BalancingQueue.emplace_back(I, 0); + + // Helper function to handle assigning a function to a partition. This takes + // care of updating the balancing queue. + const auto AssignToPartition = [&](PartitionID PID, + const FunctionWithDependencies &FWD) { + auto &FnsInPart = Partitions[PID]; + FnsInPart.insert(FWD.Fn); + FnsInPart.insert(FWD.Dependencies.begin(), FWD.Dependencies.end()); + + SML << "assign " << getName(*FWD.Fn) << " to P" << PID << "\n -> "; + if (!FWD.Dependencies.empty()) { + SML << FWD.Dependencies.size() << " dependencies added\n"; + }; + + // Update the balancing queue. we scan backwards because in the common case + // the partition is at the end. + for (auto &[QueuePID, Cost] : reverse(BalancingQueue)) { + if (QueuePID == PID) { + CostType NewCost = 0; + for (auto *Fn : Partitions[PID]) + NewCost += FnCosts.at(Fn); + + SML << "[Updating P" << PID << " Cost]:" << Cost << " -> " << NewCost; + if (Cost) { + SML << " (" << unsigned(((float(NewCost) / Cost) - 1) * 100) + << "% increase)"; + } + SML << '\n'; - Entry.TotalCost += Cost; - if (!N.isGraphEntryPoint()) { - Entry.CostExcludingGraphEntryPoints += Cost; - ++Entry.NumNonEntryNodes; + Cost = NewCost; } } - } - sort(WorkList, [](const WorkListEntry &LHS, const WorkListEntry &RHS) { - return LHS.TotalCost > RHS.TotalCost; - }); - - LLVM_DEBUG({ - dbgs() << "[recursive search] worklist:\n"; - for (const auto &[Idx, Entry] : enumerate(WorkList)) { - dbgs() << " - [" << Idx << "]: "; - for (unsigned NodeID : Entry.Cluster.set_bits()) - dbgs() << NodeID << " "; - dbgs() << "(total_cost:" << Entry.TotalCost - << ", cost_excl_entries:" << Entry.CostExcludingGraphEntryPoints - << ")\n"; - } - }); -} + sort(BalancingQueue, ComparePartitions); + }; -void RecursiveSearchSplitting::pickPartition(unsigned Depth, unsigned Idx, - SplitProposal SP) { - while (Idx < WorkList.size()) { - // Step 1: Determine candidate PIDs. - // - const WorkListEntry &Entry = WorkList[Idx]; - const BitVector &Cluster = Entry.Cluster; - - // Default option is to do load-balancing, AKA assign to least pressured - // partition. - const unsigned CheapestPID = SP.findCheapestPartition(); - assert(CheapestPID != InvalidPID); - - // Explore assigning to the kernel that contains the most dependencies in - // common. - const auto [MostSimilarPID, SimilarDepsCost] = - findMostSimilarPartition(Entry, SP); - - // We can chose to explore only one path if we only have one valid path, or - // if we reached maximum search depth and can no longer branch out. - unsigned SinglePIDToTry = InvalidPID; - if (MostSimilarPID == InvalidPID) // no similar PID found - SinglePIDToTry = CheapestPID; - else if (MostSimilarPID == CheapestPID) // both landed on the same PID - SinglePIDToTry = CheapestPID; - else if (Depth >= MaxDepth) { - // We have to choose one path. 
Use a heuristic to guess which one will be - // more appropriate. - if (Entry.CostExcludingGraphEntryPoints > LargeClusterThreshold) { - // Check if the amount of code in common makes it worth it. - assert(SimilarDepsCost && Entry.CostExcludingGraphEntryPoints); - const double Ratio = - SimilarDepsCost / Entry.CostExcludingGraphEntryPoints; - assert(Ratio >= 0.0 && Ratio <= 1.0); - if (LargeFnOverlapForMerge > Ratio) { - // For debug, just print "L", so we'll see "L3=P3" for instance, which - // will mean we reached max depth and chose P3 based on this - // heuristic. - LLVM_DEBUG(dbgs() << 'L'); - SinglePIDToTry = MostSimilarPID; - } - } else - SinglePIDToTry = CheapestPID; + for (auto &CurFn : WorkList) { + // When a function has indirect calls, it must stay in the first partition + // alongside every reachable non-entry function. This is a nightmare case + // for splitting as it severely limits what we can do. + if (CurFn.HasIndirectCall) { + SML << "Function with indirect call(s): " << getName(*CurFn.Fn) + << " defaulting to P0\n"; + AssignToPartition(0, CurFn); + continue; } - // Step 2: Explore candidates. - - // When we only explore one possible path, and thus branch depth doesn't - // increase, do not recurse, iterate instead. - if (SinglePIDToTry != InvalidPID) { - LLVM_DEBUG(dbgs() << Idx << "=P" << SinglePIDToTry << ' '); - // Only one path to explore, don't clone SP, don't increase depth. - SP.add(SinglePIDToTry, Cluster); - ++Idx; + // When a function has non duplicatable dependencies, we have to keep it in + // the first partition as well. This is a conservative approach, a + // finer-grained approach could keep track of which dependencies are + // non-duplicatable exactly and just make sure they're grouped together. + if (CurFn.HasNonDuplicatableDependecy) { + SML << "Function with externally visible dependency " + << getName(*CurFn.Fn) << " defaulting to P0\n"; + AssignToPartition(0, CurFn); continue; } - assert(MostSimilarPID != InvalidPID); - - // We explore multiple paths: recurse at increased depth, then stop this - // function. - - LLVM_DEBUG(dbgs() << '\n'); - - // lb = load balancing = put in cheapest partition - { - SplitProposal BranchSP = SP; - LLVM_DEBUG(dbgs().indent(Depth) - << " [lb] " << Idx << "=P" << CheapestPID << "? "); - BranchSP.add(CheapestPID, Cluster); - pickPartition(Depth + 1, Idx + 1, BranchSP); - } + // Be smart with large functions to avoid duplicating their dependencies. + if (CurFn.isLarge(LargeFnThreshold)) { + assert(LargeFnOverlapForMerge >= 0.0f && LargeFnOverlapForMerge <= 1.0f); + SML << "Large Function: " << getName(*CurFn.Fn) + << " - looking for partition with at least " + << format("%0.2f", LargeFnOverlapForMerge * 100) << "% overlap\n"; + + bool Assigned = false; + for (const auto &[PID, Fns] : enumerate(Partitions)) { + float Overlap = calculateOverlap(CurFn.Dependencies, Fns); + SML << " => " << format("%0.2f", Overlap * 100) << "% overlap with P" + << PID << '\n'; + if (Overlap > LargeFnOverlapForMerge) { + SML << " selecting P" << PID << '\n'; + AssignToPartition(PID, CurFn); + Assigned = true; + } + } - // ms = most similar = put in partition with the most in common - { - SplitProposal BranchSP = SP; - LLVM_DEBUG(dbgs().indent(Depth) - << " [ms] " << Idx << "=P" << MostSimilarPID << "? "); - BranchSP.add(MostSimilarPID, Cluster); - pickPartition(Depth + 1, Idx + 1, BranchSP); + if (Assigned) + continue; } - return; + // Normal "load-balancing", assign to partition with least pressure. 
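
The fallback path right below pulls the cheapest partition off the back of `BalancingQueue`, which `ComparePartitions` keeps sorted by descending cost. In isolation, that load-balancing step looks roughly like this sketch, which assumes a small partition count where a full re-sort per assignment stays cheap; `assignToCheapest` is an illustrative name:

```cpp
#include <algorithm>
#include <utility>
#include <vector>

using PartitionID = unsigned;
using CostType = long long; // stands in for InstructionCost::CostType

// Charge RootCost to the least-loaded partition, then restore the
// descending-cost order so the cheapest partition sits at the back again.
static PartitionID
assignToCheapest(std::vector<std::pair<PartitionID, CostType>> &Queue,
                 CostType RootCost) {
  PartitionID PID = Queue.back().first;
  Queue.back().second += RootCost;
  std::sort(Queue.begin(), Queue.end(), [](const auto &A, const auto &B) {
    // On equal cost, lower IDs sort first, so the highest ID is picked
    // from the back and P0 is used last.
    if (A.second == B.second)
      return A.first < B.first;
    return A.second > B.second;
  });
  return PID;
}
```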
+ auto [PID, CurCost] = BalancingQueue.back(); + AssignToPartition(PID, CurFn); } - // Step 3: If we assigned all WorkList items, submit the proposal. - - assert(Idx == WorkList.size()); - assert(NumProposalsSubmitted <= (2u << MaxDepth) && - "Search got out of bounds?"); - SP.setName("recursive_search (depth=" + std::to_string(Depth) + ") #" + - std::to_string(NumProposalsSubmitted++)); - LLVM_DEBUG(dbgs() << '\n'); - SubmitProposal(SP); -} - -std::pair -RecursiveSearchSplitting::findMostSimilarPartition(const WorkListEntry &Entry, - const SplitProposal &SP) { - if (!Entry.NumNonEntryNodes) - return {InvalidPID, 0}; - - // We take the partition that is the most similar using Cost as a metric. - // So we take the set of nodes in common, compute their aggregated cost, and - // pick the partition with the highest cost in common. - unsigned ChosenPID = InvalidPID; - CostType ChosenCost = 0; - for (unsigned PID = 0; PID < NumParts; ++PID) { - BitVector BV = SP[PID]; - BV &= Entry.Cluster; // FIXME: & doesn't work between BVs?! - - if (BV.none()) - continue; - - const CostType Cost = SG.calculateCost(BV); - - if (ChosenPID == InvalidPID || ChosenCost < Cost || - (ChosenCost == Cost && PID > ChosenPID)) { - ChosenPID = PID; - ChosenCost = Cost; + if (SML) { + CostType ModuleCostOr1 = ModuleCost ? ModuleCost : 1; + for (const auto &[Idx, Part] : enumerate(Partitions)) { + CostType Cost = 0; + for (auto *Fn : Part) + Cost += FnCosts.at(Fn); + SML << "P" << Idx << " has a total cost of " << Cost << " (" + << format("%0.2f", (float(Cost) / ModuleCostOr1) * 100) + << "% of source module)\n"; } - } - - return {ChosenPID, ChosenCost}; -} -//===----------------------------------------------------------------------===// -// DOTGraph Printing Support -//===----------------------------------------------------------------------===// - -const SplitGraph::Node *mapEdgeToDst(const SplitGraph::Edge *E) { - return E->Dst; -} - -using SplitGraphEdgeDstIterator = - mapped_iterator; - -} // namespace - -template <> struct GraphTraits { - using NodeRef = const SplitGraph::Node *; - using nodes_iterator = SplitGraph::nodes_iterator; - using ChildIteratorType = SplitGraphEdgeDstIterator; - - using EdgeRef = const SplitGraph::Edge *; - using ChildEdgeIteratorType = SplitGraph::edges_iterator; - - static NodeRef getEntryNode(NodeRef N) { return N; } - - static ChildIteratorType child_begin(NodeRef Ref) { - return {Ref->outgoing_edges().begin(), mapEdgeToDst}; - } - static ChildIteratorType child_end(NodeRef Ref) { - return {Ref->outgoing_edges().end(), mapEdgeToDst}; - } - - static nodes_iterator nodes_begin(const SplitGraph &G) { - return G.nodes().begin(); - } - static nodes_iterator nodes_end(const SplitGraph &G) { - return G.nodes().end(); - } -}; - -template <> struct DOTGraphTraits : public DefaultDOTGraphTraits { - DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} - - static std::string getGraphName(const SplitGraph &SG) { - return SG.getModule().getName().str(); - } - - std::string getNodeLabel(const SplitGraph::Node *N, const SplitGraph &SG) { - return N->getName().str(); - } - - static std::string getNodeDescription(const SplitGraph::Node *N, - const SplitGraph &SG) { - std::string Result; - if (N->isEntryFunctionCC()) - Result += "entry-fn-cc "; - if (N->isNonCopyable()) - Result += "non-copyable "; - Result += "cost:" + std::to_string(N->getIndividualCost()); - return Result; - } - - static std::string getNodeAttributes(const SplitGraph::Node *N, - const SplitGraph &SG) { - return 
N->hasAnyIncomingEdges() ? "" : "color=\"red\""; + SML << "--Partitioning Done--\n\n"; } - static std::string getEdgeAttributes(const SplitGraph::Node *N, - SplitGraphEdgeDstIterator EI, - const SplitGraph &SG) { + // Check no functions were missed. +#ifndef NDEBUG + DenseSet AllFunctions; + for (const auto &Part : Partitions) + AllFunctions.insert(Part.begin(), Part.end()); - switch ((*EI.getCurrent())->Kind) { - case SplitGraph::EdgeKind::DirectCall: - return ""; - case SplitGraph::EdgeKind::IndirectCall: - return "style=\"dashed\""; + for (auto &Fn : M) { + if (!Fn.isDeclaration() && !AllFunctions.contains(&Fn)) { + assert(AllFunctions.contains(&Fn) && "Missed a function?!"); } - llvm_unreachable("Unknown SplitGraph::EdgeKind enum"); } -}; - -//===----------------------------------------------------------------------===// -// Driver -//===----------------------------------------------------------------------===// - -namespace { +#endif -// If we didn't externalize GVs, then local GVs need to be conservatively -// imported into every module (including their initializers), and then cleaned -// up afterwards. -static bool needsConservativeImport(const GlobalValue *GV) { - if (const auto *Var = dyn_cast(GV)) - return Var->hasLocalLinkage(); - return isa(GV); + return Partitions; } -/// Prints a summary of the partition \p N, represented by module \p M, to \p -/// OS. -static void printPartitionSummary(raw_ostream &OS, unsigned N, const Module &M, - unsigned PartCost, unsigned ModuleCost) { - OS << "*** Partition P" << N << " ***\n"; - - for (const auto &Fn : M) { - if (!Fn.isDeclaration()) - OS << " - [function] " << Fn.getName() << "\n"; - } - - for (const auto &GV : M.globals()) { - if (GV.hasInitializer()) - OS << " - [global] " << GV.getName() << "\n"; +static void externalize(GlobalValue &GV) { + if (GV.hasLocalLinkage()) { + GV.setLinkage(GlobalValue::ExternalLinkage); + GV.setVisibility(GlobalValue::HiddenVisibility); } - OS << "Partition contains " << formatRatioOf(PartCost, ModuleCost) - << "% of the source\n"; -} - -static void evaluateProposal(SplitProposal &Best, SplitProposal New) { - SplitModuleTimer SMT("proposal_evaluation", "proposal ranking algorithm"); - - New.calculateScores(); - - LLVM_DEBUG({ - New.verifyCompleteness(); - if (DebugProposalSearch) - New.print(dbgs()); - }); - - const double CurBScore = Best.getBottleneckScore(); - const double CurCSScore = Best.getCodeSizeScore(); - const double NewBScore = New.getBottleneckScore(); - const double NewCSScore = New.getCodeSizeScore(); - - // TODO: Improve this - // We can probably lower the precision of the comparison at first - // e.g. if we have - // - (Current): BScore: 0.489 CSCore 1.105 - // - (New): BScore: 0.475 CSCore 1.305 - // Currently we'd choose the new one because the bottleneck score is - // lower, but the new one duplicates more code. It may be worth it to - // discard the new proposal as the impact on build time is negligible. - - // Compare them - bool IsBest = false; - if (NewBScore < CurBScore) - IsBest = true; - else if (NewBScore == CurBScore) - IsBest = (NewCSScore < CurCSScore); // Use code size as tie breaker. - - if (IsBest) - Best = std::move(New); - - LLVM_DEBUG(if (DebugProposalSearch) { - if (IsBest) - dbgs() << "[search] new best proposal!\n"; - else - dbgs() << "[search] discarding - not profitable\n"; - }); -} - -/// Trivial helper to create an identical copy of \p M. 
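
Further down, each partition is materialized by handing `CloneModule` a predicate that keeps only the functions chosen for that partition, plus, for the first partition, everything else. A hedged sketch of that mechanism in isolation; `materializePartition` and `KeptFns` are illustrative names, not part of the patch:

```cpp
#include <memory>

#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/Cloning.h"

// Clone M, keeping the function definitions listed in KeptFns; all other
// global definitions are kept only in the first partition.
static std::unique_ptr<llvm::Module>
materializePartition(const llvm::Module &M,
                     const llvm::DenseSet<const llvm::Function *> &KeptFns,
                     bool IsFirstPartition) {
  llvm::ValueToValueMapTy VMap;
  return llvm::CloneModule(M, VMap, [&](const llvm::GlobalValue *GV) {
    if (const auto *Fn = llvm::dyn_cast<llvm::Function>(GV))
      return KeptFns.contains(Fn);
    return IsFirstPartition;
  });
}
```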
-static std::unique_ptr<Module> cloneAll(const Module &M) {
-  ValueToValueMapTy VMap;
-  return CloneModule(M, VMap, [&](const GlobalValue *GV) { return true; });
-}
 
-/// Writes \p SG as a DOTGraph to \ref ModuleDotCfgDir if requested.
-static void writeDOTGraph(const SplitGraph &SG) {
-  if (ModuleDotCfgOutput.empty())
-    return;
-
-  std::error_code EC;
-  raw_fd_ostream OS(ModuleDotCfgOutput, EC);
-  if (EC) {
-    errs() << "[" DEBUG_TYPE "]: cannot open '" << ModuleDotCfgOutput
-           << "' - DOTGraph will not be printed\n";
-  }
-  WriteGraph(OS, SG, /*ShortName=*/false,
-             /*Title=*/SG.getModule().getName());
-}
 
 static void splitAMDGPUModule(
-    GetTTIFn GetTTI, Module &M, unsigned NumParts,
+    GetTTIFn GetTTI, Module &M, unsigned N,
     function_ref<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
+
+  SplitModuleLogger SML(M);
+
   CallGraph CG(M);
 
   // Externalize functions whose address are taken.
@@ -1341,8 +639,8 @@ static void splitAMDGPUModule(
   for (auto &Fn : M) {
     if (Fn.hasAddressTaken()) {
       if (Fn.hasLocalLinkage()) {
-        LLVM_DEBUG(dbgs() << "[externalize] " << Fn.getName()
-                          << " because its address is taken\n");
+        SML << "[externalize] " << Fn.getName()
+            << " because its address is taken\n";
       }
       externalize(Fn);
     }
@@ -1353,179 +651,138 @@ static void splitAMDGPUModule(
   if (!NoExternalizeGlobals) {
     for (auto &GV : M.globals()) {
       if (GV.hasLocalLinkage())
-        LLVM_DEBUG(dbgs() << "[externalize] GV " << GV.getName() << '\n');
+        SML << "[externalize] GV " << GV.getName() << '\n';
       externalize(GV);
     }
   }
 
   // Start by calculating the cost of every function in the module, as well as
   // the module's overall cost.
-  FunctionsCostMap FnCosts;
-  const CostType ModuleCost = calculateFunctionCosts(GetTTI, M, FnCosts);
-
-  // Build the SplitGraph, which represents the module's functions and models
-  // their dependencies accurately.
-  SplitGraph SG(M, FnCosts, ModuleCost);
-  SG.buildGraph(CG);
-
-  if (SG.empty()) {
-    LLVM_DEBUG(
-        dbgs()
-        << "[!] no nodes in graph, input is empty - no splitting possible\n");
-    ModuleCallback(cloneAll(M));
-    return;
+  DenseMap<const Function *, CostType> FnCosts;
+  const CostType ModuleCost = calculateFunctionCosts(SML, GetTTI, M, FnCosts);
+
+  // First, gather every kernel into the worklist.
+  SmallVector<FunctionWithDependencies> WorkList;
+  for (auto &Fn : M) {
+    if (isEntryPoint(&Fn) && !Fn.isDeclaration())
+      WorkList.emplace_back(SML, CG, FnCosts, &Fn);
   }
 
-  LLVM_DEBUG({
-    dbgs() << "[graph] nodes:\n";
-    for (const SplitGraph::Node *N : SG.nodes()) {
-      dbgs() << "  - [" << N->getID() << "]: " << N->getName() << " "
-             << (N->isGraphEntryPoint() ? "(entry)" : "") << "\n";
+  // Then, find missing functions that need to be considered as additional
+  // roots. These can't be called in theory, but in practice we still have to
+  // handle them to avoid linker errors.
+ { + DenseSet SeenFunctions; + for (const auto &FWD : WorkList) { + SeenFunctions.insert(FWD.Fn); + SeenFunctions.insert(FWD.Dependencies.begin(), FWD.Dependencies.end()); } - }); - writeDOTGraph(SG); - - LLVM_DEBUG(dbgs() << "[search] testing splitting strategies\n"); - - std::optional Proposal; - const auto EvaluateProposal = [&](SplitProposal SP) { - if (!Proposal) - Proposal = std::move(SP); - else - evaluateProposal(*Proposal, std::move(SP)); - }; - - // TODO: It would be very easy to create new strategies by just adding a base - // class to RecursiveSearchSplitting and abstracting it away. - RecursiveSearchSplitting(SG, NumParts, EvaluateProposal).run(); - LLVM_DEBUG(if (Proposal) dbgs() << "[search done] selected proposal: " - << Proposal->getName() << "\n";); - - if (!Proposal) { - LLVM_DEBUG(dbgs() << "[!] no proposal made, no splitting possible!\n"); - ModuleCallback(cloneAll(M)); - return; + for (auto &Fn : M) { + // If this function is not part of any kernel's dependencies and isn't + // directly called, consider it as a root. + if (!Fn.isDeclaration() && !isEntryPoint(&Fn) && + !SeenFunctions.count(&Fn) && !hasDirectCaller(Fn)) { + WorkList.emplace_back(SML, CG, FnCosts, &Fn); + } + } } - LLVM_DEBUG(Proposal->print(dbgs());); + // Sort the worklist so the most expensive roots are seen first. + sort(WorkList, [&](auto &A, auto &B) { + // Sort by total cost, and if the total cost is identical, sort + // alphabetically. + if (A.TotalCost == B.TotalCost) + return A.Fn->getName() < B.Fn->getName(); + return A.TotalCost > B.TotalCost; + }); - std::optional SummariesOS; - if (!PartitionSummariesOutput.empty()) { - std::error_code EC; - SummariesOS.emplace(PartitionSummariesOutput, EC); - if (EC) - errs() << "[" DEBUG_TYPE "]: cannot open '" << PartitionSummariesOutput - << "' - Partition summaries will not be printed\n"; + if (SML) { + SML << "Worklist\n"; + for (const auto &FWD : WorkList) { + SML << "[root] " << getName(*FWD.Fn) << " (totalCost:" << FWD.TotalCost + << " indirect:" << FWD.HasIndirectCall + << " hasNonDuplicatableDep:" << FWD.HasNonDuplicatableDependecy + << ")\n"; + // Sort function names before printing to ensure determinism. + SmallVector SortedDepNames; + SortedDepNames.reserve(FWD.Dependencies.size()); + for (const auto *Dep : FWD.Dependencies) + SortedDepNames.push_back(getName(*Dep)); + sort(SortedDepNames); + + for (const auto &Name : SortedDepNames) + SML << " [dependency] " << Name << '\n'; + } } - for (unsigned PID = 0; PID < NumParts; ++PID) { - SplitModuleTimer SMT2("modules_creation", - "creating modules for each partition"); - LLVM_DEBUG(dbgs() << "[split] creating new modules\n"); + // This performs all of the partitioning work. + auto Partitions = doPartitioning(SML, M, N, ModuleCost, FnCosts, WorkList); + assert(Partitions.size() == N); + + // If we didn't externalize GVs, then local GVs need to be conservatively + // imported into every module (including their initializers), and then cleaned + // up afterwards. + const auto NeedsConservativeImport = [&](const GlobalValue *GV) { + // We conservatively import private/internal GVs into every module and clean + // them up afterwards. 
+ const auto *Var = dyn_cast(GV); + return Var && Var->hasLocalLinkage(); + }; - DenseSet FnsInPart; - for (unsigned NodeID : (*Proposal)[PID].set_bits()) - FnsInPart.insert(&SG.getNode(NodeID).getFunction()); + SML << "Creating " << N << " modules...\n"; + unsigned TotalFnImpls = 0; + for (unsigned I = 0; I < N; ++I) { + const auto &FnsInPart = Partitions[I]; ValueToValueMapTy VMap; - CostType PartCost = 0; std::unique_ptr MPart( CloneModule(M, VMap, [&](const GlobalValue *GV) { // Functions go in their assigned partition. - if (const auto *Fn = dyn_cast(GV)) { - if (FnsInPart.contains(Fn)) { - PartCost += SG.getCost(*Fn); - return true; - } - return false; - } + if (const auto *Fn = dyn_cast(GV)) + return FnsInPart.contains(Fn); + + if (NeedsConservativeImport(GV)) + return true; // Everything else goes in the first partition. - return needsConservativeImport(GV) || PID == 0; + return I == 0; })); - // FIXME: Aliases aren't seen often, and their handling isn't perfect so - // bugs are possible. - // Clean-up conservatively imported GVs without any users. - for (auto &GV : make_early_inc_range(MPart->global_values())) { - if (needsConservativeImport(&GV) && GV.use_empty()) + for (auto &GV : make_early_inc_range(MPart->globals())) { + if (NeedsConservativeImport(&GV) && GV.use_empty()) GV.eraseFromParent(); } - if (SummariesOS) - printPartitionSummary(*SummariesOS, PID, *MPart, PartCost, ModuleCost); - - LLVM_DEBUG( - printPartitionSummary(dbgs(), PID, *MPart, PartCost, ModuleCost)); - + unsigned NumAllFns = 0, NumKernels = 0; + for (auto &Cur : *MPart) { + if (!Cur.isDeclaration()) { + ++NumAllFns; + if (isEntryPoint(&Cur)) + ++NumKernels; + } + } + TotalFnImpls += NumAllFns; + SML << " - Module " << I << " with " << NumAllFns << " functions (" + << NumKernels << " kernels)\n"; ModuleCallback(std::move(MPart)); } + + SML << TotalFnImpls << " function definitions across all modules (" + << format("%0.2f", (float(TotalFnImpls) / FnCosts.size()) * 100) + << "% of original module)\n"; } } // namespace PreservedAnalyses AMDGPUSplitModulePass::run(Module &M, ModuleAnalysisManager &MAM) { - SplitModuleTimer SMT( - "total", "total pass runtime (incl. potentially waiting for lockfile)"); - FunctionAnalysisManager &FAM = MAM.getResult(M).getManager(); const auto TTIGetter = [&FAM](Function &F) -> const TargetTransformInfo & { return FAM.getResult(F); }; - - bool Done = false; -#ifndef NDEBUG - if (UseLockFile) { - SmallString<128> LockFilePath; - sys::path::system_temp_directory(/*ErasedOnReboot=*/true, LockFilePath); - sys::path::append(LockFilePath, "amdgpu-split-module-debug"); - LLVM_DEBUG(dbgs() << DEBUG_TYPE " using lockfile '" << LockFilePath - << "'\n"); - - while (true) { - llvm::LockFileManager Locked(LockFilePath.str()); - switch (Locked) { - case LockFileManager::LFS_Error: - LLVM_DEBUG( - dbgs() << "[amdgpu-split-module] unable to acquire lockfile, debug " - "output may be mangled by other processes\n"); - Locked.unsafeRemoveLockFile(); - break; - case LockFileManager::LFS_Owned: - break; - case LockFileManager::LFS_Shared: { - switch (Locked.waitForUnlock()) { - case LockFileManager::Res_Success: - break; - case LockFileManager::Res_OwnerDied: - continue; // try again to get the lock. 
- case LockFileManager::Res_Timeout: - LLVM_DEBUG( - dbgs() - << "[amdgpu-split-module] unable to acquire lockfile, debug " - "output may be mangled by other processes\n"); - Locked.unsafeRemoveLockFile(); - break; // give up - } - break; - } - } - - splitAMDGPUModule(TTIGetter, M, N, ModuleCallback); - Done = true; - break; - } - } -#endif - - if (!Done) - splitAMDGPUModule(TTIGetter, M, N, ModuleCallback); - - // We can change linkage/visibilities in the input, consider that nothing is - // preserved just to be safe. This pass runs last anyway. - return PreservedAnalyses::none(); + splitAMDGPUModule(TTIGetter, M, N, ModuleCallback); + // We don't change the original module. + return PreservedAnalyses::all(); } -} // namespace llvm diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 8b8452a2b78c80..10fef901f77181 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -79,7 +79,7 @@ enum class ImplicitItModeTy { Always, Never, ARMOnly, ThumbOnly }; static cl::opt ImplicitItMode( "arm-implicit-it", cl::init(ImplicitItModeTy::ARMOnly), - cl::desc("Allow conditional instructions outdside of an IT block"), + cl::desc("Allow conditional instructions outside of an IT block"), cl::values(clEnumValN(ImplicitItModeTy::Always, "always", "Accept in both ISAs, emit implicit ITs in Thumb"), clEnumValN(ImplicitItModeTy::Never, "never", @@ -441,7 +441,7 @@ class ARMAsmParser : public MCTargetAsmParser { bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned MnemonicOpsEndInd, unsigned ListIndex); - int tryParseRegister(bool AllowOutofBoundReg = false); + MCRegister tryParseRegister(bool AllowOutofBoundReg = false); bool tryParseRegisterWithWriteBack(OperandVector &); int tryParseShiftRegister(OperandVector &); std::optional tryParseShiftToken(); @@ -4205,7 +4205,7 @@ bool ARMAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, EndLoc = Tok.getEndLoc(); Reg = tryParseRegister(); - return Reg == (unsigned)-1; + return !Reg; } ParseStatus ARMAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, @@ -4216,59 +4216,59 @@ ParseStatus ARMAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, } /// Try to parse a register name. The token must be an Identifier when called, -/// and if it is a register name the token is eaten and the register number is -/// returned. Otherwise return -1. -int ARMAsmParser::tryParseRegister(bool AllowOutOfBoundReg) { +/// and if it is a register name the token is eaten and the register is +/// returned. Otherwise return an invalid MCRegister. +MCRegister ARMAsmParser::tryParseRegister(bool AllowOutOfBoundReg) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); - if (Tok.isNot(AsmToken::Identifier)) return -1; + if (Tok.isNot(AsmToken::Identifier)) + return MCRegister(); std::string lowerCase = Tok.getString().lower(); - unsigned RegNum = MatchRegisterName(lowerCase); - if (!RegNum) { - RegNum = StringSwitch(lowerCase) - .Case("r13", ARM::SP) - .Case("r14", ARM::LR) - .Case("r15", ARM::PC) - .Case("ip", ARM::R12) - // Additional register name aliases for 'gas' compatibility. 
- .Case("a1", ARM::R0) - .Case("a2", ARM::R1) - .Case("a3", ARM::R2) - .Case("a4", ARM::R3) - .Case("v1", ARM::R4) - .Case("v2", ARM::R5) - .Case("v3", ARM::R6) - .Case("v4", ARM::R7) - .Case("v5", ARM::R8) - .Case("v6", ARM::R9) - .Case("v7", ARM::R10) - .Case("v8", ARM::R11) - .Case("sb", ARM::R9) - .Case("sl", ARM::R10) - .Case("fp", ARM::R11) - .Default(0); - } - if (!RegNum) { + MCRegister Reg = MatchRegisterName(lowerCase); + if (!Reg) { + Reg = StringSwitch(lowerCase) + .Case("r13", ARM::SP) + .Case("r14", ARM::LR) + .Case("r15", ARM::PC) + .Case("ip", ARM::R12) + // Additional register name aliases for 'gas' compatibility. + .Case("a1", ARM::R0) + .Case("a2", ARM::R1) + .Case("a3", ARM::R2) + .Case("a4", ARM::R3) + .Case("v1", ARM::R4) + .Case("v2", ARM::R5) + .Case("v3", ARM::R6) + .Case("v4", ARM::R7) + .Case("v5", ARM::R8) + .Case("v6", ARM::R9) + .Case("v7", ARM::R10) + .Case("v8", ARM::R11) + .Case("sb", ARM::R9) + .Case("sl", ARM::R10) + .Case("fp", ARM::R11) + .Default(MCRegister()); + } + if (!Reg) { // Check for aliases registered via .req. Canonicalize to lower case. // That's more consistent since register names are case insensitive, and // it's how the original entry was passed in from MC/MCParser/AsmParser. auto Entry = RegisterReqs.find(lowerCase); // If no match, return failure. if (Entry == RegisterReqs.end()) - return -1; + return MCRegister(); Parser.Lex(); // Eat identifier token. return Entry->getValue(); } // Some FPUs only have 16 D registers, so D16-D31 are invalid - if (!AllowOutOfBoundReg && !hasD32() && RegNum >= ARM::D16 && - RegNum <= ARM::D31) - return -1; + if (!AllowOutOfBoundReg && !hasD32() && Reg >= ARM::D16 && Reg <= ARM::D31) + return MCRegister(); Parser.Lex(); // Eat identifier token. - return RegNum; + return Reg; } std::optional ARMAsmParser::tryParseShiftToken() { @@ -4356,7 +4356,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { SMLoc L = Parser.getTok().getLoc(); EndLoc = Parser.getTok().getEndLoc(); ShiftReg = tryParseRegister(); - if (ShiftReg == -1) { + if (!ShiftReg) { Error(L, "expected immediate or register in shift operand"); return -1; } @@ -4387,12 +4387,11 @@ bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) { MCAsmParser &Parser = getParser(); SMLoc RegStartLoc = Parser.getTok().getLoc(); SMLoc RegEndLoc = Parser.getTok().getEndLoc(); - int RegNo = tryParseRegister(); - if (RegNo == -1) + MCRegister Reg = tryParseRegister(); + if (!Reg) return true; - Operands.push_back( - ARMOperand::CreateReg(RegNo, RegStartLoc, RegEndLoc, *this)); + Operands.push_back(ARMOperand::CreateReg(Reg, RegStartLoc, RegEndLoc, *this)); const AsmToken &ExclaimTok = Parser.getTok(); if (ExclaimTok.is(AsmToken::Exclaim)) { @@ -4619,8 +4618,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder, // Check the first register in the list to see what register class // this is a list of. 
- int Reg = tryParseRegister(); - if (Reg == -1) + MCRegister Reg = tryParseRegister(); + if (!Reg) return Error(RegLoc, "register expected"); if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE) return Error(RegLoc, "pseudo-register not allowed"); @@ -4634,7 +4633,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder, Reg = getDRegFromQReg(Reg); EReg = MRI->getEncodingValue(Reg); Registers.emplace_back(EReg, Reg); - ++Reg; + Reg = Reg + 1; } const MCRegisterClass *RC; if (Reg == ARM::RA_AUTH_CODE || @@ -4663,8 +4662,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder, return Error(RegLoc, "pseudo-register not allowed"); Parser.Lex(); // Eat the minus. SMLoc AfterMinusLoc = Parser.getTok().getLoc(); - int EndReg = tryParseRegister(AllowOutOfBoundReg); - if (EndReg == -1) + MCRegister EndReg = tryParseRegister(AllowOutOfBoundReg); + if (!EndReg) return Error(AfterMinusLoc, "register expected"); if (EndReg == ARM::RA_AUTH_CODE) return Error(AfterMinusLoc, "pseudo-register not allowed"); @@ -4696,10 +4695,10 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder, } Parser.Lex(); // Eat the comma. RegLoc = Parser.getTok().getLoc(); - int OldReg = Reg; + MCRegister OldReg = Reg; const AsmToken RegTok = Parser.getTok(); Reg = tryParseRegister(AllowOutOfBoundReg); - if (Reg == -1) + if (!Reg) return Error(RegLoc, "register expected"); if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE) return Error(RegLoc, "pseudo-register not allowed"); @@ -4755,7 +4754,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder, ") in register list"); } if (isQReg) { - EReg = MRI->getEncodingValue(++Reg); + Reg = Reg + 1; + EReg = MRI->getEncodingValue(Reg); Registers.emplace_back(EReg, Reg); } } @@ -4835,8 +4835,8 @@ ParseStatus ARMAsmParser::parseVectorList(OperandVector &Operands) { // use the custom matcher to convert to list if necessary if (!hasMVE() && Parser.getTok().is(AsmToken::Identifier)) { SMLoc E = Parser.getTok().getEndLoc(); - int Reg = tryParseRegister(); - if (Reg == -1) + MCRegister Reg = tryParseRegister(); + if (!Reg) return ParseStatus::NoMatch; if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { ParseStatus Res = parseVectorLane(LaneKind, LaneIndex, E); @@ -4889,12 +4889,12 @@ ParseStatus ARMAsmParser::parseVectorList(OperandVector &Operands) { Parser.Lex(); // Eat '{' token. SMLoc RegLoc = Parser.getTok().getLoc(); - int Reg = tryParseRegister(); - if (Reg == -1) + MCRegister Reg = tryParseRegister(); + if (!Reg) return Error(RegLoc, "register expected"); unsigned Count = 1; int Spacing = 0; - unsigned FirstReg = Reg; + MCRegister FirstReg = Reg; if (hasMVE() && !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Reg)) return Error(Parser.getTok().getLoc(), @@ -4905,7 +4905,7 @@ ParseStatus ARMAsmParser::parseVectorList(OperandVector &Operands) { FirstReg = Reg = getDRegFromQReg(Reg); Spacing = 1; // double-spacing requires explicit D registers, otherwise // it's ambiguous with four-register single spaced. - ++Reg; + Reg = Reg + 1; ++Count; } @@ -4923,8 +4923,8 @@ ParseStatus ARMAsmParser::parseVectorList(OperandVector &Operands) { "sequential registers in double spaced list"); Parser.Lex(); // Eat the minus. 
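Throughout this file the parser's failure checks change from `Reg == -1` to `!Reg`. That is sound because a default-constructed MCRegister holds the sentinel id 0 ("no register") and converts implicitly to unsigned; a hedged sketch of the invariant being relied on:

    #include "llvm/MC/MCRegister.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      MCRegister Reg;        // default: id 0, i.e. "no register"
      assert(!Reg && !Reg.isValid());
      Reg = MCRegister(1);   // any real register has a non-zero id
      assert(Reg && Reg.id() == 1);
    }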
SMLoc AfterMinusLoc = Parser.getTok().getLoc(); - int EndReg = tryParseRegister(); - if (EndReg == -1) + MCRegister EndReg = tryParseRegister(); + if (!EndReg) return Error(AfterMinusLoc, "register expected"); // Allow Q regs and just interpret them as the two D sub-registers. if (!hasMVE() && ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) @@ -4957,9 +4957,9 @@ ParseStatus ARMAsmParser::parseVectorList(OperandVector &Operands) { } Parser.Lex(); // Eat the comma. RegLoc = Parser.getTok().getLoc(); - int OldReg = Reg; + MCRegister OldReg = Reg; Reg = tryParseRegister(); - if (Reg == -1) + if (!Reg) return Error(RegLoc, "register expected"); if (hasMVE()) { @@ -4983,7 +4983,7 @@ ParseStatus ARMAsmParser::parseVectorList(OperandVector &Operands) { Reg = getDRegFromQReg(Reg); if (Reg != OldReg + 1) return Error(RegLoc, "non-contiguous register range"); - ++Reg; + Reg = Reg + 1; Count += 2; // Parse the lane specifier if present. VectorLaneTy NextLaneKind; @@ -5674,8 +5674,8 @@ ParseStatus ARMAsmParser::parsePostIdxReg(OperandVector &Operands) { } SMLoc E = Parser.getTok().getEndLoc(); - int Reg = tryParseRegister(); - if (Reg == -1) { + MCRegister Reg = tryParseRegister(); + if (!Reg) { if (!haveEaten) return ParseStatus::NoMatch; return Error(Parser.getTok().getLoc(), "register expected"); @@ -5752,8 +5752,8 @@ ParseStatus ARMAsmParser::parseAM3Offset(OperandVector &Operands) { } Tok = Parser.getTok(); - int Reg = tryParseRegister(); - if (Reg == -1) { + MCRegister Reg = tryParseRegister(); + if (!Reg) { if (!haveEaten) return ParseStatus::NoMatch; return Error(Tok.getLoc(), "register expected"); @@ -5935,8 +5935,8 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { Parser.Lex(); // Eat left bracket token. const AsmToken &BaseRegTok = Parser.getTok(); - int BaseRegNum = tryParseRegister(); - if (BaseRegNum == -1) + MCRegister BaseReg = tryParseRegister(); + if (!BaseReg) return Error(BaseRegTok.getLoc(), "register expected"); // The next token must either be a comma, a colon or a closing bracket. @@ -5950,7 +5950,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { Parser.Lex(); // Eat right bracket token. Operands.push_back(ARMOperand::CreateMem( - BaseRegNum, nullptr, 0, ARM_AM::no_shift, 0, 0, false, S, E, *this)); + BaseReg, nullptr, 0, ARM_AM::no_shift, 0, 0, false, S, E, *this)); // If there's a pre-indexing writeback marker, '!', just add it as a token // operand. It's rather odd, but syntactically valid. @@ -6006,7 +6006,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { // Don't worry about range checking the value here. That's handled by // the is*() predicates. - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0, + Operands.push_back(ARMOperand::CreateMem(BaseReg, nullptr, 0, ARM_AM::no_shift, 0, Align, false, S, E, *this, AlignmentLoc)); @@ -6050,7 +6050,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { AdjustedOffset = CE; } else AdjustedOffset = Offset; - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, AdjustedOffset, 0, + Operands.push_back(ARMOperand::CreateMem(BaseReg, AdjustedOffset, 0, ARM_AM::no_shift, 0, 0, false, S, E, *this)); @@ -6082,8 +6082,8 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { } E = Parser.getTok().getLoc(); - int OffsetRegNum = tryParseRegister(); - if (OffsetRegNum == -1) + MCRegister OffsetReg = tryParseRegister(); + if (!OffsetReg) return Error(E, "register expected"); // If there's a shift operator, handle it. 
@@ -6101,7 +6101,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, OffsetRegNum, + Operands.push_back(ARMOperand::CreateMem(BaseReg, nullptr, OffsetReg, ShiftType, ShiftImm, 0, isNegative, S, E, *this)); @@ -12077,16 +12077,16 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { // Parse fpreg SMLoc FPRegLoc = Parser.getTok().getLoc(); - int FPReg = tryParseRegister(); + MCRegister FPReg = tryParseRegister(); - if (check(FPReg == -1, FPRegLoc, "frame pointer register expected") || + if (check(!FPReg, FPRegLoc, "frame pointer register expected") || Parser.parseComma()) return true; // Parse spreg SMLoc SPRegLoc = Parser.getTok().getLoc(); - int SPReg = tryParseRegister(); - if (check(SPReg == -1, SPRegLoc, "stack pointer register expected") || + MCRegister SPReg = tryParseRegister(); + if (check(!SPReg, SPRegLoc, "stack pointer register expected") || check(SPReg != ARM::SP && SPReg != UC.getFPReg(), SPRegLoc, "register should be either $sp or the latest fp register")) return true; @@ -12404,8 +12404,8 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { return Error(L, "unexpected .movsp directive"); SMLoc SPRegLoc = Parser.getTok().getLoc(); - int SPReg = tryParseRegister(); - if (SPReg == -1) + MCRegister SPReg = tryParseRegister(); + if (!SPReg) return Error(SPRegLoc, "register expected"); if (SPReg == ARM::SP || SPReg == ARM::PC) return Error(SPRegLoc, "sp and pc are not permitted in .movsp directive"); @@ -12542,8 +12542,8 @@ bool ARMAsmParser::parseDirectiveSEHSaveRegs(SMLoc L, bool Wide) { /// parseDirectiveSEHSaveSP /// ::= .seh_save_sp bool ARMAsmParser::parseDirectiveSEHSaveSP(SMLoc L) { - int Reg = tryParseRegister(); - if (Reg == -1 || !MRI->getRegClass(ARM::GPRRegClassID).contains(Reg)) + MCRegister Reg = tryParseRegister(); + if (!Reg || !MRI->getRegClass(ARM::GPRRegClassID).contains(Reg)) return Error(L, "expected GPR"); unsigned Index = MRI->getEncodingValue(Reg); if (Index > 14 || Index == 13) diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index 383dfcc31117c1..c016b2dd91dc67 100644 --- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -72,7 +72,7 @@ class AVRAsmParser : public MCTargetAsmParser { int parseRegisterName(); int parseRegister(bool RestoreOnFailure = false); bool tryParseRegisterOperand(OperandVector &Operands); - bool tryParseExpression(OperandVector &Operands); + bool tryParseExpression(OperandVector &Operands, int64_t offset); bool tryParseRelocExpression(OperandVector &Operands); void eatComma(); @@ -418,7 +418,7 @@ bool AVRAsmParser::tryParseRegisterOperand(OperandVector &Operands) { return false; } -bool AVRAsmParser::tryParseExpression(OperandVector &Operands) { +bool AVRAsmParser::tryParseExpression(OperandVector &Operands, int64_t offset) { SMLoc S = Parser.getTok().getLoc(); if (!tryParseRelocExpression(Operands)) @@ -437,6 +437,11 @@ bool AVRAsmParser::tryParseExpression(OperandVector &Operands) { if (getParser().parseExpression(Expression)) return true; + if (offset) { + Expression = MCBinaryExpr::createAdd( + Expression, MCConstantExpr::create(offset, getContext()), getContext()); + } + SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(AVROperand::CreateImm(Expression, S, E)); return false; @@ -529,8 +534,9 @@ bool 
AVRAsmParser::parseOperand(OperandVector &Operands, bool maybeReg) { [[fallthrough]]; case AsmToken::LParen: case AsmToken::Integer: + return tryParseExpression(Operands, 0); case AsmToken::Dot: - return tryParseExpression(Operands); + return tryParseExpression(Operands, 2); case AsmToken::Plus: case AsmToken::Minus: { // If the sign preceeds a number, parse the number, @@ -540,7 +546,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands, bool maybeReg) { case AsmToken::BigNum: case AsmToken::Identifier: case AsmToken::Real: - if (!tryParseExpression(Operands)) + if (!tryParseExpression(Operands, 0)) return false; break; default: @@ -643,6 +649,7 @@ bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info, // These specific operands should be treated as addresses/symbols/labels, // other than registers. bool maybeReg = true; + if (OperandNum == 1) { std::array Insts = {"lds", "adiw", "sbiw", "ldi"}; for (auto Inst : Insts) { diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index 0d29912bee2646..388d58a82214d1 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -94,6 +94,9 @@ static void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup, // Rightshifts the value by one. AVR::fixups::adjustBranchTarget(Value); + + // Jumps are relative to the current instruction. + Value -= 1; } /// 22-bit absolute fixup. @@ -513,15 +516,10 @@ bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm, switch ((unsigned)Fixup.getKind()) { default: return Fixup.getKind() >= FirstLiteralRelocationKind; - // Fixups which should always be recorded as relocations. case AVR::fixup_7_pcrel: case AVR::fixup_13_pcrel: - // Do not force relocation for PC relative branch like 'rjmp .', - // 'rcall . - off' and 'breq . + off'. 
- if (const auto *SymA = Target.getSymA()) - if (SymA->getSymbol().getName().size() == 0) - return false; - [[fallthrough]]; + // Always resolve relocations for PC-relative branches + return false; case AVR::fixup_call: return true; } diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h index f7bc6f958470b9..f07ae4c9baf1c6 100644 --- a/llvm/lib/Target/BPF/BPF.h +++ b/llvm/lib/Target/BPF/BPF.h @@ -28,6 +28,7 @@ FunctionPass *createBPFISelDag(BPFTargetMachine &TM); FunctionPass *createBPFMISimplifyPatchablePass(); FunctionPass *createBPFMIPeepholePass(); FunctionPass *createBPFMIPreEmitPeepholePass(); +FunctionPass *createBPFMIPreEmitCheckingPass(); InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &, const BPFSubtarget &, @@ -36,6 +37,7 @@ InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &, void initializeBPFCheckAndAdjustIRPass(PassRegistry&); void initializeBPFDAGToDAGISelLegacyPass(PassRegistry &); void initializeBPFMIPeepholePass(PassRegistry &); +void initializeBPFMIPreEmitCheckingPass(PassRegistry &); void initializeBPFMIPreEmitPeepholePass(PassRegistry &); void initializeBPFMISimplifyPatchablePass(PassRegistry &); diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td index 4baeeb017699d6..6c750af5c2fd92 100644 --- a/llvm/lib/Target/BPF/BPFInstrInfo.td +++ b/llvm/lib/Target/BPF/BPFInstrInfo.td @@ -786,45 +786,13 @@ let Predicates = [BPFNoALU32] in { def : Pat<(i64 (extloadi32 ADDRri:$src)), (i64 (LDW ADDRri:$src))>; } -// Atomic XADD for BPFNoALU32 -class XADD - : TYPE_LD_ST { - bits<4> dst; - bits<20> addr; - - let Inst{51-48} = addr{19-16}; // base reg - let Inst{55-52} = dst; - let Inst{47-32} = addr{15-0}; // offset - let Inst{7-4} = BPF_ADD.Value; - let BPFClass = BPF_STX; -} - // Atomic add, and, or, xor -class ATOMIC_NOFETCH - : TYPE_LD_ST + : TYPE_LD_ST { - bits<4> dst; - bits<20> addr; - - let Inst{51-48} = addr{19-16}; // base reg - let Inst{55-52} = dst; - let Inst{47-32} = addr{15-0}; // offset - let Inst{7-4} = Opc.Value; - let BPFClass = BPF_STX; -} - -class ATOMIC32_NOFETCH - : TYPE_LD_ST { bits<4> dst; bits<20> addr; @@ -838,16 +806,23 @@ class ATOMIC32_NOFETCH let Constraints = "$dst = $val" in { let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { - def XADDW32 : ATOMIC32_NOFETCH; - def XANDW32 : ATOMIC32_NOFETCH; - def XORW32 : ATOMIC32_NOFETCH; - def XXORW32 : ATOMIC32_NOFETCH; + def XADDW32 : ATOMIC_NOFETCH; + def XANDW32 : ATOMIC_NOFETCH; + def XORW32 : ATOMIC_NOFETCH; + def XXORW32 : ATOMIC_NOFETCH; } + def XADDW : ATOMIC_NOFETCH; + def XADDD : ATOMIC_NOFETCH; + def XANDD : ATOMIC_NOFETCH; + def XORD : ATOMIC_NOFETCH; + def XXORD : ATOMIC_NOFETCH; +} - def XADDD : ATOMIC_NOFETCH; - def XANDD : ATOMIC_NOFETCH; - def XORD : ATOMIC_NOFETCH; - def XXORD : ATOMIC_NOFETCH; +let Predicates = [BPFNoALU32] in { + def : Pat<(atomic_load_add_i32 ADDRri:$addr, GPR:$val), + (XADDW ADDRri:$addr, GPR:$val)>; + def : Pat<(atomic_load_add_i64 ADDRri:$addr, GPR:$val), + (XADDD ADDRri:$addr, GPR:$val)>; } // Atomic Fetch-and- operations @@ -887,13 +862,6 @@ class XFALU32; - def XFADDW : XFALU64; - } -} - let Constraints = "$dst = $val" in { let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in { def XFADDW32 : XFALU32; @@ -902,7 +870,9 @@ let Constraints = "$dst = $val" in { def XFXORW32 : XFALU32; } - def XFADDD : XFALU64; + let Predicates = [BPFHasALU32] in { + def XFADDD : XFALU64; + } def XFANDD : XFALU64; def XFORD : XFALU64; def XFXORD : XFALU64; diff --git 
a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp
new file mode 100644
index 00000000000000..24224f6c1e9e66
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -0,0 +1,181 @@
+//===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs checking to signal errors for certain illegal usages at
+// the MachineInstruction layer. Specifically, the result of an XADD{32,64}
+// insn should not be used. The pass runs at the PreEmit stage, right before
+// the machine code is emitted, at which point the register liveness
+// information is still available.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFInstrInfo.h"
+#include "BPFTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-mi-checking"
+
+namespace {
+
+struct BPFMIPreEmitChecking : public MachineFunctionPass {
+
+  static char ID;
+  MachineFunction *MF;
+  const TargetRegisterInfo *TRI;
+
+  BPFMIPreEmitChecking() : MachineFunctionPass(ID) {
+    initializeBPFMIPreEmitCheckingPass(*PassRegistry::getPassRegistry());
+  }
+
+private:
+  // Initialize class variables.
+  void initialize(MachineFunction &MFParm);
+
+  void processAtomicInsts();
+
+public:
+  // Main entry point for this pass.
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (!skipFunction(MF.getFunction())) {
+      initialize(MF);
+      processAtomicInsts();
+    }
+    return false;
+  }
+};
+
+// Initialize class variables.
+void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
+  MF = &MFParm;
+  TRI = MF->getSubtarget().getRegisterInfo();
+  LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
+}
+
+// Make sure all Defs of XADD are dead, meaning any result of XADD insn is not
+// used.
+//
+// NOTE: The BPF backend hasn't enabled sub-register liveness tracking, so
+// when the source and destination operands of XADD are GPR32, there is no
+// sub-register dead info. If we rely on the generic
+// MachineInstr::allDefsAreDead, then we will raise a false alarm on a GPR32
+// Def.
+//
+// To support GPR32 Defs, ideally we could just enable sub-register liveness
+// tracking on the BPF backend, then allDefsAreDead could work on GPR32 Defs.
+// This requires implementing TargetSubtargetInfo::enableSubRegLiveness on
+// BPF.
+//
+// However, the sub-register liveness tracking module inside LLVM is actually
+// designed for the situation where one register could be split into more
+// than one sub-register, in which case each sub-register can have its own
+// liveness and killing one of them doesn't kill the others. So tracking
+// liveness for each sub-register makes sense there.
+//
+// For BPF, each 64-bit register has only one 32-bit sub-register. This is
+// exactly the case that LLVM thinks brings no benefit for sub-register
+// tracking, because the live range of the sub-register must always equal
+// that of its parent register, so liveness tracking is disabled even if the
+// back-end has implemented enableSubRegLiveness. The detailed information
+// is at r232695:
+//
+//   Author: Matthias Braun
+//   Date:   Thu Mar 19 00:21:58 2015 +0000
+//   Do not track subregister liveness when it brings no benefits
+//
+// Hence, for BPF, we enhance MachineInstr::allDefsAreDead. Given that the
+// solo sub-register always has the same liveness as its parent register,
+// LLVM already attaches an implicit 64-bit register Def whenever there is
+// a sub-register Def. The liveness of the implicit 64-bit Def is available.
+// For example, for "lock *(u32 *)(r0 + 4) += w9", the MachineOperand info
+// could be:
+//
+//   $w9 = XADDW32 killed $r0, 4, $w9(tied-def 0),
+//                 implicit killed $r9, implicit-def dead $r9
+//
+// Even though w9 is not marked as Dead, the parent register r9 is marked as
+// Dead correctly, and it is safe to use such information for our purpose.
+static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
+  const MCRegisterClass *GPR64RegClass =
+      &BPFMCRegisterClasses[BPF::GPRRegClassID];
+  std::vector<unsigned> GPR32LiveDefs;
+  std::vector<unsigned> GPR64DeadDefs;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    bool RegIsGPR64;
+
+    if (!MO.isReg() || MO.isUse())
+      continue;
+
+    RegIsGPR64 = GPR64RegClass->contains(MO.getReg());
+    if (!MO.isDead()) {
+      // It is a GPR64 live Def, we are sure it is live.
+      if (RegIsGPR64)
+        return true;
+      // It is a GPR32 live Def, we are unsure whether it is really dead due
+      // to no sub-register liveness tracking. Push it to the vector for a
+      // deferred check.
+      GPR32LiveDefs.push_back(MO.getReg());
+      continue;
+    }
+
+    // Record any GPR64 dead Def as some unmarked GPR32 could be an alias of
+    // its low 32 bits.
+    if (RegIsGPR64)
+      GPR64DeadDefs.push_back(MO.getReg());
+  }
+
+  // No GPR32 live Def, safe to return false.
+  if (GPR32LiveDefs.empty())
+    return false;
+
+  // No GPR64 dead Def, so none of those GPR32 live Defs can have an alias,
+  // therefore they must be truly live; safe to return true.
+  if (GPR64DeadDefs.empty())
+    return true;
+
+  // Otherwise, return true if any aliased SuperReg of a GPR32 is not dead;
+  // see the final scan below.
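The decision rule that the final scan implements can be stated without any LLVM types: a 32-bit live Def is excused only when its unique 64-bit super-register is an explicitly dead Def. A hedged, self-contained sketch of just that set logic (anyTrulyLiveDef and superReg are illustrative stand-ins, not names from this patch):

    #include <algorithm>
    #include <vector>

    // Returns true if some 32-bit def must be considered live: either there
    // is no dead-def info to excuse it, or its 64-bit super-register is not
    // among the dead defs.
    bool anyTrulyLiveDef(const std::vector<int> &Gpr32LiveDefs,
                         const std::vector<int> &Gpr64DeadDefs,
                         int (*superReg)(int)) {
      if (Gpr32LiveDefs.empty())
        return false; // no 32-bit defs to worry about
      if (Gpr64DeadDefs.empty())
        return true; // no dead super-reg info to excuse them
      return std::any_of(Gpr32LiveDefs.begin(), Gpr32LiveDefs.end(),
                         [&](int R) {
                           return std::find(Gpr64DeadDefs.begin(),
                                            Gpr64DeadDefs.end(), superReg(R)) ==
                                  Gpr64DeadDefs.end();
                         });
    }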
+ for (auto I : GPR32LiveDefs) + for (MCPhysReg SR : TRI->superregs(I)) + if (!llvm::is_contained(GPR64DeadDefs, SR)) + return true; + + return false; +} + +void BPFMIPreEmitChecking::processAtomicInsts() { + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() != BPF::XADDW && MI.getOpcode() != BPF::XADDD) + continue; + + LLVM_DEBUG(MI.dump()); + if (hasLiveDefs(MI, TRI)) { + DebugLoc Empty; + const DebugLoc &DL = MI.getDebugLoc(); + const Function &F = MF->getFunction(); + F.getContext().diagnose(DiagnosticInfoUnsupported{ + F, "Invalid usage of the XADD return value", DL}); + } + } + } +} + +} // namespace + +INITIALIZE_PASS(BPFMIPreEmitChecking, "bpf-mi-pemit-checking", + "BPF PreEmit Checking", false, false) + +char BPFMIPreEmitChecking::ID = 0; +FunctionPass *llvm::createBPFMIPreEmitCheckingPass() { + return new BPFMIPreEmitChecking(); +} diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp index 64b115b8fc8afa..7d91fa8bb824cf 100644 --- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp +++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp @@ -178,6 +178,7 @@ void BPFPassConfig::addMachineSSAOptimization() { } void BPFPassConfig::addPreEmitPass() { + addPass(createBPFMIPreEmitCheckingPass()); if (getOptLevel() != CodeGenOptLevel::None) if (!DisableMIPeephole) addPass(createBPFMIPreEmitPeepholePass()); diff --git a/llvm/lib/Target/BPF/CMakeLists.txt b/llvm/lib/Target/BPF/CMakeLists.txt index 253660d4d62e37..eade4cacb7100e 100644 --- a/llvm/lib/Target/BPF/CMakeLists.txt +++ b/llvm/lib/Target/BPF/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_target(BPFCodeGen BPFSubtarget.cpp BPFTargetMachine.cpp BPFMIPeephole.cpp + BPFMIChecking.cpp BPFMISimplifyPatchable.cpp BTFDebug.cpp diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index c2ae4a0734b6a7..b8f1cdfd2cb354 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -1291,14 +1291,32 @@ void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc, Imm = SignExtend64<32>(Imm); for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) { - unsigned Opc = Inst.Opc; - if (Opc == LoongArch::LU12I_W) - Out.emitInstruction(MCInstBuilder(Opc).addReg(DestReg).addImm(Inst.Imm), - getSTI()); - else + switch (Inst.Opc) { + case LoongArch::LU12I_W: Out.emitInstruction( - MCInstBuilder(Opc).addReg(DestReg).addReg(SrcReg).addImm(Inst.Imm), + MCInstBuilder(Inst.Opc).addReg(DestReg).addImm(Inst.Imm), getSTI()); + break; + case LoongArch::ADDI_W: + case LoongArch::ORI: + case LoongArch::LU32I_D: + case LoongArch::LU52I_D: + Out.emitInstruction( + MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm( + Inst.Imm), getSTI()); + break; + case LoongArch::BSTRINS_D: + Out.emitInstruction(MCInstBuilder(Inst.Opc) + .addReg(DestReg) + .addReg(SrcReg) + .addReg(SrcReg) + .addImm(Inst.Imm >> 32) + .addImm(Inst.Imm & 0xFF), + getSTI()); + break; + default: + llvm_unreachable("unexpected opcode generated by LoongArchMatInt"); + } SrcReg = DestReg; } } diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp index b6ade6b978d2ce..70ed1e6fbdbdac 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -62,10 +62,26 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { // The instructions in 
the sequence are handled here. for (LoongArchMatInt::Inst &Inst : LoongArchMatInt::generateInstSeq(Imm)) { SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, GRLenVT); - if (Inst.Opc == LoongArch::LU12I_W) - Result = CurDAG->getMachineNode(LoongArch::LU12I_W, DL, GRLenVT, SDImm); - else + switch (Inst.Opc) { + case LoongArch::LU12I_W: + Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SDImm); + break; + case LoongArch::ADDI_W: + case LoongArch::ORI: + case LoongArch::LU32I_D: + case LoongArch::LU52I_D: Result = CurDAG->getMachineNode(Inst.Opc, DL, GRLenVT, SrcReg, SDImm); + break; + case LoongArch::BSTRINS_D: + Result = CurDAG->getMachineNode( + Inst.Opc, DL, GRLenVT, + {SrcReg, SrcReg, + CurDAG->getTargetConstant(Inst.Imm >> 32, DL, GRLenVT), + CurDAG->getTargetConstant(Inst.Imm & 0xFF, DL, GRLenVT)}); + break; + default: + llvm_unreachable("unexpected opcode generated by LoongArchMatInt"); + } SrcReg = SDValue(Result, 0); } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index 9059da460f1358..d1af65192ee612 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -210,6 +210,14 @@ void LoongArchInstrInfo::movImm(MachineBasicBlock &MBB, .addImm(Inst.Imm) .setMIFlag(Flag); break; + case LoongArch::BSTRINS_D: + BuildMI(MBB, MBBI, DL, get(Inst.Opc), DstReg) + .addReg(SrcReg, RegState::Kill) + .addReg(SrcReg, RegState::Kill) + .addImm(Inst.Imm >> 32) + .addImm(Inst.Imm & 0xFF) + .setMIFlag(Flag); + break; default: assert(false && "Unknown insn emitted by LoongArchMatInt"); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp index 1509c436c81098..a7823470382756 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.cpp @@ -26,11 +26,13 @@ LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) { const int64_t Lo12 = Val & 0xFFF; InstSeq Insts; + // LU52I_D used for: Bits[63:52] | Bits[51:0]. if (Highest12 != 0 && SignExtend64<52>(Val) == 0) { Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12))); return Insts; } + // lo32 if (Hi20 == 0) Insts.push_back(Inst(LoongArch::ORI, Lo12)); else if (SignExtend32<1>(Lo12 >> 11) == SignExtend32<20>(Hi20)) @@ -41,11 +43,83 @@ LoongArchMatInt::InstSeq LoongArchMatInt::generateInstSeq(int64_t Val) { Insts.push_back(Inst(LoongArch::ORI, Lo12)); } + // hi32 + // Higher20 if (SignExtend32<1>(Hi20 >> 19) != SignExtend32<20>(Higher20)) Insts.push_back(Inst(LoongArch::LU32I_D, SignExtend64<20>(Higher20))); + // Highest12 if (SignExtend32<1>(Higher20 >> 19) != SignExtend32<12>(Highest12)) Insts.push_back(Inst(LoongArch::LU52I_D, SignExtend64<12>(Highest12))); + size_t N = Insts.size(); + if (N < 3) + return Insts; + + // When the number of instruction sequences is greater than 2, we have the + // opportunity to optimize using the BSTRINS_D instruction. The scenario is as + // follows: + // + // N of Insts = 3 + // 1. ORI + LU32I_D + LU52I_D => ORI + BSTRINS_D, TmpVal = ORI + // 2. ADDI_W + LU32I_D + LU52I_D => ADDI_W + BSTRINS_D, TmpVal = ADDI_W + // 3. LU12I_W + ORI + LU32I_D => ORI + BSTRINS_D, TmpVal = ORI + // 4. LU12I_W + LU32I_D + LU52I_D => LU12I_W + BSTRINS_D, TmpVal = LU12I_W + // + // N of Insts = 4 + // 5. 
LU12I_W + ORI + LU32I_D + LU52I_D => LU12I_W + ORI + BSTRINS_D + // => ORI + LU52I_D + BSTRINS_D + // TmpVal = (LU12I_W | ORI) or (ORI | LU52I_D) + // The BSTRINS_D instruction will use the `TmpVal` to construct the `Val`. + uint64_t TmpVal1 = 0; + uint64_t TmpVal2 = 0; + switch (Insts[0].Opc) { + default: + llvm_unreachable("unexpected opcode"); + break; + case LoongArch::LU12I_W: + if (Insts[1].Opc == LoongArch::ORI) { + TmpVal1 = Insts[1].Imm; + if (N == 3) + break; + TmpVal2 = Insts[3].Imm << 52 | TmpVal1; + } + TmpVal1 |= Insts[0].Imm << 12; + break; + case LoongArch::ORI: + case LoongArch::ADDI_W: + TmpVal1 = Insts[0].Imm; + break; + } + + uint64_t Msb = 32; + uint64_t HighMask = ~((1ULL << (Msb + 1)) - 1); + for (; Msb < 64; ++Msb, HighMask = (HighMask << 1) + 1) { + for (uint64_t Lsb = Msb; Lsb > 0; --Lsb) { + uint64_t LowMask = (1ULL << Lsb) - 1; + uint64_t Mask = HighMask | LowMask; + uint64_t LsbToZero = TmpVal1 & ((1ULL << (Msb - Lsb + 1)) - 1); + uint64_t MsbToLsb = LsbToZero << Lsb; + if ((MsbToLsb | (TmpVal1 & Mask)) == (uint64_t)Val) { + if (Insts[1].Opc == LoongArch::ORI && N == 3) + Insts[0] = Insts[1]; + Insts.pop_back_n(2); + Insts.push_back(Inst(LoongArch::BSTRINS_D, Msb << 32 | Lsb)); + return Insts; + } + if (TmpVal2 != 0) { + LsbToZero = TmpVal2 & ((1ULL << (Msb - Lsb + 1)) - 1); + MsbToLsb = LsbToZero << Lsb; + if ((MsbToLsb | (TmpVal2 & Mask)) == (uint64_t)Val) { + Insts[0] = Insts[1]; + Insts[1] = Insts[3]; + Insts.pop_back_n(2); + Insts.push_back(Inst(LoongArch::BSTRINS_D, Msb << 32 | Lsb)); + return Insts; + } + } + } + } + return Insts; } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h index be1b425894de1a..3a3c12c353fb8e 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMatInt.h @@ -16,6 +16,7 @@ namespace llvm { namespace LoongArchMatInt { struct Inst { unsigned Opc; + // Imm: Opc's imm operand, if Opc == BSTRINS_D, Imm = MSB << 32 | LSB. int64_t Imm; Inst(unsigned Opc, int64_t Imm) : Opc(Opc), Imm(Imm) {} }; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 09928dcc1f489a..e990325ac38279 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -891,16 +891,30 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA}; // TODO: support more vp ops. 
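Referring back to the LoongArchMatInt change above: the `Inst.Imm` packing for BSTRINS_D (msb in the high word, lsb in the low byte) is what the three emitters unpack with `Inst.Imm >> 32` and `Inst.Imm & 0xFF`. A small hedged sketch of that round trip:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Pack as generateInstSeq does: Msb << 32 | Lsb.
      uint64_t Msb = 63, Lsb = 32;
      int64_t Imm = static_cast<int64_t>(Msb << 32 | Lsb);
      // Unpack as the emitters do (BSTRINS_D operands: msb, then lsb).
      assert((Imm >> 32) == 63);
      assert((Imm & 0xFF) == 32);
      return 0;
    }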
- static const unsigned ZvfhminPromoteVPOps[] = { - ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, - ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS, - ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, - ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT, - ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL, - ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, - ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, - ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM, - ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM}; + static const unsigned ZvfhminPromoteVPOps[] = {ISD::VP_FADD, + ISD::VP_FSUB, + ISD::VP_FMUL, + ISD::VP_FDIV, + ISD::VP_FMA, + ISD::VP_REDUCE_FADD, + ISD::VP_REDUCE_SEQ_FADD, + ISD::VP_REDUCE_FMIN, + ISD::VP_REDUCE_FMAX, + ISD::VP_SQRT, + ISD::VP_FMINNUM, + ISD::VP_FMAXNUM, + ISD::VP_FCEIL, + ISD::VP_FFLOOR, + ISD::VP_FROUND, + ISD::VP_FROUNDEVEN, + ISD::VP_FROUNDTOZERO, + ISD::VP_FRINT, + ISD::VP_FNEARBYINT, + ISD::VP_SETCC, + ISD::VP_FMINIMUM, + ISD::VP_FMAXIMUM, + ISD::VP_REDUCE_FMINIMUM, + ISD::VP_REDUCE_FMAXIMUM}; // Sets common operation actions on RVV floating-point vector types. const auto SetCommonVFPActions = [&](MVT VT) { @@ -16440,6 +16454,13 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, SDLoc DL(N); SDValue Op0 = N->getOperand(0); MVT VT = N->getSimpleValueType(0); + + // Constant fold. + if (auto *CFP = dyn_cast(Op0)) { + APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits()); + return DAG.getConstant(Val, DL, VT); + } + // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the // conversion is unnecessary and can be replaced with the FMV_W_X_RV64 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X. diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 0f8f9442877e33..6df3b951f5a06f 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -70,6 +70,7 @@ class RISCVVectorPeephole : public MachineFunctionPass { bool isAllOnesMask(const MachineInstr *MaskDef) const; std::optional getConstant(const MachineOperand &VL) const; + bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const; /// Maps uses of V0 to the corresponding def of V0. DenseMap V0Defs; @@ -165,6 +166,9 @@ bool RISCVVectorPeephole::tryToReduceVL(MachineInstr &MI) const { if (VL.isIdenticalTo(SrcVL) || !isVLKnownLE(VL, SrcVL)) return false; + if (!ensureDominates(VL, *Src)) + return false; + if (VL.isImm()) SrcVL.ChangeToImmediate(VL.getImm()); else if (VL.isReg()) @@ -456,6 +460,26 @@ static bool dominates(MachineBasicBlock::const_iterator A, return &*I == A; } +/// If the register in \p MO doesn't dominate \p Src, try to move \p Src so it +/// does. Returns false if doesn't dominate and we can't move. \p MO must be in +/// the same basic block as \Src. +bool RISCVVectorPeephole::ensureDominates(const MachineOperand &MO, + MachineInstr &Src) const { + assert(MO.getParent()->getParent() == Src.getParent()); + if (!MO.isReg() || MO.getReg() == RISCV::NoRegister) + return true; + + MachineInstr *Def = MRI->getVRegDef(MO.getReg()); + if (Def->getParent() == Src.getParent() && !dominates(Def, Src)) { + if (!isSafeToMove(Src, *Def->getNextNode())) + return false; + // FIXME: Update V0Defs + Src.moveBefore(Def->getNextNode()); + } + + return true; +} + /// If a PseudoVMV_V_V is the only user of its input, fold its passthru and VL /// into it. 
/// @@ -501,15 +525,8 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) { return false; // If the new passthru doesn't dominate Src, try to move Src so it does. - if (Passthru.getReg() != RISCV::NoRegister) { - MachineInstr *PassthruDef = MRI->getVRegDef(Passthru.getReg()); - if (PassthruDef->getParent() == Src->getParent() && - !dominates(PassthruDef, Src)) { - if (!isSafeToMove(*Src, *PassthruDef->getNextNode())) - return false; - Src->moveBefore(PassthruDef->getNextNode()); - } - } + if (!ensureDominates(Passthru, *Src)) + return false; if (SrcPassthru.getReg() != Passthru.getReg()) { SrcPassthru.setReg(Passthru.getReg()); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 4578ff7f715146..5cc084f3ab1387 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -275,8 +275,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(Op, T, Expand); // But saturating fp_to_int converstions are - for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) + for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) { setOperationAction(Op, MVT::v4i32, Custom); + if (Subtarget->hasFP16()) { + setOperationAction(Op, MVT::v8i16, Custom); + } + } // Support vector extending for (auto T : MVT::integer_fixedlen_vector_valuetypes()) { @@ -2475,6 +2479,9 @@ SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op, if (ResT == MVT::v4i32 && SatVT == MVT::i32) return Op; + if (ResT == MVT::v8i16 && SatVT == MVT::i16) + return Op; + return SDValue(); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 887278e9c12ef3..9d17d90f530541 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -165,8 +165,9 @@ def F16x8 : Vec { let prefix = "f16x8"; } -// TODO: Include F16x8 here when half precision is better supported. -defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2]; +// TODO: Remove StdVecs when the F16x8 works every where StdVecs is used. 
+defvar StdVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2]; +defvar AllVecs = !listconcat(StdVecs, [F16x8]); defvar IntVecs = [I8x16, I16x8, I32x4, I64x2]; //===----------------------------------------------------------------------===// @@ -188,7 +189,7 @@ defm LOAD_V128_A64 : } // Def load patterns from WebAssemblyInstrMemory.td for vector types -foreach vec = AllVecs in { +foreach vec = StdVecs in { defm : LoadPat; } @@ -217,7 +218,7 @@ defm "" : SIMDLoadSplat<16, 8>; defm "" : SIMDLoadSplat<32, 9>; defm "" : SIMDLoadSplat<64, 10>; -foreach vec = AllVecs in { +foreach vec = StdVecs in { defvar inst = "LOAD"#vec.lane_bits#"_SPLAT"; defm : LoadPat, @@ -389,7 +390,7 @@ defm STORE_V128_A64 : } // Def store patterns from WebAssemblyInstrMemory.td for vector types -foreach vec = AllVecs in { +foreach vec = StdVecs in { defm : StorePat; } @@ -513,7 +514,7 @@ defm "" : ConstVec; // Match splat(x) -> const.v128(x, ..., x) -foreach vec = AllVecs in { +foreach vec = StdVecs in { defvar numEls = !div(vec.vt.Size, vec.lane_bits); defvar isFloat = !or(!eq(vec.lane_vt, f32), !eq(vec.lane_vt, f64)); defvar immKind = !if(isFloat, fpimm, imm); @@ -557,7 +558,7 @@ defm SHUFFLE : // Shuffles after custom lowering def wasm_shuffle_t : SDTypeProfile<1, 18, []>; def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>; -foreach vec = AllVecs in { +foreach vec = StdVecs in { // The @llvm.wasm.shuffle intrinsic has immediate arguments that become TargetConstants. def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y), (i32 timm:$m0), (i32 timm:$m1), @@ -627,7 +628,7 @@ defm SPLAT_F16x8 : "f16x8.splat\t$dst, $x", "f16x8.splat", 0x120>; // scalar_to_vector leaves high lanes undefined, so can be a splat -foreach vec = AllVecs in +foreach vec = StdVecs in def : Pat<(vec.vt (scalar_to_vector (vec.lane_vt vec.lane_rc:$x))), (!cast("SPLAT_"#vec) $x)>; @@ -762,7 +763,7 @@ multiclass SIMDConditionInt baseInst> { multiclass SIMDConditionFP baseInst> { defm "" : SIMDCondition; defm "" : SIMDCondition; - defm "" : HalfPrecisionCondition; + defm "" : HalfPrecisionCondition; } // Equality: eq @@ -880,7 +881,7 @@ defm BITSELECT : SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), [], "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>; -foreach vec = AllVecs in +foreach vec = StdVecs in def : Pat<(vec.vt (int_wasm_bitselect (vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))), (BITSELECT $v1, $v2, $c)>; @@ -906,7 +907,7 @@ def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)), (BITSELECT $v2, $v1, $c)>; // Also implement vselect in terms of bitselect -foreach vec = AllVecs in +foreach vec = StdVecs in def : Pat<(vec.vt (vselect (vec.int_vt V128:$c), (vec.vt V128:$v1), (vec.vt V128:$v2))), (BITSELECT $v1, $v2, $c)>; @@ -916,7 +917,7 @@ defm SELECT_V128 : I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), (outs), (ins), [], "v128.select\t$dst, $lhs, $rhs, $cond", "v128.select", 0x1b>; -foreach vec = AllVecs in { +foreach vec = StdVecs in { def : Pat<(select I32:$cond, (vec.vt V128:$lhs), (vec.vt V128:$rhs)), (SELECT_V128 $lhs, $rhs, $cond)>; @@ -1217,7 +1218,7 @@ multiclass SIMDUnaryFP baseInst> { // Unlike F32x4 and F64x2 there's not a gap in the opcodes between "neg" and // "sqrt" so subtract one from the offset. 
defm "" : HalfPrecisionUnary; + !add(baseInst,!if(!eq(name, "sqrt"), 79, 80))>; } // Absolute value: abs @@ -1238,10 +1239,10 @@ defm CEIL : SIMDUnary; defm FLOOR : SIMDUnary; defm TRUNC: SIMDUnary; defm NEAREST: SIMDUnary; -defm CEIL : HalfPrecisionUnary; -defm FLOOR : HalfPrecisionUnary; -defm TRUNC : HalfPrecisionUnary; -defm NEAREST : HalfPrecisionUnary; +defm CEIL : HalfPrecisionUnary; +defm FLOOR : HalfPrecisionUnary; +defm TRUNC : HalfPrecisionUnary; +defm NEAREST : HalfPrecisionUnary; // WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint. def : Pat<(v4f32 (frint (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>; @@ -1260,7 +1261,7 @@ def : Pat<(v8f16 (froundeven (v8f16 V128:$src))), (NEAREST_F16x8 V128:$src)>; multiclass SIMDBinaryFP baseInst> { defm "" : SIMDBinary; defm "" : SIMDBinary; - defm "" : HalfPrecisionBinary; + defm "" : HalfPrecisionBinary; } // Addition: add @@ -1361,8 +1362,8 @@ multiclass HalfPrecisionConvert; defm "" : SIMDConvert; -defm "" : HalfPrecisionConvert; -defm "" : HalfPrecisionConvert; +defm "" : HalfPrecisionConvert; +defm "" : HalfPrecisionConvert; // Support the saturating variety as well. def trunc_s_sat32 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, i32)>; @@ -1370,6 +1371,11 @@ def trunc_u_sat32 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, i32)>; def : Pat<(v4i32 (trunc_s_sat32 (v4f32 V128:$src))), (fp_to_sint_I32x4 $src)>; def : Pat<(v4i32 (trunc_u_sat32 (v4f32 V128:$src))), (fp_to_uint_I32x4 $src)>; +def trunc_s_sat16 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, i16)>; +def trunc_u_sat16 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, i16)>; +def : Pat<(v8i16 (trunc_s_sat16 (v8f16 V128:$src))), (fp_to_sint_I16x8 $src)>; +def : Pat<(v8i16 (trunc_u_sat16 (v8f16 V128:$src))), (fp_to_uint_I16x8 $src)>; + def trunc_sat_zero_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; def trunc_sat_zero_s : SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_S", trunc_sat_zero_t>; @@ -1388,8 +1394,8 @@ defm "" : SIMDConvert; defm "" : SIMDConvert; defm "" : SIMDConvert; defm "" : SIMDConvert; -defm "" : HalfPrecisionConvert; -defm "" : HalfPrecisionConvert; +defm "" : HalfPrecisionConvert; +defm "" : HalfPrecisionConvert; // Extending operations // TODO: refactor this to be uniform for i64x2 if the numbering is not changed. 
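The new trunc_sat16 patterns above let `@llvm.fptosi.sat`/`@llvm.fptoui.sat` on v8f16 select the f16x8 conversion instructions. Per lane, saturating conversion means NaN becomes 0 and out-of-range values clamp to the integer limits; a hedged scalar model of the signed case (using float to stand in for the half lane type):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Scalar model of fp_to_sint_sat to i16: NaN -> 0, otherwise clamp to
    // [INT16_MIN, INT16_MAX] and truncate toward zero.
    static int16_t truncSatS16(float X) {
      if (std::isnan(X))
        return 0;
      if (X <= static_cast<float>(std::numeric_limits<int16_t>::min()))
        return std::numeric_limits<int16_t>::min();
      if (X >= static_cast<float>(std::numeric_limits<int16_t>::max()))
        return std::numeric_limits<int16_t>::max();
      return static_cast<int16_t>(X);
    }

    int main() {
      assert(truncSatS16(NAN) == 0);
      assert(truncSatS16(1e9f) == 32767);
      assert(truncSatS16(-1e9f) == -32768);
      assert(truncSatS16(-3.7f) == -3);
    }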
@@ -1532,7 +1538,7 @@ multiclass SIMDMADD simdopA, bits<32> simdopS, list defm "" : SIMDMADD; defm "" : SIMDMADD; -defm "" : SIMDMADD; +defm "" : SIMDMADD; //===----------------------------------------------------------------------===// // Laneselect diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp index 9cc5ed5d89ad70..a62fb7f723cdbc 100644 --- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp +++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp @@ -3009,15 +3009,15 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { case Intrinsic::x86_avx512_vpermi2var_d_128: case Intrinsic::x86_avx512_vpermi2var_d_256: case Intrinsic::x86_avx512_vpermi2var_d_512: - case Intrinsic::x86_avx512_vpermi2var_hi_128: - case Intrinsic::x86_avx512_vpermi2var_hi_256: - case Intrinsic::x86_avx512_vpermi2var_hi_512: - case Intrinsic::x86_avx512_vpermi2var_pd_128: - case Intrinsic::x86_avx512_vpermi2var_pd_256: - case Intrinsic::x86_avx512_vpermi2var_pd_512: - case Intrinsic::x86_avx512_vpermi2var_ps_128: - case Intrinsic::x86_avx512_vpermi2var_ps_256: - case Intrinsic::x86_avx512_vpermi2var_ps_512: + case Intrinsic::x86_avx512_vpermi2var_hi_128: + case Intrinsic::x86_avx512_vpermi2var_hi_256: + case Intrinsic::x86_avx512_vpermi2var_hi_512: + case Intrinsic::x86_avx512_vpermi2var_pd_128: + case Intrinsic::x86_avx512_vpermi2var_pd_256: + case Intrinsic::x86_avx512_vpermi2var_pd_512: + case Intrinsic::x86_avx512_vpermi2var_ps_128: + case Intrinsic::x86_avx512_vpermi2var_ps_256: + case Intrinsic::x86_avx512_vpermi2var_ps_512: case Intrinsic::x86_avx512_vpermi2var_q_128: case Intrinsic::x86_avx512_vpermi2var_q_256: case Intrinsic::x86_avx512_vpermi2var_q_512: diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 603a1565e48c45..79746201133bdd 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1762,54 +1762,52 @@ static void addNoReturnAttrs(const SCCNodeSet &SCCNodes, } } -static bool -allBBPathsGoThroughCold(BasicBlock *BB, - SmallDenseMap &Visited) { - // If BB contains a cold callsite this path through the CG is cold. - // Ignore whether the instructions actually are guranteed to transfer - // execution. Divergent behavior is considered unlikely. - if (any_of(*BB, [](Instruction &I) { - if (auto *CB = dyn_cast(&I)) - return CB->hasFnAttr(Attribute::Cold); - return false; - })) { - Visited[BB] = true; - return true; - } - - auto Succs = successors(BB); - // We found a path that doesn't go through any cold callsite. - if (Succs.empty()) - return false; +static bool allPathsGoThroughCold(Function &F) { + SmallDenseMap ColdPaths; + ColdPaths[&F.front()] = false; + SmallVector Jobs; + Jobs.push_back(&F.front()); + + while (!Jobs.empty()) { + BasicBlock *BB = Jobs.pop_back_val(); + + // If block contains a cold callsite this path through the CG is cold. + // Ignore whether the instructions actually are guaranteed to transfer + // execution. Divergent behavior is considered unlikely. + if (any_of(*BB, [](Instruction &I) { + if (auto *CB = dyn_cast(&I)) + return CB->hasFnAttr(Attribute::Cold); + return false; + })) { + ColdPaths[BB] = true; + continue; + } - // We didn't find a cold callsite in this BB, so check that all successors - // contain a cold callsite (or that their successors do). 
- // Potential TODO: We could use static branch hints to assume certain - // successor paths are inherently cold, irrespective of if they contain a cold - // callsite. - for (auto *Succ : Succs) { - // Start with false, this is necessary to ensure we don't turn loops into - // cold. - auto R = Visited.try_emplace(Succ, false); - if (!R.second) { - if (R.first->second) - continue; + auto Succs = successors(BB); + // We found a path that doesn't go through any cold callsite. + if (Succs.empty()) return false; + + // We didn't find a cold callsite in this BB, so check that all successors + // contain a cold callsite (or that their successors do). + // Potential TODO: We could use static branch hints to assume certain + // successor paths are inherently cold, irrespective of if they contain a + // cold callsite. + for (BasicBlock *Succ : Succs) { + // Start with false, this is necessary to ensure we don't turn loops into + // cold. + auto [Iter, Inserted] = ColdPaths.try_emplace(Succ, false); + if (!Inserted) { + if (Iter->second) + continue; + return false; + } + Jobs.push_back(Succ); } - if (!allBBPathsGoThroughCold(Succ, Visited)) - return false; - Visited[Succ] = true; } - return true; } -static bool allPathsGoThroughCold(Function &F) { - SmallDenseMap Visited; - Visited[&F.front()] = false; - return allBBPathsGoThroughCold(&F.front(), Visited); -} - // Set the cold function attribute if possible. static void addColdAttrs(const SCCNodeSet &SCCNodes, SmallSet &Changed) { diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 36f5cd7fd9e6cb..69e5835bee8a5e 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -598,24 +598,6 @@ void HWAddressSanitizer::initializeModule() { LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n"); TargetTriple = Triple(M.getTargetTriple()); - for (auto &F : M.functions()) { - // Remove memory attributes that are invalid with HWASan. - // HWASan checks read from shadow, which invalidates memory(argmem: *) - // Short granule checks on function arguments read from the argument memory - // (last byte of the granule), which invalidates writeonly. - // - // This is not only true for sanitized functions, because AttrInfer can - // infer those attributes on libc functions, which is not true if those - // are instrumented (Android) or intercepted. - - // nobuiltin makes sure later passes don't restore assumptions about - // the function. - F.addFnAttr(llvm::Attribute::NoBuiltin); - F.removeFnAttr(llvm::Attribute::Memory); - for (auto &A : F.args()) - A.removeAttr(llvm::Attribute::WriteOnly); - } - // x86_64 currently has two modes: // - Intel LAM (default) // - pointer aliasing (heap only) @@ -1640,6 +1622,14 @@ void HWAddressSanitizer::sanitizeFunction(Function &F, assert(!ShadowBase); + // Remove memory attributes that are about to become invalid. + // HWASan checks read from shadow, which invalidates memory(argmem: *) + // Short granule checks on function arguments read from the argument memory + // (last byte of the granule), which invalidates writeonly. 
+ F.removeFnAttr(llvm::Attribute::Memory); + for (auto &A : F.args()) + A.removeAttr(llvm::Attribute::WriteOnly); + BasicBlock::iterator InsertPt = F.getEntryBlock().begin(); IRBuilder<> EntryIRB(&F.getEntryBlock(), InsertPt); emitPrologue(EntryIRB, diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 526ae4e8834396..86c7dceffc5245 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2537,14 +2537,19 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS, Value *InvariantRHS, ICmpInst &ICmp, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, DominatorTree *DT) { - assert(ICmpInst::isSigned(Pred) && "Not supported yet!"); assert(!L.isLoopInvariant(VariantLHS) && "Precondition."); assert(L.isLoopInvariant(InvariantRHS) && "Precondition."); + bool IsSigned = ICmpInst::isSigned(Pred); + // Try to represent VariantLHS as sum of invariant and variant operands. using namespace PatternMatch; Value *VariantOp, *InvariantOp; - if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp)))) + if (IsSigned && + !match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp)))) + return false; + if (!IsSigned && + !match(VariantLHS, m_NUWAdd(m_Value(VariantOp), m_Value(InvariantOp)))) return false; // LHS itself is a loop-variant, try to represent it in the form: @@ -2559,17 +2564,20 @@ static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS, // normal linear arithmetics). Overflows make things much more complicated, so // we want to avoid this. auto &DL = L.getHeader()->getDataLayout(); - bool ProvedNoOverflowAfterReassociate = - computeOverflowForSignedSub(InvariantRHS, InvariantOp, - SimplifyQuery(DL, DT, AC, &ICmp)) == - llvm::OverflowResult::NeverOverflows; - if (!ProvedNoOverflowAfterReassociate) + SimplifyQuery SQ(DL, DT, AC, &ICmp); + if (IsSigned && computeOverflowForSignedSub(InvariantRHS, InvariantOp, SQ) != + llvm::OverflowResult::NeverOverflows) + return false; + if (!IsSigned && + computeOverflowForUnsignedSub(InvariantRHS, InvariantOp, SQ) != + llvm::OverflowResult::NeverOverflows) return false; auto *Preheader = L.getLoopPreheader(); assert(Preheader && "Loop is not in simplify form?"); IRBuilder<> Builder(Preheader->getTerminator()); - Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op", - /*HasNUW*/ false, /*HasNSW*/ true); + Value *NewCmpOp = + Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op", + /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned); ICmp.setPredicate(Pred); ICmp.setOperand(0, VariantOp); ICmp.setOperand(1, NewCmpOp); @@ -2584,14 +2592,19 @@ static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS, Value *InvariantRHS, ICmpInst &ICmp, Loop &L, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU, AssumptionCache *AC, DominatorTree *DT) { - assert(ICmpInst::isSigned(Pred) && "Not supported yet!"); assert(!L.isLoopInvariant(VariantLHS) && "Precondition."); assert(L.isLoopInvariant(InvariantRHS) && "Precondition."); + bool IsSigned = ICmpInst::isSigned(Pred); + // Try to represent VariantLHS as sum of invariant and variant operands. 
using namespace PatternMatch; Value *VariantOp, *InvariantOp; - if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp)))) + if (IsSigned && + !match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp)))) + return false; + if (!IsSigned && + !match(VariantLHS, m_NUWSub(m_Value(VariantOp), m_Value(InvariantOp)))) return false; bool VariantSubtracted = false; @@ -2613,16 +2626,26 @@ static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS, // "C1 - C2" does not overflow. auto &DL = L.getHeader()->getDataLayout(); SimplifyQuery SQ(DL, DT, AC, &ICmp); - if (VariantSubtracted) { + if (VariantSubtracted && IsSigned) { // C1 - LV < C2 --> LV > C1 - C2 if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, SQ) != llvm::OverflowResult::NeverOverflows) return false; - } else { + } else if (VariantSubtracted && !IsSigned) { + // C1 - LV < C2 --> LV > C1 - C2 + if (computeOverflowForUnsignedSub(InvariantOp, InvariantRHS, SQ) != + llvm::OverflowResult::NeverOverflows) + return false; + } else if (!VariantSubtracted && IsSigned) { // LV - C1 < C2 --> LV < C1 + C2 if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, SQ) != llvm::OverflowResult::NeverOverflows) return false; + } else { // !VariantSubtracted && !IsSigned + // LV - C1 < C2 --> LV < C1 + C2 + if (computeOverflowForUnsignedAdd(InvariantOp, InvariantRHS, SQ) != + llvm::OverflowResult::NeverOverflows) + return false; } auto *Preheader = L.getLoopPreheader(); assert(Preheader && "Loop is not in simplify form?"); @@ -2630,9 +2653,9 @@ static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS, Value *NewCmpOp = VariantSubtracted ? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op", - /*HasNUW*/ false, /*HasNSW*/ true) + /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned) : Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op", - /*HasNUW*/ false, /*HasNSW*/ true); + /*HasNUW*/ !IsSigned, /*HasNSW*/ IsSigned); ICmp.setPredicate(Pred); ICmp.setOperand(0, VariantOp); ICmp.setOperand(1, NewCmpOp); @@ -2650,10 +2673,6 @@ static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS)))) return false; - // TODO: Support unsigned predicates? - if (!ICmpInst::isSigned(Pred)) - return false; - // Put variant operand to LHS position. 
if (L.isLoopInvariant(LHS)) { std::swap(LHS, RHS); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index cf00299812bb7f..d378c6c3a4b01c 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -937,7 +937,6 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::NoUnwind: case Attribute::NoSanitizeBounds: case Attribute::NoSanitizeCoverage: - case Attribute::NoSanitizeRealtime: case Attribute::NullPointerIsValid: case Attribute::OptimizeForDebugging: case Attribute::OptForFuzzing: diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index f1f2d522f1cbaa..8a8d8afece6cb4 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1210,39 +1210,31 @@ Value *llvm::createAnyOfTargetReduction(IRBuilderBase &Builder, Value *Src, Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, Value *Src, RecurKind RdxKind) { auto *SrcVecEltTy = cast(Src->getType())->getElementType(); + auto getIdentity = [&]() { + Intrinsic::ID ID = getReductionIntrinsicID(RdxKind); + unsigned Opc = getArithmeticReductionInstruction(ID); + return ConstantExpr::getBinOpIdentity(Opc, SrcVecEltTy); + }; switch (RdxKind) { case RecurKind::Add: - return Builder.CreateAddReduce(Src); case RecurKind::Mul: - return Builder.CreateMulReduce(Src); case RecurKind::And: - return Builder.CreateAndReduce(Src); case RecurKind::Or: - return Builder.CreateOrReduce(Src); case RecurKind::Xor: - return Builder.CreateXorReduce(Src); - case RecurKind::FMulAdd: - case RecurKind::FAdd: - return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), - Src); - case RecurKind::FMul: - return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src); case RecurKind::SMax: - return Builder.CreateIntMaxReduce(Src, true); case RecurKind::SMin: - return Builder.CreateIntMinReduce(Src, true); case RecurKind::UMax: - return Builder.CreateIntMaxReduce(Src, false); case RecurKind::UMin: - return Builder.CreateIntMinReduce(Src, false); case RecurKind::FMax: - return Builder.CreateFPMaxReduce(Src); case RecurKind::FMin: - return Builder.CreateFPMinReduce(Src); case RecurKind::FMinimum: - return Builder.CreateFPMinimumReduce(Src); case RecurKind::FMaximum: - return Builder.CreateFPMaximumReduce(Src); + return Builder.CreateUnaryIntrinsic(getReductionIntrinsicID(RdxKind), Src); + case RecurKind::FMulAdd: + case RecurKind::FAdd: + return Builder.CreateFAddReduce(getIdentity(), Src); + case RecurKind::FMul: + return Builder.CreateFMulReduce(getIdentity(), Src); default: llvm_unreachable("Unhandled opcode"); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6babfd1eee9108..fa05b8dd22426f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7147,7 +7147,12 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, if (!OrigLoop->contains(CondI) || !CostCtx.SkipCostComputation.insert(CondI).second) continue; - Cost += CostCtx.getLegacyCost(CondI, VF); + InstructionCost CondICost = CostCtx.getLegacyCost(CondI, VF); + LLVM_DEBUG({ + dbgs() << "Cost of " << CondICost << " for VF " << VF + << ": exit condition instruction " << *CondI << "\n"; + }); + Cost += CondICost; for (Value *Op : CondI->operands()) { auto *OpI = dyn_cast(Op); if (!OpI || any_of(OpI->users(), 
[&ExitInstrs, this](User *U) { @@ -7250,10 +7255,9 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, /// not have corresponding recipes in \p Plan and are not marked to be ignored /// in \p CostCtx. This means the VPlan contains simplification that the legacy /// cost-model did not account for. -static bool -planContainsAdditionalSimplifications(VPlan &Plan, ElementCount VF, - VPCostContext &CostCtx, Loop *TheLoop, - LoopVectorizationCostModel &CM) { +static bool planContainsAdditionalSimplifications(VPlan &Plan, + VPCostContext &CostCtx, + Loop *TheLoop) { // First collect all instructions for the recipes in Plan. auto GetInstructionForCost = [](const VPRecipeBase *R) -> Instruction * { if (auto *S = dyn_cast(R)) @@ -7284,16 +7288,13 @@ planContainsAdditionalSimplifications(VPlan &Plan, ElementCount VF, // Return true if the loop contains any instructions that are not also part of // the VPlan or are skipped for VPlan-based cost computations. This indicates // that the VPlan contains extra simplifications. - return any_of( - TheLoop->blocks(), [&SeenInstrs, VF, &CostCtx, &CM](BasicBlock *BB) { - return any_of(*BB, [&SeenInstrs, VF, &CostCtx, &CM](Instruction &I) { - if (isa(&I)) - return false; - return !SeenInstrs.contains(&I) && - !CostCtx.skipCostComputation(&I, true) && - !CM.canTruncateToMinimalBitwidth(&I, VF); - }); - }); + return any_of(TheLoop->blocks(), [&SeenInstrs, &CostCtx](BasicBlock *BB) { + return any_of(*BB, [&SeenInstrs, &CostCtx](Instruction &I) { + if (isa(&I)) + return false; + return !SeenInstrs.contains(&I) && !CostCtx.skipCostComputation(&I, true); + }); + }); } #endif @@ -7364,8 +7365,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { precomputeCosts(BestPlan, BestFactor.Width, CostCtx); assert((BestFactor.Width == LegacyVF.Width || planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width), - BestFactor.Width, CostCtx, - OrigLoop, CM)) && + CostCtx, OrigLoop)) && " VPlan cost model and legacy cost model disagreed"); assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) && "when vectorizing, the scalar cost must be computed."); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index edb2567fa057b3..3d41c978281351 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -260,6 +260,20 @@ static FixedVectorType *getWidenedType(Type *ScalarTy, unsigned VF) { VF * getNumElements(ScalarTy)); } +/// Returns the number of elements of the given type \p Ty, not less than \p Sz, +/// which forms type, which splits by \p TTI into whole vector types during +/// legalization. +static unsigned getFullVectorNumberOfElements(const TargetTransformInfo &TTI, + Type *Ty, unsigned Sz) { + if (!isValidElementType(Ty)) + return PowerOf2Ceil(Sz); + // Find the number of elements, which forms full vectors. + const unsigned NumParts = TTI.getRegUsageForType(getWidenedType(Ty, Sz)); + if (NumParts == 0 || NumParts == Sz) + return PowerOf2Ceil(Sz); + return PowerOf2Ceil(divideCeil(Sz, NumParts)) * NumParts; +} + static void transformScalarShuffleIndiciesToVector(unsigned VecTyNumElements, SmallVectorImpl &Mask) { // The ShuffleBuilder implementation use shufflevector to splat an "element". 
@@ -1224,6 +1238,22 @@ static bool doesNotNeedToSchedule(ArrayRef VL) { (all_of(VL, isUsedOutsideBlock) || all_of(VL, areAllOperandsNonInsts)); } +/// Returns true if widened type of \p Ty elements with size \p Sz represents +/// full vector type, i.e. adding extra element results in extra parts upon type +/// legalization. +static bool hasFullVectorsOnly(const TargetTransformInfo &TTI, Type *Ty, + unsigned Sz) { + if (Sz <= 1) + return false; + if (!isValidElementType(Ty) && !isa(Ty)) + return false; + if (has_single_bit(Sz)) + return true; + const unsigned NumParts = TTI.getRegUsageForType(getWidenedType(Ty, Sz)); + return NumParts > 0 && NumParts != Sz && has_single_bit(Sz / NumParts) && + Sz % NumParts == 0; +} + namespace slpvectorizer { /// Bottom Up SLP Vectorizer. @@ -2467,7 +2497,9 @@ class BoUpSLP { } // TODO: Check if we can remove a check for non-power-2 number of // scalars after full support of non-power-2 vectorization. - return UniqueValues.size() != 2 && has_single_bit(UniqueValues.size()); + return UniqueValues.size() != 2 && + hasFullVectorsOnly(*R.TTI, (*UniqueValues.begin())->getType(), + UniqueValues.size()); }; // If the initial strategy fails for any of the operand indexes, then we @@ -2605,7 +2637,7 @@ class BoUpSLP { int Score = LookAhead.getScoreAtLevelRec(Candidates[I].first, Candidates[I].second, /*U1=*/nullptr, /*U2=*/nullptr, - /*Level=*/1, std::nullopt); + /*CurrLevel=*/1, std::nullopt); if (Score > BestScore) { BestScore = Score; Index = I; @@ -2864,6 +2896,14 @@ class BoUpSLP { /// avoid issues with def-use order. Value *vectorizeTree(TreeEntry *E, bool PostponedPHIs); + /// Returns vectorized operand node, that matches the order of the scalars + /// operand number \p NodeIdx in entry \p E. + TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E, unsigned NodeIdx); + const TreeEntry *getMatchedVectorizedOperand(const TreeEntry *E, + unsigned NodeIdx) const { + return const_cast(this)->getMatchedVectorizedOperand(E, NodeIdx); + } + /// Vectorize a single entry in the tree, the \p Idx-th operand of the entry /// \p E. /// \param PostponedPHIs true, if need to postpone emission of phi nodes to @@ -3268,8 +3308,9 @@ class BoUpSLP { SmallVectorImpl *AltScalars = nullptr) const; /// Return true if this is a non-power-of-2 node. - bool isNonPowOf2Vec() const { - bool IsNonPowerOf2 = !has_single_bit(Scalars.size()); + bool isNonPowOf2Vec(const TargetTransformInfo &TTI) const { + bool IsNonPowerOf2 = !hasFullVectorsOnly( + TTI, getValueType(Scalars.front()), Scalars.size()); assert((!IsNonPowerOf2 || ReuseShuffleIndices.empty()) && "Reshuffling not supported with non-power-of-2 vectors yet."); return IsNonPowerOf2; @@ -3447,7 +3488,7 @@ class BoUpSLP { if (UserTreeIdx.UserTE) { Last->UserTreeIndices.push_back(UserTreeIdx); - assert((!Last->isNonPowOf2Vec() || Last->ReorderIndices.empty()) && + assert((!Last->isNonPowOf2Vec(*TTI) || Last->ReorderIndices.empty()) && "Reordering isn't implemented for non-power-of-2 nodes yet"); } return Last; @@ -4353,7 +4394,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) { if (!isValidElementType(ScalarTy)) return std::nullopt; auto *VecTy = getWidenedType(ScalarTy, NumScalars); - int NumParts = TTI->getNumberOfParts(VecTy); + int NumParts = TTI->getRegUsageForType(VecTy); if (NumParts == 0 || NumParts >= NumScalars) NumParts = 1; SmallVector ExtractMask; @@ -4725,7 +4766,7 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( // Check the order of pointer operands or that all pointers are the same. 
bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, Order); // FIXME: Reordering isn't implemented for non-power-of-2 nodes yet. - if (!Order.empty() && !has_single_bit(VL.size())) { + if (!Order.empty() && !hasFullVectorsOnly(*TTI, ScalarTy, Sz)) { assert(VectorizeNonPowerOf2 && "non-power-of-2 number of loads only " "supported with VectorizeNonPowerOf2"); return LoadsState::Gather; @@ -4779,12 +4820,13 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( }); }); const unsigned AbsoluteDiff = std::abs(*Diff); - if (IsPossibleStrided && (IsAnyPointerUsedOutGraph || - ((Sz > MinProfitableStridedLoads || - (AbsoluteDiff <= MaxProfitableLoadStride * Sz && - has_single_bit(AbsoluteDiff))) && - AbsoluteDiff > Sz) || - *Diff == -(static_cast(Sz) - 1))) { + if (IsPossibleStrided && + (IsAnyPointerUsedOutGraph || + ((Sz > MinProfitableStridedLoads || + (AbsoluteDiff <= MaxProfitableLoadStride * Sz && + hasFullVectorsOnly(*TTI, ScalarTy, AbsoluteDiff))) && + AbsoluteDiff > Sz) || + *Diff == -(static_cast(Sz) - 1))) { int Stride = *Diff / static_cast(Sz - 1); if (*Diff == Stride * static_cast(Sz - 1)) { Align Alignment = @@ -4812,16 +4854,68 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( } } } - auto CheckForShuffledLoads = [&, &TTI = *TTI](Align CommonAlignment) { + // Correctly identify compare the cost of loads + shuffles rather than + // strided/masked gather loads. Returns true if vectorized + shuffles + // representation is better than just gather. + auto CheckForShuffledLoads = [&, &TTI = *TTI](Align CommonAlignment, + bool ProfitableGatherPointers) { + // Compare masked gather cost and loads + insert subvector costs. + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + auto [ScalarGEPCost, VectorGEPCost] = + getGEPCosts(TTI, PointerOps, PointerOps.front(), + Instruction::GetElementPtr, CostKind, ScalarTy, VecTy); + // Estimate the cost of masked gather GEP. If not a splat, roughly + // estimate as a buildvector, otherwise estimate as splat. + APInt DemandedElts = APInt::getAllOnes(VecTy->getNumElements()); + VectorType *PtrVecTy = + getWidenedType(PointerOps.front()->getType()->getScalarType(), + VecTy->getNumElements()); + if (static_cast(count_if( + PointerOps, IsaPred)) < PointerOps.size() - 1 || + any_of(PointerOps, [&](Value *V) { + return getUnderlyingObject(V) != + getUnderlyingObject(PointerOps.front()); + })) + VectorGEPCost += TTI.getScalarizationOverhead( + PtrVecTy, DemandedElts, /*Insert=*/true, /*Extract=*/false, CostKind); + else + VectorGEPCost += + TTI.getScalarizationOverhead( + PtrVecTy, APInt::getOneBitSet(VecTy->getNumElements(), 0), + /*Insert=*/true, /*Extract=*/false, CostKind) + + ::getShuffleCost(TTI, TTI::SK_Broadcast, PtrVecTy, std::nullopt, + CostKind); + // The cost of scalar loads. + InstructionCost ScalarLoadsCost = + std::accumulate(VL.begin(), VL.end(), InstructionCost(), + [&](InstructionCost C, Value *V) { + return C + TTI.getInstructionCost( + cast(V), CostKind); + }) + + ScalarGEPCost; + // The cost of masked gather. + InstructionCost MaskedGatherCost = + TTI.getGatherScatterOpCost( + Instruction::Load, VecTy, cast(VL0)->getPointerOperand(), + /*VariableMask=*/false, CommonAlignment, CostKind) + + (ProfitableGatherPointers ? 0 : VectorGEPCost); + InstructionCost GatherCost = + TTI.getScalarizationOverhead(VecTy, DemandedElts, /*Insert=*/true, + /*Extract=*/false, CostKind) + + ScalarLoadsCost; + // The list of loads is small or perform partial check already - directly + // compare masked gather cost and gather cost. 
+ constexpr unsigned ListLimit = 4; + if (!TryRecursiveCheck || VL.size() < ListLimit) + return MaskedGatherCost - GatherCost >= -SLPCostThreshold; unsigned Sz = DL->getTypeSizeInBits(ScalarTy); - unsigned MinVF = getMinVF(Sz); - unsigned MaxVF = std::max(bit_floor(VL.size() / 2), MinVF); - MaxVF = std::min(getMaximumVF(Sz, Instruction::Load), MaxVF); - for (unsigned VF = MaxVF; VF >= MinVF; VF /= 2) { - unsigned VectorizedCnt = 0; + unsigned MinVF = getMinVF(2 * Sz); + DemandedElts.clearAllBits(); + // Iterate through possible vectorization factors and check if vectorized + + // shuffles is better than just gather. + for (unsigned VF = VL.size() / 2; VF >= MinVF; VF /= 2) { SmallVector States; - for (unsigned Cnt = 0, End = VL.size(); Cnt + VF <= End; - Cnt += VF, ++VectorizedCnt) { + for (unsigned Cnt = 0, End = VL.size(); Cnt + VF <= End; Cnt += VF) { ArrayRef Slice = VL.slice(Cnt, VF); SmallVector Order; SmallVector PointerOps; @@ -4829,8 +4923,10 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( canVectorizeLoads(Slice, Slice.front(), Order, PointerOps, /*TryRecursiveCheck=*/false); // Check that the sorted loads are consecutive. - if (LS == LoadsState::Gather) - break; + if (LS == LoadsState::Gather) { + DemandedElts.setBits(Cnt, Cnt + VF); + continue; + } // If need the reorder - consider as high-cost masked gather for now. if ((LS == LoadsState::Vectorize || LS == LoadsState::StridedVectorize) && @@ -4838,79 +4934,92 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( LS = LoadsState::ScatterVectorize; States.push_back(LS); } + if (DemandedElts.isAllOnes()) + // All loads gathered - try smaller VF. + continue; // Can be vectorized later as a serie of loads/insertelements. - if (VectorizedCnt == VL.size() / VF) { - // Compare masked gather cost and loads + insersubvector costs. 
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - auto [ScalarGEPCost, VectorGEPCost] = - getGEPCosts(TTI, PointerOps, PointerOps.front(), - Instruction::GetElementPtr, CostKind, ScalarTy, VecTy); - InstructionCost MaskedGatherCost = - TTI.getGatherScatterOpCost(Instruction::Load, VecTy, - cast(VL0)->getPointerOperand(), - /*VariableMask=*/false, CommonAlignment, - CostKind) + - VectorGEPCost - ScalarGEPCost; - InstructionCost VecLdCost = 0; - auto *SubVecTy = getWidenedType(ScalarTy, VF); - for (auto [I, LS] : enumerate(States)) { - auto *LI0 = cast(VL[I * VF]); - switch (LS) { - case LoadsState::Vectorize: { - auto [ScalarGEPCost, VectorGEPCost] = - getGEPCosts(TTI, ArrayRef(PointerOps).slice(I * VF, VF), - LI0->getPointerOperand(), Instruction::Load, - CostKind, ScalarTy, SubVecTy); - VecLdCost += TTI.getMemoryOpCost( - Instruction::Load, SubVecTy, LI0->getAlign(), - LI0->getPointerAddressSpace(), CostKind, - TTI::OperandValueInfo()) + - VectorGEPCost - ScalarGEPCost; - break; - } - case LoadsState::StridedVectorize: { - auto [ScalarGEPCost, VectorGEPCost] = - getGEPCosts(TTI, ArrayRef(PointerOps).slice(I * VF, VF), - LI0->getPointerOperand(), Instruction::Load, - CostKind, ScalarTy, SubVecTy); - VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy, - LI0->getPointerOperand(), - /*VariableMask=*/false, - CommonAlignment, CostKind) + - VectorGEPCost - ScalarGEPCost; - break; - } - case LoadsState::ScatterVectorize: { - auto [ScalarGEPCost, VectorGEPCost] = getGEPCosts( - TTI, ArrayRef(PointerOps).slice(I * VF, VF), - LI0->getPointerOperand(), Instruction::GetElementPtr, CostKind, - ScalarTy, SubVecTy); - VecLdCost += TTI.getGatherScatterOpCost(Instruction::Load, SubVecTy, - LI0->getPointerOperand(), - /*VariableMask=*/false, - CommonAlignment, CostKind) + - VectorGEPCost - ScalarGEPCost; - break; - } - case LoadsState::Gather: - llvm_unreachable( - "Expected only consecutive, strided or masked gather loads."); - } - SmallVector ShuffleMask(VL.size()); - for (int Idx : seq(0, VL.size())) - ShuffleMask[Idx] = Idx / VF == I ? VL.size() + Idx % VF : Idx; + InstructionCost VecLdCost = 0; + if (!DemandedElts.isZero()) { + VecLdCost = + TTI.getScalarizationOverhead(VecTy, DemandedElts, /*Insert=*/true, + /*Extract=*/false, CostKind) + + ScalarGEPCost; + for (unsigned Idx : seq(VL.size())) + if (DemandedElts[Idx]) + VecLdCost += + TTI.getInstructionCost(cast(VL[Idx]), CostKind); + } + auto *SubVecTy = getWidenedType(ScalarTy, VF); + for (auto [I, LS] : enumerate(States)) { + auto *LI0 = cast(VL[I * VF]); + InstructionCost VectorGEPCost = + (LS == LoadsState::ScatterVectorize && ProfitableGatherPointers) + ? 
0 + : getGEPCosts(TTI, ArrayRef(PointerOps).slice(I * VF, VF), + LI0->getPointerOperand(), + Instruction::GetElementPtr, CostKind, ScalarTy, + SubVecTy) + .second; + if (LS == LoadsState::ScatterVectorize) { + if (static_cast( + count_if(PointerOps, IsaPred)) < + PointerOps.size() - 1 || + any_of(PointerOps, [&](Value *V) { + return getUnderlyingObject(V) != + getUnderlyingObject(PointerOps.front()); + })) + VectorGEPCost += TTI.getScalarizationOverhead( + SubVecTy, APInt::getAllOnes(VF), + /*Insert=*/true, /*Extract=*/false, CostKind); + else + VectorGEPCost += TTI.getScalarizationOverhead( + SubVecTy, APInt::getOneBitSet(VF, 0), + /*Insert=*/true, /*Extract=*/false, CostKind) + + ::getShuffleCost(TTI, TTI::SK_Broadcast, SubVecTy, + std::nullopt, CostKind); + } + switch (LS) { + case LoadsState::Vectorize: + VecLdCost += + TTI.getMemoryOpCost(Instruction::Load, SubVecTy, LI0->getAlign(), + LI0->getPointerAddressSpace(), CostKind, + TTI::OperandValueInfo()) + + VectorGEPCost; + break; + case LoadsState::StridedVectorize: + VecLdCost += TTI.getStridedMemoryOpCost(Instruction::Load, SubVecTy, + LI0->getPointerOperand(), + /*VariableMask=*/false, + CommonAlignment, CostKind) + + VectorGEPCost; + break; + case LoadsState::ScatterVectorize: + VecLdCost += TTI.getGatherScatterOpCost(Instruction::Load, SubVecTy, + LI0->getPointerOperand(), + /*VariableMask=*/false, + CommonAlignment, CostKind) + + VectorGEPCost; + break; + case LoadsState::Gather: + // Gathers are already calculated - ignore. + continue; + } + SmallVector ShuffleMask(VL.size()); + for (int Idx : seq(0, VL.size())) + ShuffleMask[Idx] = Idx / VF == I ? VL.size() + Idx % VF : Idx; + if (I > 0) VecLdCost += ::getShuffleCost(TTI, TTI::SK_InsertSubvector, VecTy, ShuffleMask, CostKind, I * VF, SubVecTy); - } - // If masked gather cost is higher - better to vectorize, so - // consider it as a gather node. It will be better estimated - // later. - if (MaskedGatherCost >= VecLdCost) - return true; } + // If masked gather cost is higher - better to vectorize, so + // consider it as a gather node. It will be better estimated + // later. + if (MaskedGatherCost >= VecLdCost && + VecLdCost - GatherCost < -SLPCostThreshold) + return true; } - return false; + return MaskedGatherCost - GatherCost >= -SLPCostThreshold; }; // TODO: need to improve analysis of the pointers, if not all of them are // GEPs or have > 2 operands, we end up with a gather node, which just @@ -4931,7 +5040,8 @@ BoUpSLP::LoadsState BoUpSLP::canVectorizeLoads( !TTI->forceScalarizeMaskedGather(VecTy, CommonAlignment)) { // Check if potential masked gather can be represented as series // of loads + insertsubvectors. - if (TryRecursiveCheck && CheckForShuffledLoads(CommonAlignment)) { + if (TryRecursiveCheck && + CheckForShuffledLoads(CommonAlignment, ProfitableGatherPointers)) { // If masked gather cost is higher - better to vectorize, so // consider it as a gather node. It will be better estimated // later. @@ -5121,7 +5231,7 @@ static bool areTwoInsertFromSameBuildVector( std::optional BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // FIXME: Vectorizing is not supported yet for non-power-of-2 ops. 
- if (TE.isNonPowOf2Vec()) + if (TE.isNonPowOf2Vec(*TTI)) return std::nullopt; // No need to reorder if need to shuffle reuses, still need to shuffle the @@ -5155,8 +5265,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { } } if (Sz == 2 && TE.getVectorFactor() == 4 && - TTI->getNumberOfParts(getWidenedType(TE.Scalars.front()->getType(), - 2 * TE.getVectorFactor())) == 1) + TTI->getRegUsageForType(getWidenedType(TE.Scalars.front()->getType(), + 2 * TE.getVectorFactor())) == 1) return std::nullopt; if (!ShuffleVectorInst::isOneUseSingleSourceMask(TE.ReuseShuffleIndices, Sz)) { @@ -5505,7 +5615,7 @@ void BoUpSLP::reorderTopToBottom() { // Reorder the graph nodes according to their vectorization factor. for (unsigned VF = VectorizableTree.front()->getVectorFactor(); VF > 1; - VF /= 2) { + VF -= 2) { auto It = VFToOrderedEntries.find(VF); if (It == VFToOrderedEntries.end()) continue; @@ -5678,7 +5788,7 @@ bool BoUpSLP::canReorderOperands( ArrayRef ReorderableGathers, SmallVectorImpl &GatherOps) { // FIXME: Reordering isn't implemented for non-power-of-2 nodes yet. - if (UserTE->isNonPowOf2Vec()) + if (UserTE->isNonPowOf2Vec(*TTI)) return false; for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) { @@ -5853,7 +5963,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) { auto Res = OrdersUses.insert(std::make_pair(OrdersType(), 0)); const auto AllowsReordering = [&](const TreeEntry *TE) { // FIXME: Reordering isn't implemented for non-power-of-2 nodes yet. - if (TE->isNonPowOf2Vec()) + if (TE->isNonPowOf2Vec(*TTI)) return false; if (!TE->ReorderIndices.empty() || !TE->ReuseShuffleIndices.empty() || (TE->State == TreeEntry::Vectorize && TE->isAltShuffle()) || @@ -6499,7 +6609,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( case Instruction::ExtractElement: { bool Reuse = canReuseExtract(VL, VL0, CurrentOrder); // FIXME: Vectorizing is not supported yet for non-power-of-2 ops. - if (!has_single_bit(VL.size())) + if (!hasFullVectorsOnly(*TTI, VL0->getType(), VL.size())) return TreeEntry::NeedToGather; if (Reuse || !CurrentOrder.empty()) return TreeEntry::Vectorize; @@ -6909,7 +7019,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, ReuseShuffleIndices.clear(); } else { // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops. - if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) { + if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec(*TTI)) { LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported " "for nodes with padding.\n"); newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); @@ -6922,7 +7032,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, return isa(V) || !isConstant(V); })) || - !llvm::has_single_bit(NumUniqueScalarValues)) { + !hasFullVectorsOnly(*TTI, UniqueValues.front()->getType(), + NumUniqueScalarValues)) { if (DoNotFail && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 && S.MainOp->isSafeToRemove() && all_of(UniqueValues, [=](Value *V) { @@ -6930,7 +7041,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, areAllUsersVectorized(cast(V), UserIgnoreList); })) { - unsigned PWSz = PowerOf2Ceil(UniqueValues.size()); + // Find the number of elements, which forms full vectors. 
+ unsigned PWSz = getFullVectorNumberOfElements( + *TTI, UniqueValues.front()->getType(), UniqueValues.size()); if (PWSz == VL.size()) { ReuseShuffleIndices.clear(); } else { @@ -6964,6 +7077,55 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } } + // Check if this is a duplicate of another entry. + if (TreeEntry *E = getTreeEntry(S.OpValue)) { + LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n"); + if (!E->isSame(VL)) { + auto It = MultiNodeScalars.find(S.OpValue); + if (It != MultiNodeScalars.end()) { + auto *TEIt = find_if(It->getSecond(), + [&](TreeEntry *ME) { return ME->isSame(VL); }); + if (TEIt != It->getSecond().end()) + E = *TEIt; + else + E = nullptr; + } else { + E = nullptr; + } + } + if (!E) { + if (!doesNotNeedToBeScheduled(S.OpValue)) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); + if (TryToFindDuplicates(S)) + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndices); + return; + } + SmallPtrSet Nodes; + Nodes.insert(getTreeEntry(S.OpValue)); + for (const TreeEntry *E : MultiNodeScalars.lookup(S.OpValue)) + Nodes.insert(E); + SmallPtrSet Values(VL.begin(), VL.end()); + if (any_of(Nodes, [&](const TreeEntry *E) { + return all_of(E->Scalars, + [&](Value *V) { return Values.contains(V); }); + })) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n"); + if (TryToFindDuplicates(S)) + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndices); + return; + } + } else { + // Record the reuse of the tree node. FIXME, currently this is only used + // to properly draw the graph rather than for the actual vectorization. + E->UserTreeIndices.push_back(UserTreeIdx); + LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue + << ".\n"); + return; + } + } + // Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of // a load), in which case peek through to include it in the tree, without // ballooning over-budget. @@ -7095,55 +7257,6 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // We now know that this is a vector of instructions of the same type from // the same block. - // Check if this is a duplicate of another entry. - if (TreeEntry *E = getTreeEntry(S.OpValue)) { - LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n"); - if (!E->isSame(VL)) { - auto It = MultiNodeScalars.find(S.OpValue); - if (It != MultiNodeScalars.end()) { - auto *TEIt = find_if(It->getSecond(), - [&](TreeEntry *ME) { return ME->isSame(VL); }); - if (TEIt != It->getSecond().end()) - E = *TEIt; - else - E = nullptr; - } else { - E = nullptr; - } - } - if (!E) { - if (!doesNotNeedToBeScheduled(S.OpValue)) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); - if (TryToFindDuplicates(S)) - newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndices); - return; - } - SmallPtrSet Nodes; - Nodes.insert(getTreeEntry(S.OpValue)); - for (const TreeEntry *E : MultiNodeScalars.lookup(S.OpValue)) - Nodes.insert(E); - SmallPtrSet Values(VL.begin(), VL.end()); - if (any_of(Nodes, [&](const TreeEntry *E) { - return all_of(E->Scalars, - [&](Value *V) { return Values.contains(V); }); - })) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n"); - if (TryToFindDuplicates(S)) - newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndices); - return; - } - } else { - // Record the reuse of the tree node. 
FIXME, currently this is only used - // to properly draw the graph rather than for the actual vectorization. - E->UserTreeIndices.push_back(UserTreeIdx); - LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue - << ".\n"); - return; - } - } - // Check that none of the instructions in the bundle are already in the tree. for (Value *V : VL) { if ((!IsScatterVectorizeUserTE && !isa(V)) || @@ -9141,7 +9254,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { } assert(!CommonMask.empty() && "Expected non-empty common mask."); auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size()); - unsigned NumParts = TTI.getNumberOfParts(MaskVecTy); + unsigned NumParts = TTI.getRegUsageForType(MaskVecTy); if (NumParts == 0 || NumParts >= Mask.size()) NumParts = 1; unsigned SliceSize = getPartNumElems(Mask.size(), NumParts); @@ -9158,7 +9271,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { } assert(!CommonMask.empty() && "Expected non-empty common mask."); auto *MaskVecTy = getWidenedType(ScalarTy, Mask.size()); - unsigned NumParts = TTI.getNumberOfParts(MaskVecTy); + unsigned NumParts = TTI.getRegUsageForType(MaskVecTy); if (NumParts == 0 || NumParts >= Mask.size()) NumParts = 1; unsigned SliceSize = getPartNumElems(Mask.size(), NumParts); @@ -9362,22 +9475,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { const BoUpSLP::TreeEntry *BoUpSLP::getOperandEntry(const TreeEntry *E, unsigned Idx) const { - Value *Op = E->getOperand(Idx).front(); - if (const TreeEntry *TE = getTreeEntry(Op)) { - if (find_if(TE->UserTreeIndices, [&](const EdgeInfo &EI) { - return EI.EdgeIdx == Idx && EI.UserTE == E; - }) != TE->UserTreeIndices.end()) - return TE; - auto MIt = MultiNodeScalars.find(Op); - if (MIt != MultiNodeScalars.end()) { - for (const TreeEntry *TE : MIt->second) { - if (find_if(TE->UserTreeIndices, [&](const EdgeInfo &EI) { - return EI.EdgeIdx == Idx && EI.UserTE == E; - }) != TE->UserTreeIndices.end()) - return TE; - } - } - } + if (const TreeEntry *VE = getMatchedVectorizedOperand(E, Idx)) + return VE; const auto *It = find_if(VectorizableTree, [&](const std::unique_ptr &TE) { return TE->isGather() && @@ -9678,7 +9777,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, unsigned const NumElts = SrcVecTy->getNumElements(); unsigned const NumScalars = VL.size(); - unsigned NumOfParts = TTI->getNumberOfParts(SrcVecTy); + unsigned NumOfParts = TTI->getRegUsageForType(SrcVecTy); SmallVector InsertMask(NumElts, PoisonMaskElem); unsigned OffsetBeg = *getElementIndex(VL.front()); @@ -10894,7 +10993,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { // Keep original scalar if number of externally used instructions in // the same entry is not power of 2. It may help to do some extra // vectorization for now. - KeepScalar = ScalarUsesCount <= 1 || !has_single_bit(ScalarUsesCount); + KeepScalar = + ScalarUsesCount <= 1 || + !hasFullVectorsOnly(*TTI, EU.Scalar->getType(), ScalarUsesCount); } if (KeepScalar) { ExternalUsesAsOriginalScalar.insert(EU.Scalar); @@ -11587,13 +11688,14 @@ BoUpSLP::isGatherShuffledEntry( if (TE == VectorizableTree.front().get()) return {}; // FIXME: Gathering for non-power-of-2 nodes not implemented yet. 
- if (TE->isNonPowOf2Vec()) + if (TE->isNonPowOf2Vec(*TTI)) return {}; Mask.assign(VL.size(), PoisonMaskElem); assert(TE->UserTreeIndices.size() == 1 && "Expected only single user of the gather node."); - assert(VL.size() % NumParts == 0 && - "Number of scalars must be divisible by NumParts."); + // Number of scalars must be divisible by NumParts. + if (VL.size() % NumParts != 0) + return {}; unsigned SliceSize = getPartNumElems(VL.size(), NumParts); SmallVector> Res; for (unsigned Part : seq(NumParts)) { @@ -12521,10 +12623,9 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis { } }; -Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx, - bool PostponedPHIs) { - ValueList &VL = E->getOperand(NodeIdx); - const unsigned VF = VL.size(); +BoUpSLP::TreeEntry *BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, + unsigned NodeIdx) { + ArrayRef VL = E->getOperand(NodeIdx); InstructionsState S = getSameOpcode(VL, *TLI); // Special processing for GEPs bundle, which may include non-gep values. if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) { @@ -12532,109 +12633,113 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx, if (It != VL.end()) S = getSameOpcode(*It, *TLI); } - if (S.getOpcode()) { - auto CheckSameVE = [&](const TreeEntry *VE) { - return VE->isSame(VL) && - (any_of(VE->UserTreeIndices, - [E, NodeIdx](const EdgeInfo &EI) { - return EI.UserTE == E && EI.EdgeIdx == NodeIdx; - }) || - any_of(VectorizableTree, - [E, NodeIdx, VE](const std::unique_ptr &TE) { - return TE->isOperandGatherNode({E, NodeIdx}) && - VE->isSame(TE->Scalars); - })); + if (!S.getOpcode()) + return nullptr; + auto CheckSameVE = [&](const TreeEntry *VE) { + return VE->isSame(VL) && + (any_of(VE->UserTreeIndices, + [E, NodeIdx](const EdgeInfo &EI) { + return EI.UserTE == E && EI.EdgeIdx == NodeIdx; + }) || + any_of(VectorizableTree, + [E, NodeIdx, VE](const std::unique_ptr &TE) { + return TE->isOperandGatherNode( + {const_cast(E), NodeIdx}) && + VE->isSame(TE->Scalars); + })); + }; + TreeEntry *VE = getTreeEntry(S.OpValue); + if (VE && CheckSameVE(VE)) + return VE; + auto It = MultiNodeScalars.find(S.OpValue); + if (It != MultiNodeScalars.end()) { + auto *I = find_if(It->getSecond(), [&](const TreeEntry *TE) { + return TE != VE && CheckSameVE(TE); + }); + if (I != It->getSecond().end()) + return *I; + } + return nullptr; +} + +Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx, + bool PostponedPHIs) { + ValueList &VL = E->getOperand(NodeIdx); + const unsigned VF = VL.size(); + if (TreeEntry *VE = getMatchedVectorizedOperand(E, NodeIdx)) { + auto FinalShuffle = [&](Value *V, ArrayRef Mask) { + // V may be affected by MinBWs. + // We want ShuffleInstructionBuilder to correctly support REVEC. The key + // factor is the number of elements, not their type. + Type *ScalarTy = cast(V->getType())->getElementType(); + unsigned NumElements = getNumElements(VL.front()->getType()); + ShuffleInstructionBuilder ShuffleBuilder( + NumElements != 1 ? 
FixedVectorType::get(ScalarTy, NumElements) + : ScalarTy, + Builder, *this); + ShuffleBuilder.add(V, Mask); + SmallVector> SubVectors( + E->CombinedEntriesWithIndices.size()); + transform(E->CombinedEntriesWithIndices, SubVectors.begin(), + [&](const auto &P) { + return std::make_pair(VectorizableTree[P.first].get(), + P.second); + }); + return ShuffleBuilder.finalize(std::nullopt, SubVectors); }; - TreeEntry *VE = getTreeEntry(S.OpValue); - bool IsSameVE = VE && CheckSameVE(VE); - if (!IsSameVE) { - auto It = MultiNodeScalars.find(S.OpValue); - if (It != MultiNodeScalars.end()) { - auto *I = find_if(It->getSecond(), [&](const TreeEntry *TE) { - return TE != VE && CheckSameVE(TE); - }); - if (I != It->getSecond().end()) { - VE = *I; - IsSameVE = true; - } - } - } - if (IsSameVE) { - auto FinalShuffle = [&](Value *V, ArrayRef Mask) { - // V may be affected by MinBWs. - // We want ShuffleInstructionBuilder to correctly support REVEC. The key - // factor is the number of elements, not their type. - Type *ScalarTy = cast(V->getType())->getElementType(); - unsigned NumElements = getNumElements(VL.front()->getType()); - ShuffleInstructionBuilder ShuffleBuilder( - NumElements != 1 ? FixedVectorType::get(ScalarTy, NumElements) - : ScalarTy, - Builder, *this); - ShuffleBuilder.add(V, Mask); - SmallVector> SubVectors( - E->CombinedEntriesWithIndices.size()); - transform(E->CombinedEntriesWithIndices, SubVectors.begin(), - [&](const auto &P) { - return std::make_pair(VectorizableTree[P.first].get(), - P.second); - }); - return ShuffleBuilder.finalize(std::nullopt, SubVectors); - }; - Value *V = vectorizeTree(VE, PostponedPHIs); - if (VF * getNumElements(VL[0]->getType()) != - cast(V->getType())->getNumElements()) { - if (!VE->ReuseShuffleIndices.empty()) { - // Reshuffle to get only unique values. - // If some of the scalars are duplicated in the vectorization - // tree entry, we do not vectorize them but instead generate a - // mask for the reuses. But if there are several users of the - // same entry, they may have different vectorization factors. - // This is especially important for PHI nodes. In this case, we - // need to adapt the resulting instruction for the user - // vectorization factor and have to reshuffle it again to take - // only unique elements of the vector. Without this code the - // function incorrectly returns reduced vector instruction with - // the same elements, not with the unique ones. - - // block: - // %phi = phi <2 x > { .., %entry} {%shuffle, %block} - // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0> - // ... (use %2) - // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0} - // br %block - SmallVector Mask(VF, PoisonMaskElem); - for (auto [I, V] : enumerate(VL)) { - if (isa(V)) - continue; - Mask[I] = VE->findLaneForValue(V); - } - V = FinalShuffle(V, Mask); - } else { - assert(VF < cast(V->getType())->getNumElements() && - "Expected vectorization factor less " - "than original vector size."); - SmallVector UniformMask(VF, 0); - std::iota(UniformMask.begin(), UniformMask.end(), 0); - V = FinalShuffle(V, UniformMask); + Value *V = vectorizeTree(VE, PostponedPHIs); + if (VF * getNumElements(VL[0]->getType()) != + cast(V->getType())->getNumElements()) { + if (!VE->ReuseShuffleIndices.empty()) { + // Reshuffle to get only unique values. + // If some of the scalars are duplicated in the vectorization + // tree entry, we do not vectorize them but instead generate a + // mask for the reuses. 
But if there are several users of the + // same entry, they may have different vectorization factors. + // This is especially important for PHI nodes. In this case, we + // need to adapt the resulting instruction for the user + // vectorization factor and have to reshuffle it again to take + // only unique elements of the vector. Without this code the + // function incorrectly returns reduced vector instruction with + // the same elements, not with the unique ones. + + // block: + // %phi = phi <2 x > { .., %entry} {%shuffle, %block} + // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0> + // ... (use %2) + // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0} + // br %block + SmallVector Mask(VF, PoisonMaskElem); + for (auto [I, V] : enumerate(VL)) { + if (isa(V)) + continue; + Mask[I] = VE->findLaneForValue(V); } - } - // Need to update the operand gather node, if actually the operand is not a - // vectorized node, but the buildvector/gather node, which matches one of - // the vectorized nodes. - if (find_if(VE->UserTreeIndices, [&](const EdgeInfo &EI) { - return EI.UserTE == E && EI.EdgeIdx == NodeIdx; - }) == VE->UserTreeIndices.end()) { - auto *It = find_if( - VectorizableTree, [&](const std::unique_ptr &TE) { - return TE->isGather() && - TE->UserTreeIndices.front().UserTE == E && - TE->UserTreeIndices.front().EdgeIdx == NodeIdx; - }); - assert(It != VectorizableTree.end() && "Expected gather node operand."); - (*It)->VectorizedValue = V; - } - return V; + V = FinalShuffle(V, Mask); + } else { + assert(VF < cast(V->getType())->getNumElements() && + "Expected vectorization factor less " + "than original vector size."); + SmallVector UniformMask(VF, 0); + std::iota(UniformMask.begin(), UniformMask.end(), 0); + V = FinalShuffle(V, UniformMask); + } + } + // Need to update the operand gather node, if actually the operand is not a + // vectorized node, but the buildvector/gather node, which matches one of + // the vectorized nodes. + if (find_if(VE->UserTreeIndices, [&](const EdgeInfo &EI) { + return EI.UserTE == E && EI.EdgeIdx == NodeIdx; + }) == VE->UserTreeIndices.end()) { + auto *It = + find_if(VectorizableTree, [&](const std::unique_ptr &TE) { + return TE->isGather() && TE->UserTreeIndices.front().UserTE == E && + TE->UserTreeIndices.front().EdgeIdx == NodeIdx; + }); + assert(It != VectorizableTree.end() && "Expected gather node operand."); + (*It)->VectorizedValue = V; } + return V; } // Find the corresponding gather entry and vectorize it. 
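
The reuse-shuffle handling above boils down to one lane lookup per requested scalar. A self-contained sketch of that mask construction, with toy integers standing in for llvm::Value pointers, kPoison for PoisonMaskElem, and a linear search for TreeEntry::findLaneForValue (all names here are illustrative):

    #include <cstdio>
    #include <vector>

    constexpr int kPoison = -1;

    // For each scalar the user wants, find which lane of the already
    // vectorized entry holds it; missing scalars keep a poison element.
    static std::vector<int> buildReuseMask(const std::vector<int> &EntryScalars,
                                           const std::vector<int> &WantedScalars) {
      std::vector<int> Mask(WantedScalars.size(), kPoison);
      for (size_t I = 0; I < WantedScalars.size(); ++I)
        for (size_t Lane = 0; Lane < EntryScalars.size(); ++Lane)
          if (EntryScalars[Lane] == WantedScalars[I]) {
            Mask[I] = static_cast<int>(Lane);
            break;
          }
      return Mask;
    }

    int main() {
      // Entry vectorized as {a=10, b=20}; the PHI user needs <b, b, a, a>.
      for (int M : buildReuseMask({10, 20}, {20, 20, 10, 10}))
        std::printf("%d ", M); // prints: 1 1 0 0
      std::printf("\n");
      return 0;
    }

Run on the comment's own example, this reproduces the <1, 1, 0, 0> shuffle mask shown in the %phi / %shuffle snippet above.
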
@@ -12729,7 +12834,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, SmallVector> Entries; Type *OrigScalarTy = GatheredScalars.front()->getType(); auto *VecTy = getWidenedType(ScalarTy, GatheredScalars.size()); - unsigned NumParts = TTI->getNumberOfParts(VecTy); + unsigned NumParts = TTI->getRegUsageForType(VecTy); if (NumParts == 0 || NumParts >= GatheredScalars.size()) NumParts = 1; if (!all_of(GatheredScalars, IsaPred)) { @@ -13137,7 +13242,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } bool IsReverseOrder = isReverseOrder(E->ReorderIndices); - auto FinalShuffle = [&](Value *V, const TreeEntry *E, VectorType *VecTy) { + auto FinalShuffle = [&](Value *V, const TreeEntry *E) { ShuffleInstructionBuilder ShuffleBuilder(ScalarTy, Builder, *this); if (E->getOpcode() == Instruction::Store && E->State == TreeEntry::Vectorize) { @@ -13197,7 +13302,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { PH->getParent()->getFirstInsertionPt()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; if (PostponedPHIs) @@ -13249,7 +13354,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (const TreeEntry *TE = getTreeEntry(V)) V = TE->VectorizedValue; setInsertPointAfterBundle(E); - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; return V; } @@ -13259,7 +13364,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *Ptr = LI->getPointerOperand(); LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign()); Value *NewV = propagateMetadata(V, E->Scalars); - NewV = FinalShuffle(NewV, E, VecTy); + NewV = FinalShuffle(NewV, E); E->VectorizedValue = NewV; return NewV; } @@ -13474,7 +13579,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *V = (VecOpcode != ShuffleOrOp && VecOpcode == Instruction::BitCast) ? InVec : Builder.CreateCast(VecOpcode, InVec, VecTy); - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; ++NumVectorInstructions; @@ -13518,7 +13623,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { propagateIRFlags(V, E->Scalars, VL0); // Do not cast for cmps. VecTy = cast(V->getType()); - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; ++NumVectorInstructions; @@ -13571,7 +13676,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { assert(getNumElements(Cond->getType()) == TrueNumElements && "Cannot vectorize Instruction::Select"); Value *V = Builder.CreateSelect(Cond, True, False); - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; ++NumVectorInstructions; @@ -13593,7 +13698,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (auto *I = dyn_cast(V)) V = propagateMetadata(I, E->Scalars); - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; ++NumVectorInstructions; @@ -13611,7 +13716,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } Value *V = Builder.CreateFreeze(Op); - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; ++NumVectorInstructions; @@ -13655,7 +13760,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { auto *CI = dyn_cast(Op); return CI && CI->getValue().countr_one() >= It->second.first; })) { - V = FinalShuffle(I == 0 ? RHS : LHS, E, VecTy); + V = FinalShuffle(I == 0 ? 
RHS : LHS, E); E->VectorizedValue = V; ++NumVectorInstructions; return V; @@ -13688,7 +13793,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { I->setHasNoUnsignedWrap(/*b=*/false); } - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; ++NumVectorInstructions; @@ -13780,7 +13885,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { } Value *V = propagateMetadata(NewLI, E->Scalars); - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; ++NumVectorInstructions; return V; @@ -13794,7 +13899,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (VecValue->getType() != VecTy) VecValue = Builder.CreateIntCast(VecValue, VecTy, GetOperandSignedness(0)); - VecValue = FinalShuffle(VecValue, E, VecTy); + VecValue = FinalShuffle(VecValue, E); Value *Ptr = SI->getPointerOperand(); Instruction *ST; @@ -13859,7 +13964,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { V = propagateMetadata(I, GEPs); } - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; ++NumVectorInstructions; @@ -13941,7 +14046,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *V = Builder.CreateCall(CF, OpVecs, OpBundles); propagateIRFlags(V, E->Scalars, VL0); - V = FinalShuffle(V, E, VecTy); + V = FinalShuffle(V, E); E->VectorizedValue = V; ++NumVectorInstructions; @@ -14039,6 +14144,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { "Expected same type as operand."); if (auto *I = dyn_cast(LHS)) LHS = propagateMetadata(I, E->Scalars); + LHS = FinalShuffle(LHS, E); E->VectorizedValue = LHS; ++NumVectorInstructions; return LHS; @@ -15974,7 +16080,7 @@ void BoUpSLP::computeMinimumValueSizes() { [&](Value *V) { return AnalyzedMinBWVals.contains(V); })) return 0u; - unsigned NumParts = TTI->getNumberOfParts( + unsigned NumParts = TTI->getRegUsageForType( getWidenedType(TreeRootIT, VF * ScalarTyNumElements)); // The maximum bit width required to represent all the values that can be @@ -16031,7 +16137,7 @@ void BoUpSLP::computeMinimumValueSizes() { // use - ignore it. if (NumParts > 1 && NumParts == - TTI->getNumberOfParts(getWidenedType( + TTI->getRegUsageForType(getWidenedType( IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF))) return 0u; @@ -16892,7 +16998,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, for (unsigned I = NextInst; I < MaxInst; ++I) { unsigned ActualVF = std::min(MaxInst - I, VF); - if (!has_single_bit(ActualVF)) + if (!hasFullVectorsOnly(*TTI, ScalarTy, ActualVF)) continue; if (MaxVFOnly && ActualVF < MaxVF) @@ -19148,7 +19254,6 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } // Undefs come last. 
assert(U1 && U2 && "The only thing left should be undef & undef."); - continue; } return false; }; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f84317ba51257a..c9cee652d2d326 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1802,18 +1802,18 @@ void VPReductionRecipe::execute(VPTransformState &State) { (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), PrevInChain, NewVecOp); PrevInChain = NewRed; + NextInChain = NewRed; } else { PrevInChain = State.get(getChainOp(), Part, /*IsScalar*/ true); NewRed = createTargetReduction(State.Builder, RdxDesc, NewVecOp); + if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) + NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(), + NewRed, PrevInChain); + else + NextInChain = State.Builder.CreateBinOp( + (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, + PrevInChain); } - if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { - NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(), - NewRed, PrevInChain); - } else if (IsOrdered) - NextInChain = NewRed; - else - NextInChain = State.Builder.CreateBinOp( - (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), NewRed, PrevInChain); State.set(this, NextInChain, Part, /*IsScalar*/ true); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index ee7c7cea0b7670..9796ee64f6ef90 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -878,6 +878,17 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { // value with the others blended into it. unsigned StartIndex = 0; + for (unsigned I = 0; I != Blend->getNumIncomingValues(); ++I) { + // If a value's mask is used only by the blend then is can be deadcoded. + // TODO: Find the most expensive mask that can be deadcoded, or a mask + // that's used by multiple blends where it can be removed from them all. 
+      VPValue *Mask = Blend->getMask(I);
+      if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) {
+        StartIndex = I;
+        break;
+      }
+    }
+
     SmallVector<VPValue *, 4> OperandsWithMask;
     OperandsWithMask.push_back(Blend->getIncomingValue(StartIndex));
@@ -956,6 +967,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
                        m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
       X == X1 && Y == Y1) {
     R.getVPSingleValue()->replaceAllUsesWith(X);
+    R.eraseFromParent();
     return;
   }
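[Editor's note: the VPlanTransforms.cpp hunk above changes which incoming value the blend is rewritten around: instead of always folding toward index 0, it prefers the first incoming value whose mask has no user other than the blend (and is not a known-false constant), so that dropping the blend's use leaves that mask computation dead. A minimal standalone sketch of the selection rule follows; the types and names here are hypothetical stand-ins, not the VPlan API, which works on VPValue users and m_False() matches instead.]

#include <cstddef>
#include <vector>

// Hypothetical stand-in for one of a blend's incoming (value, mask) pairs.
struct Incoming {
  int MaskUsers;    // how many recipes use this incoming value's mask
  bool MaskIsFalse; // whether the mask is a known-false constant
};

// Mirrors the patch's heuristic: default to index 0, but prefer the first
// incoming value whose mask is used only by this blend, since removing the
// blend's use of that mask makes it dead code.
static size_t pickBlendStartIndex(const std::vector<Incoming> &Incomings) {
  for (size_t I = 0; I != Incomings.size(); ++I)
    if (Incomings[I].MaskUsers == 1 && !Incomings[I].MaskIsFalse)
      return I;
  return 0;
}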
diff --git a/llvm/test/Analysis/CostModel/RISCV/cast-half.ll b/llvm/test/Analysis/CostModel/RISCV/cast-half.ll
new file mode 100644
index 00000000000000..e20d24c27eb8b4
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/cast-half.ll
@@ -0,0 +1,1469 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefixes=RV32ZVFH
+; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfhmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefixes=RV32ZVFHMIN
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefixes=RV64ZVFH
+; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output | FileCheck %s -check-prefixes=RV64ZVFHMIN
+
+define void @fptosi() {
+; RV32ZVFH-LABEL: 'fptosi'
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptosi <2 x half> undef to <2 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptosi <2 x half> undef to <2 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptosi <2 x half> undef to <2 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptosi <2 x half> undef to <2 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptosi <2 x half> undef to <2 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptosi <4 x half> undef to <4 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptosi <4 x half> undef to <4 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptosi <4 x half> undef to <4 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptosi <8 x half> undef to <8 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptosi <8 x half> undef to <8 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptosi <8 x half> undef to <8 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptosi <16 x half> undef to <16 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptosi <16 x half> undef to <16 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptosi <16 x half> undef to <16 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptosi <32 x half> undef to <32 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptosi <32 x half> undef to <32 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptosi <32 x half> undef to <32 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptosi <64 x half> undef to <64 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptosi <64 x half> undef to <64 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptosi <64 x half> undef to <64 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptosi <64 x half> undef to <64 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptosi <128 x half> undef to <128 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptosi <128 x half> undef to <128 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptosi <128 x half> undef to <128 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptosi <128 x half> undef to <128 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64f16_nxv64i64 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32ZVFHMIN-LABEL: 'fptosi'
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptosi <2 x half> undef to <2 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptosi <2 x half> undef to <2 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptosi <2 x half> undef to <2 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptosi <2 x half> undef to <2 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptosi <2 x half> undef to <2 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptosi <4 x half> undef to <4 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptosi <4 x half> undef to <4 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptosi <4 x half> undef to <4 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptosi <8 x half> undef to <8 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptosi <8 x half> undef to <8 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptosi <8 x half> undef to <8 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptosi <16 x half> undef to <16 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptosi <16 x half> undef to <16 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptosi <16 x half> undef to <16 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptosi <32 x half> undef to <32 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptosi <32 x half> undef to <32 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptosi <32 x half> undef to <32 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptosi <64 x half> undef to <64 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptosi <64 x half> undef to <64 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptosi <64 x half> undef to <64 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptosi <64 x half> undef to <64 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptosi <128 x half> undef to <128 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptosi <128 x half> undef to <128 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptosi <128 x half> undef to <128 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptosi <128 x half> undef to <128 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64f16_nxv64i64 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64ZVFH-LABEL: 'fptosi'
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptosi <2 x half> undef to <2 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptosi <2 x half> undef to <2 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptosi <2 x half> undef to <2 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptosi <2 x half> undef to <2 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptosi <2 x half> undef to <2 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptosi <4 x half> undef to <4 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptosi <4 x half> undef to <4 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptosi <4 x half> undef to <4 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptosi <8 x half> undef to <8 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptosi <8 x half> undef to <8 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptosi <8 x half> undef to <8 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptosi <16 x half> undef to <16 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptosi <16 x half> undef to <16 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptosi <16 x half> undef to <16 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptosi <32 x half> undef to <32 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptosi <32 x half> undef to <32 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptosi <32 x half> undef to <32 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptosi <64 x half> undef to <64 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptosi <64 x half> undef to <64 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptosi <64 x half> undef to <64 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptosi <64 x half> undef to <64 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptosi <128 x half> undef to <128 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptosi <128 x half> undef to <128 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptosi <128 x half> undef to <128 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptosi <128 x half> undef to <128 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f16_nxv64i64 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64ZVFHMIN-LABEL: 'fptosi'
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptosi <2 x half> undef to <2 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptosi <2 x half> undef to <2 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptosi <2 x half> undef to <2 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptosi <2 x half> undef to <2 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptosi <2 x half> undef to <2 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptosi <4 x half> undef to <4 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptosi <4 x half> undef to <4 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptosi <4 x half> undef to <4 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptosi <8 x half> undef to <8 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptosi <8 x half> undef to <8 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptosi <8 x half> undef to <8 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptosi <16 x half> undef to <16 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptosi <16 x half> undef to <16 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptosi <16 x half> undef to <16 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptosi <32 x half> undef to <32 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptosi <32 x half> undef to <32 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptosi <32 x half> undef to <32 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptosi <64 x half> undef to <64 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptosi <64 x half> undef to <64 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptosi <64 x half> undef to <64 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptosi <64 x half> undef to <64 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptosi <128 x half> undef to <128 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptosi <128 x half> undef to <128 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptosi <128 x half> undef to <128 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptosi <128 x half> undef to <128 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i8>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i16>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i32>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f16_nxv64i64 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i64>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i1>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %v2f16_v2i8 = fptosi <2 x half> undef to <2 x i8>
+  %v2f16_v2i16 = fptosi <2 x half> undef to <2 x i16>
+  %v2f16_v2i32 = fptosi <2 x half> undef to <2 x i32>
+  %v2f16_v2i64 = fptosi <2 x half> undef to <2 x i64>
+  %v2f16_v2i1 = fptosi <2 x half> undef to <2 x i1>
+  %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8>
+  %v4f16_v4i16 = fptosi <4 x half> undef to <4 x i16>
+  %v4f16_v4i32 = fptosi <4 x half> undef to <4 x i32>
+  %v4f16_v4i64 = fptosi <4 x half> undef to <4 x i64>
+  %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1>
+  %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
+  %v8f16_v8i16 = fptosi <8 x half> undef to <8 x i16>
+  %v8f16_v8i32 = fptosi <8 x half> undef to <8 x i32>
+  %v8f16_v8i64 = fptosi <8 x half> undef to <8 x i64>
+  %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1>
+  %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
+  %v16f16_v16i16 = fptosi <16 x half> undef to <16 x i16>
+  %v16f16_v16i32 = fptosi <16 x half> undef to <16 x i32>
+  %v16f16_v16i64 = fptosi <16 x half> undef to <16 x i64>
+  %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1>
+  %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
+  %v32f16_v32i16 = fptosi <32 x half> undef to <32 x i16>
+  %v32f16_v32i32 = fptosi <32 x half> undef to <32 x i32>
+  %v32f16_v32i64 = fptosi <32 x half> undef to <32 x i64>
+  %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
+  %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
+  %v64f16_v64i16 = fptosi <64 x half> undef to <64 x i16>
+  %v64f16_v64i32 = fptosi <64 x half> undef to <64 x i32>
+  %v64f16_v64i64 = fptosi <64 x half> undef to <64 x i64>
+  %v64f16_v64i1 = fptosi <64 x half> undef to <64 x i1>
+  %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8>
+  %v128f16_v128i16 = fptosi <128 x half> undef to <128 x i16>
+  %v128f16_v128i32 = fptosi <128 x half> undef to <128 x i32>
+  %v128f16_v128i64 = fptosi <128 x half> undef to <128 x i64>
+  %v128f16_v128i1 = fptosi <128 x half> undef to <128 x i1>
+  %nxv1f16_nxv1i8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
+  %nxv1f16_nxv1i16 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i16>
+  %nxv1f16_nxv1i32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
+  %nxv1f16_nxv1i64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
+  %nxv1f16_nxv1i1 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i1>
+  %nxv2f16_nxv2i8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
+  %nxv2f16_nxv2i16 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i16>
+  %nxv2f16_nxv2i32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
+  %nxv2f16_nxv2i64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
+  %nxv2f16_nxv2i1 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i1>
+  %nxv4f16_nxv4i8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
+  %nxv4f16_nxv4i16 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i16>
+  %nxv4f16_nxv4i32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
+  %nxv4f16_nxv4i64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
+  %nxv4f16_nxv4i1 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i1>
+  %nxv8f16_nxv8i8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
+  %nxv8f16_nxv8i16 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i16>
+  %nxv8f16_nxv8i32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
+  %nxv8f16_nxv8i64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
+  %nxv8f16_nxv8i1 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i1>
+  %nxv16f16_nxv16i8 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i8>
+  %nxv16f16_nxv16i16 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i16>
+  %nxv16f16_nxv16i32 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i32>
+  %nxv16f16_nxv16i64 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i64>
+  %nxv16f16_nxv16i1 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i1>
+  %nxv32f16_nxv32i8 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i8>
+  %nxv32f16_nxv32i16 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i16>
+  %nxv32f16_nxv32i32 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i32>
+  %nxv32f16_nxv32i64 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i64>
+  %nxv32f16_nxv32i1 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i1>
+  %nxv64f16_nxv64i8 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i8>
+  %nxv64f16_nxv64i16 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i16>
+  %nxv64f16_nxv64i32 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i32>
+  %nxv64f16_nxv64i64 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i64>
+  %nxv64f16_nxv64i1 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i1>
+  ret void
+}
+
+define void @fptoui() {
+; RV32ZVFH-LABEL: 'fptoui'
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptoui <2 x half> undef to <2 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptoui <2 x half> undef to <2 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptoui <2 x half> undef to <2 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptoui <2 x half> undef to <2 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptoui <2 x half> undef to <2 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptoui <4 x half> undef to <4 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptoui <4 x half> undef to <4 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptoui <4 x half> undef to <4 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptoui <4 x half> undef to <4 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptoui <8 x half> undef to <8 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptoui <8 x half> undef to <8 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptoui <8 x half> undef to <8 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptoui <8 x half> undef to <8 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptoui <16 x half> undef to <16 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptoui <16 x half> undef to <16 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptoui <16 x half> undef to <16 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptoui <16 x half> undef to <16 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptoui <32 x half> undef to <32 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptoui <32 x half> undef to <32 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptoui <32 x half> undef to <32 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptoui <32 x half> undef to <32 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptoui <64 x half> undef to <64 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptoui <64 x half> undef to <64 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptoui <64 x half> undef to <64 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptoui <64 x half> undef to <64 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptoui <128 x half> undef to <128 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptoui <128 x half> undef to <128 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptoui <128 x half> undef to <128 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptoui <128 x half> undef to <128 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i8>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i16>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i32>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64f16_nxv64i64 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i64>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i1>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32ZVFHMIN-LABEL: 'fptoui'
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptoui <2 x half> undef to <2 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptoui <2 x half> undef to <2 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptoui <2 x half> undef to <2 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptoui <2 x half> undef to <2 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptoui <2 x half> undef to <2 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptoui <4 x half> undef to <4 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptoui <4 x half> undef to <4 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptoui <4 x half> undef to <4 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptoui <4 x half> undef to <4 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptoui <8 x half> undef to <8 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptoui <8 x half> undef to <8 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptoui <8 x half> undef to <8 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptoui <8 x half> undef to <8 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptoui <16 x half> undef to <16 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptoui <16 x half> undef to <16 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptoui <16 x half> undef to <16 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptoui <16 x half> undef to <16 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptoui <32 x half> undef to <32 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptoui <32 x half> undef to <32 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptoui <32 x half> undef to <32 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptoui <32 x half> undef to <32 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptoui <64 x half> undef to <64 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptoui <64 x half> undef to <64 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptoui <64 x half> undef to <64 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptoui <64 x half> undef to <64 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptoui <128 x half> undef to <128 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptoui <128 x half> undef to <128 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptoui <128 x half> undef to <128 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptoui <128 x half> undef to <128 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i8>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i16>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i32>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64f16_nxv64i64 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i64>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i1>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64ZVFH-LABEL: 'fptoui'
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptoui <2 x half> undef to <2 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptoui <2 x half> undef to <2 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptoui <2 x half> undef to <2 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptoui <2 x half> undef to <2 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptoui <2 x half> undef to <2 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptoui <4 x half> undef to <4 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptoui <4 x half> undef to <4 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptoui <4 x half> undef to <4 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptoui <4 x half> undef to <4 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptoui <8 x half> undef to <8 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptoui <8 x half> undef to <8 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptoui <8 x half> undef to <8 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptoui <8 x half> undef to <8 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptoui <16 x half> undef to <16 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptoui <16 x half> undef to <16 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptoui <16 x half> undef to <16 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptoui <16 x half> undef to <16 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptoui <32 x half> undef to <32 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptoui <32 x half> undef to <32 x i32>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptoui <32 x half> undef to <32 x i64>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptoui <32 x half> undef to <32 x i1>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptoui <64 x half> undef to <64 x i16>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptoui <64 x half> undef to <64 x i32>
RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptoui <64 x half> undef to <64 x i32> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptoui <64 x half> undef to <64 x i64> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptoui <64 x half> undef to <64 x i1> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptoui <128 x half> undef to <128 x i16> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptoui <128 x half> undef to <128 x i32> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptoui <128 x half> undef to <128 x i64> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptoui <128 x half> undef to <128 x i1> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %nxv16f16_nxv16i32 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f16_nxv64i64 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptoui undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; RV64ZVFHMIN-LABEL: 'fptoui' +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptoui <2 x half> undef to <2 x i8> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptoui <2 x half> undef to <2 x i16> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptoui <2 x half> undef to <2 x i32> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptoui <2 x half> undef to <2 x i64> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptoui <2 x half> undef to <2 x i1> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptoui <4 x half> undef to <4 x i16> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptoui <4 x half> undef to <4 x i32> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptoui <4 x half> undef to <4 x i64> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptoui <4 x half> undef to <4 x i1> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptoui <8 x half> undef to <8 x i16> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptoui <8 x half> undef to <8 x i32> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptoui <8 x half> undef to <8 x i64> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptoui <8 x half> undef to <8 x i1> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptoui <16 x half> undef to <16 x i16> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptoui <16 x half> undef to <16 x i32> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptoui <16 x half> undef to <16 x i64> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptoui <16 x half> undef to <16 x i1> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptoui <32 x half> undef to <32 x i16> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptoui <32 x half> undef to <32 x i32> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptoui <32 x half> undef to <32 x i64> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptoui <32 x half> undef to <32 x i1> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptoui <64 x half> undef to <64 x i16> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptoui <64 x half> undef to <64 x i32> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptoui <64 x half> undef to <64 x i64> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptoui <64 x half> undef to <64 x i1> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptoui <128 x half> undef to <128 x i16> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptoui <128 x half> undef to <128 x i32> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptoui <128 x half> undef to <128 x i64> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptoui <128 x half> undef to <128 x i1> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptoui undef to +; 
RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f16_nxv64i64 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptoui undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v2f16_v2i8 = fptoui <2 x half> undef to <2 x i8> + %v2f16_v2i16 = fptoui <2 x half> undef to <2 x i16> + %v2f16_v2i32 = fptoui <2 x half> undef to <2 x i32> + %v2f16_v2i64 = fptoui <2 x half> undef to <2 x i64> + %v2f16_v2i1 = fptoui <2 x half> undef to <2 x i1> + %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8> + %v4f16_v4i16 = fptoui <4 x half> undef to <4 x 
+  %v4f16_v4i32 = fptoui <4 x half> undef to <4 x i32>
+  %v4f16_v4i64 = fptoui <4 x half> undef to <4 x i64>
+  %v4f16_v4i1 = fptoui <4 x half> undef to <4 x i1>
+  %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8>
+  %v8f16_v8i16 = fptoui <8 x half> undef to <8 x i16>
+  %v8f16_v8i32 = fptoui <8 x half> undef to <8 x i32>
+  %v8f16_v8i64 = fptoui <8 x half> undef to <8 x i64>
+  %v8f16_v8i1 = fptoui <8 x half> undef to <8 x i1>
+  %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8>
+  %v16f16_v16i16 = fptoui <16 x half> undef to <16 x i16>
+  %v16f16_v16i32 = fptoui <16 x half> undef to <16 x i32>
+  %v16f16_v16i64 = fptoui <16 x half> undef to <16 x i64>
+  %v16f16_v16i1 = fptoui <16 x half> undef to <16 x i1>
+  %v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8>
+  %v32f16_v32i16 = fptoui <32 x half> undef to <32 x i16>
+  %v32f16_v32i32 = fptoui <32 x half> undef to <32 x i32>
+  %v32f16_v32i64 = fptoui <32 x half> undef to <32 x i64>
+  %v32f16_v32i1 = fptoui <32 x half> undef to <32 x i1>
+  %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8>
+  %v64f16_v64i16 = fptoui <64 x half> undef to <64 x i16>
+  %v64f16_v64i32 = fptoui <64 x half> undef to <64 x i32>
+  %v64f16_v64i64 = fptoui <64 x half> undef to <64 x i64>
+  %v64f16_v64i1 = fptoui <64 x half> undef to <64 x i1>
+  %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8>
+  %v128f16_v128i16 = fptoui <128 x half> undef to <128 x i16>
+  %v128f16_v128i32 = fptoui <128 x half> undef to <128 x i32>
+  %v128f16_v128i64 = fptoui <128 x half> undef to <128 x i64>
+  %v128f16_v128i1 = fptoui <128 x half> undef to <128 x i1>
+  %nxv1f16_nxv1i8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
+  %nxv1f16_nxv1i16 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i16>
+  %nxv1f16_nxv1i32 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i32>
+  %nxv1f16_nxv1i64 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i64>
+  %nxv1f16_nxv1i1 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i1>
+  %nxv2f16_nxv2i8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
+  %nxv2f16_nxv2i16 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i16>
+  %nxv2f16_nxv2i32 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i32>
+  %nxv2f16_nxv2i64 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i64>
+  %nxv2f16_nxv2i1 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i1>
+  %nxv4f16_nxv4i8 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
+  %nxv4f16_nxv4i16 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i16>
+  %nxv4f16_nxv4i32 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i32>
+  %nxv4f16_nxv4i64 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i64>
+  %nxv4f16_nxv4i1 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i1>
+  %nxv8f16_nxv8i8 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
+  %nxv8f16_nxv8i16 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i16>
+  %nxv8f16_nxv8i32 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i32>
+  %nxv8f16_nxv8i64 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i64>
+  %nxv8f16_nxv8i1 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i1>
+  %nxv16f16_nxv16i8 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i8>
+  %nxv16f16_nxv16i16 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i16>
+  %nxv16f16_nxv16i32 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i32>
+  %nxv16f16_nxv16i64 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i64>
+  %nxv16f16_nxv16i1 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i1>
+  %nxv32f16_nxv32i8 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i8>
+  %nxv32f16_nxv32i16 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i16>
+  %nxv32f16_nxv32i32 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i32>
+  %nxv32f16_nxv32i64 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i64>
+  %nxv32f16_nxv32i1 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i1>
+  %nxv64f16_nxv64i8 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i8>
+  %nxv64f16_nxv64i16 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i16>
+  %nxv64f16_nxv64i32 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i32>
+  %nxv64f16_nxv64i64 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i64>
+  %nxv64f16_nxv64i1 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i1>
+  ret void
+}
+
+define void @sitofp() {
+; RV32ZVFH-LABEL: 'sitofp'
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = sitofp <2 x i8> undef to <2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = sitofp <2 x i16> undef to <2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = sitofp <2 x i32> undef to <2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = sitofp <2 x i64> undef to <2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = sitofp <2 x i1> undef to <2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = sitofp <4 x i16> undef to <4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = sitofp <4 x i32> undef to <4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = sitofp <4 x i64> undef to <4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = sitofp <4 x i1> undef to <4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = sitofp <8 x i16> undef to <8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = sitofp <8 x i32> undef to <8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = sitofp <8 x i64> undef to <8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = sitofp <8 x i1> undef to <8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = sitofp <16 x i16> undef to <16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = sitofp <16 x i32> undef to <16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = sitofp <16 x i64> undef to <16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = sitofp <16 x i1> undef to <16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = sitofp <32 x i16> undef to <32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = sitofp <32 x i32> undef to <32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = sitofp <32 x i64> undef to <32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = sitofp <32 x i1> undef to <32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = sitofp <64 x i16> undef to <64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = sitofp <64 x i32> undef to <64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i64_nxv64f16 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32ZVFHMIN-LABEL: 'sitofp'
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = sitofp <2 x i8> undef to <2 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = sitofp <2 x i16> undef to <2 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = sitofp <2 x i32> undef to <2 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = sitofp <2 x i64> undef to <2 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = sitofp <2 x i1> undef to <2 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = sitofp <4 x i16> undef to <4 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = sitofp <4 x i32> undef to <4 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = sitofp <4 x i64> undef to <4 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = sitofp <4 x i1> undef to <4 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = sitofp <8 x i16> undef to <8 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = sitofp <8 x i32> undef to <8 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = sitofp <8 x i64> undef to <8 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = sitofp <8 x i1> undef to <8 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = sitofp <16 x i16> undef to <16 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = sitofp <16 x i32> undef to <16 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = sitofp <16 x i64> undef to <16 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = sitofp <16 x i1> undef to <16 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = sitofp <32 x i16> undef to <32 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = sitofp <32 x i32> undef to <32 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = sitofp <32 x i64> undef to <32 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = sitofp <32 x i1> undef to <32 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = sitofp <64 x i16> undef to <64 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = sitofp <64 x i32> undef to <64 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i64_nxv64f16 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64ZVFH-LABEL: 'sitofp'
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = sitofp <2 x i8> undef to <2 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = sitofp <2 x i16> undef to <2 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = sitofp <2 x i32> undef to <2 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = sitofp <2 x i64> undef to <2 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = sitofp <2 x i1> undef to <2 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = sitofp <4 x i16> undef to <4 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = sitofp <4 x i32> undef to <4 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = sitofp <4 x i64> undef to <4 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = sitofp <4 x i1> undef to <4 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = sitofp <8 x i16> undef to <8 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = sitofp <8 x i32> undef to <8 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = sitofp <8 x i64> undef to <8 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = sitofp <8 x i1> undef to <8 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = sitofp <16 x i16> undef to <16 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = sitofp <16 x i32> undef to <16 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = sitofp <16 x i64> undef to <16 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = sitofp <16 x i1> undef to <16 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = sitofp <32 x i16> undef to <32 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = sitofp <32 x i32> undef to <32 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = sitofp <32 x i64> undef to <32 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = sitofp <32 x i1> undef to <32 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = sitofp <64 x i16> undef to <64 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = sitofp <64 x i32> undef to <64 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64f16 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
+; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV64ZVFHMIN-LABEL: 'sitofp'
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = sitofp <2 x i8> undef to <2 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = sitofp <2 x i16> undef to <2 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = sitofp <2 x i32> undef to <2 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = sitofp <2 x i64> undef to <2 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = sitofp <2 x i1> undef to <2 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = sitofp <4 x i16> undef to <4 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = sitofp <4 x i32> undef to <4 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = sitofp <4 x i64> undef to <4 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = sitofp <4 x i1> undef to <4 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = sitofp <8 x i16> undef to <8 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = sitofp <8 x i32> undef to <8 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = sitofp <8 x i64> undef to <8 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = sitofp <8 x i1> undef to <8 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = sitofp <16 x i16> undef to <16 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = sitofp <16 x i32> undef to <16 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = sitofp <16 x i64> undef to <16 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = sitofp <16 x i1> undef to <16 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = sitofp <32 x i16> undef to <32 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = sitofp <32 x i32> undef to <32 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = sitofp <32 x i64> undef to <32 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = sitofp <32 x i1> undef to <32 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = sitofp <64 x i16> undef to <64 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = sitofp <64 x i32> undef to <64 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64f16 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
+; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %v2i8_v2f16 = sitofp <2 x i8> undef to <2 x half>
+  %v2i16_v2f16 = sitofp <2 x i16> undef to <2 x half>
+  %v2i32_v2f16 = sitofp <2 x i32> undef to <2 x half>
+  %v2i64_v2f16 = sitofp <2 x i64> undef to <2 x half>
+  %v2i1_v2f16 = sitofp <2 x i1> undef to <2 x half>
+  %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half>
+  %v4i16_v4f16 = sitofp <4 x i16> undef to <4 x half>
+  %v4i32_v4f16 = sitofp <4 x i32> undef to <4 x half>
+  %v4i64_v4f16 = sitofp <4 x i64> undef to <4 x half>
+  %v4i1_v4f16 = sitofp <4 x i1> undef to <4 x half>
+  %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half>
+  %v8i16_v8f16 = sitofp <8 x i16> undef to <8 x half>
+  %v8i32_v8f16 = sitofp <8 x i32> undef to <8 x half>
+  %v8i64_v8f16 = sitofp <8 x i64> undef to <8 x half>
+  %v8i1_v8f16 = sitofp <8 x i1> undef to <8 x half>
+  %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half>
+  %v16i16_v16f16 = sitofp <16 x i16> undef to <16 x half>
+  %v16i32_v16f16 = sitofp <16 x i32> undef to <16 x half>
+  %v16i64_v16f16 = sitofp <16 x i64> undef to <16 x half>
+  %v16i1_v16f16 = sitofp <16 x i1> undef to <16 x half>
+  %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half>
+  %v32i16_v32f16 = sitofp <32 x i16> undef to <32 x half>
+  %v32i32_v32f16 = sitofp <32 x i32> undef to <32 x half>
+  %v32i64_v32f16 = sitofp <32 x i64> undef to <32 x half>
+  %v32i1_v32f16 = sitofp <32 x i1> undef to <32 x half>
+  %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half>
+  %v64i16_v64f16 = sitofp <64 x i16> undef to <64 x half>
+  %v64i32_v64f16 = sitofp <64 x i32> undef to <64 x half>
+  %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
+  %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
+  %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
+  %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
+  %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
+  %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
+  %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
+  %nxv1i8_nxv1f16 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
+  %nxv1i16_nxv1f16 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x half>
+  %nxv1i32_nxv1f16 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x half>
+  %nxv1i64_nxv1f16 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x half>
+  %nxv1i1_nxv1f16 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
+  %nxv2i8_nxv2f16 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
+  %nxv2i16_nxv2f16 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x half>
+  %nxv2i32_nxv2f16 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x half>
+  %nxv2i64_nxv2f16 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x half>
+  %nxv2i1_nxv2f16 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
+  %nxv4i8_nxv4f16 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
+  %nxv4i16_nxv4f16 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x half>
+  %nxv4i32_nxv4f16 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x half>
+  %nxv4i64_nxv4f16 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x half>
+  %nxv4i1_nxv4f16 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
+  %nxv8i8_nxv8f16 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
+  %nxv8i16_nxv8f16 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x half>
+  %nxv8i32_nxv8f16 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x half>
+  %nxv8i64_nxv8f16 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x half>
+  %nxv8i1_nxv8f16 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
+  %nxv16i8_nxv16f16 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
+  %nxv16i16_nxv16f16 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x half>
+  %nxv16i32_nxv16f16 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x half>
+  %nxv16i64_nxv16f16 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x half>
+  %nxv16i1_nxv16f16 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
+  %nxv32i8_nxv32f16 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
+  %nxv32i16_nxv32f16 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x half>
+  %nxv32i32_nxv32f16 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x half>
+  %nxv32i64_nxv32f16 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x half>
+  %nxv32i1_nxv32f16 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
+  %nxv64i8_nxv64f16 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
+  %nxv64i16_nxv64f16 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x half>
+  %nxv64i32_nxv64f16 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x half>
+  %nxv64i64_nxv64f16 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x half>
+  %nxv64i1_nxv64f16 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
+  ret void
+}
+
+define void @uitofp() {
+; RV32ZVFH-LABEL: 'uitofp'
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = uitofp <2 x i8> undef to <2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = uitofp <2 x i16> undef to <2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = uitofp <2 x i32> undef to <2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = uitofp <2 x i64> undef to <2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = uitofp <2 x i1> undef to <2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = uitofp <4 x i16> undef to <4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = uitofp <4 x i32> undef to <4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = uitofp <4 x i64> undef to <4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = uitofp <4 x i1> undef to <4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = uitofp <8 x i16> undef to <8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = uitofp <8 x i32> undef to <8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = uitofp <8 x i64> undef to <8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = uitofp <8 x i1> undef to <8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = uitofp <16 x i16> undef to <16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = uitofp <16 x i32> undef to <16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = uitofp <16 x i64> undef to <16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = uitofp <16 x i1> undef to <16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = uitofp <32 x i16> undef to <32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = uitofp <32 x i32> undef to <32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = uitofp <32 x i64> undef to <32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = uitofp <32 x i1> undef to <32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = uitofp <64 x i16> undef to <64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = uitofp <64 x i32> undef to <64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = uitofp <vscale x 1 x i32> undef to <vscale x 1 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = uitofp <vscale x 2 x i16> undef to <vscale x 2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = uitofp <vscale x 2 x i32> undef to <vscale x 2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = uitofp <vscale x 4 x i16> undef to <vscale x 4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = uitofp <vscale x 4 x i32> undef to <vscale x 4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = uitofp <vscale x 8 x i16> undef to <vscale x 8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = uitofp <vscale x 8 x i32> undef to <vscale x 8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = uitofp <vscale x 16 x i16> undef to <vscale x 16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = uitofp <vscale x 16 x i32> undef to <vscale x 16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = uitofp <vscale x 16 x i64> undef to <vscale x 16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = uitofp <vscale x 32 x i16> undef to <vscale x 32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = uitofp <vscale x 32 x i32> undef to <vscale x 32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = uitofp <vscale x 32 x i64> undef to <vscale x 32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = uitofp <vscale x 64 x i16> undef to <vscale x 64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = uitofp <vscale x 64 x i32> undef to <vscale x 64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i64_nxv64f16 = uitofp <vscale x 64 x i64> undef to <vscale x 64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
+; RV32ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; RV32ZVFHMIN-LABEL: 'uitofp'
+; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = uitofp <2 x i8> undef
to <2 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = uitofp <2 x i16> undef to <2 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = uitofp <2 x i32> undef to <2 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = uitofp <2 x i64> undef to <2 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = uitofp <2 x i1> undef to <2 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = uitofp <4 x i16> undef to <4 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = uitofp <4 x i32> undef to <4 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = uitofp <4 x i64> undef to <4 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = uitofp <4 x i1> undef to <4 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = uitofp <8 x i16> undef to <8 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = uitofp <8 x i32> undef to <8 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = uitofp <8 x i64> undef to <8 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = uitofp <8 x i1> undef to <8 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = uitofp <16 x i16> undef to <16 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = uitofp <16 x i32> undef to <16 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = uitofp <16 x i64> undef to <16 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = uitofp <16 x i1> undef to <16 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = uitofp <32 x i16> undef to <32 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = uitofp <32 x i32> undef to <32 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = uitofp <32 x i64> undef to <32 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = uitofp <32 x i1> undef to <32 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = uitofp <64 x i16> undef to <64 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = uitofp <64 
x i32> undef to <64 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half> +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %nxv16i32_nxv16f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i64_nxv64f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = uitofp undef to +; RV32ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; RV64ZVFH-LABEL: 'uitofp' +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = uitofp <2 x i8> undef to <2 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = uitofp <2 x i16> undef to <2 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = uitofp <2 x i32> undef to <2 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = uitofp <2 x i64> undef to <2 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = uitofp <2 x i1> undef to <2 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = uitofp <4 x i16> undef to <4 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = uitofp <4 x i32> undef to <4 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = uitofp <4 x i64> undef to <4 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = uitofp <4 x i1> undef to <4 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = uitofp <8 x i16> undef to <8 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = uitofp <8 x i32> undef to <8 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = uitofp <8 x i64> undef to <8 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = uitofp <8 x i1> undef to <8 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = uitofp <16 x i16> undef to <16 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = uitofp <16 x i32> undef to <16 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = uitofp <16 x i64> undef to <16 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = uitofp <16 x i1> undef to <16 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = uitofp <32 x i16> undef to <32 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = uitofp <32 x i32> undef to <32 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = uitofp <32 x i64> undef to <32 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = uitofp <32 x i1> undef to <32 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = uitofp <64 x i16> undef to <64 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = uitofp <64 x i32> undef to <64 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half> +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = 
uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = uitofp undef to +; RV64ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; RV64ZVFHMIN-LABEL: 'uitofp' +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = uitofp <2 x i8> undef to <2 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = uitofp <2 x i16> undef to <2 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = uitofp <2 x i32> undef to <2 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = uitofp <2 x i64> undef to 
<2 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = uitofp <2 x i1> undef to <2 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = uitofp <4 x i16> undef to <4 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = uitofp <4 x i32> undef to <4 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = uitofp <4 x i64> undef to <4 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = uitofp <4 x i1> undef to <4 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = uitofp <8 x i16> undef to <8 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = uitofp <8 x i32> undef to <8 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = uitofp <8 x i64> undef to <8 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = uitofp <8 x i1> undef to <8 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = uitofp <16 x i16> undef to <16 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = uitofp <16 x i32> undef to <16 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = uitofp <16 x i64> undef to <16 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = uitofp <16 x i1> undef to <16 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = uitofp <32 x i16> undef to <32 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = uitofp <32 x i32> undef to <32 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = uitofp <32 x i64> undef to <32 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = uitofp <32 x i1> undef to <32 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = uitofp <64 x i16> undef to <64 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = uitofp <64 x i32> undef to <64 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = 
uitofp <128 x i8> undef to <128 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half> +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = uitofp 
undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = uitofp undef to +; RV64ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %v2i8_v2f16 = uitofp <2 x i8> undef to <2 x half> + %v2i16_v2f16 = uitofp <2 x i16> undef to <2 x half> + %v2i32_v2f16 = uitofp <2 x i32> undef to <2 x half> + %v2i64_v2f16 = uitofp <2 x i64> undef to <2 x half> + %v2i1_v2f16 = uitofp <2 x i1> undef to <2 x half> + %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half> + %v4i16_v4f16 = uitofp <4 x i16> undef to <4 x half> + %v4i32_v4f16 = uitofp <4 x i32> undef to <4 x half> + %v4i64_v4f16 = uitofp <4 x i64> undef to <4 x half> + %v4i1_v4f16 = uitofp <4 x i1> undef to <4 x half> + %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half> + %v8i16_v8f16 = uitofp <8 x i16> undef to <8 x half> + %v8i32_v8f16 = uitofp <8 x i32> undef to <8 x half> + %v8i64_v8f16 = uitofp <8 x i64> undef to <8 x half> + %v8i1_v8f16 = uitofp <8 x i1> undef to <8 x half> + %v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half> + %v16i16_v16f16 = uitofp <16 x i16> undef to <16 x half> + %v16i32_v16f16 = uitofp <16 x i32> undef to <16 x half> + %v16i64_v16f16 = uitofp <16 x i64> undef to <16 x half> + %v16i1_v16f16 = uitofp <16 x i1> undef to <16 x half> + %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half> + %v32i16_v32f16 = uitofp <32 x i16> undef to <32 x half> + %v32i32_v32f16 = uitofp <32 x i32> undef to <32 x half> + %v32i64_v32f16 = uitofp <32 x i64> undef to <32 x half> + %v32i1_v32f16 = uitofp <32 x i1> undef to <32 x half> + %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half> + %v64i16_v64f16 = uitofp <64 x i16> undef to <64 x half> + %v64i32_v64f16 = uitofp <64 x i32> undef to <64 x half> + %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half> + %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half> + %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half> + %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half> + %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half> + %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half> + %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half> + %nxv1i8_nxv1f16 = uitofp undef to + %nxv1i16_nxv1f16 = uitofp undef to + %nxv1i32_nxv1f16 = uitofp undef to + %nxv1i64_nxv1f16 = uitofp undef to + %nxv1i1_nxv1f16 = uitofp undef to + %nxv2i8_nxv2f16 = uitofp undef to + %nxv2i16_nxv2f16 = uitofp undef to + %nxv2i32_nxv2f16 = uitofp undef to + %nxv2i64_nxv2f16 = uitofp undef to + %nxv2i1_nxv2f16 = uitofp undef to + %nxv4i8_nxv4f16 = uitofp undef to + %nxv4i16_nxv4f16 = uitofp undef to + 
%nxv4i32_nxv4f16 = uitofp undef to + %nxv4i64_nxv4f16 = uitofp undef to + %nxv4i1_nxv4f16 = uitofp undef to + %nxv8i8_nxv8f16 = uitofp undef to + %nxv8i16_nxv8f16 = uitofp undef to + %nxv8i32_nxv8f16 = uitofp undef to + %nxv8i64_nxv8f16 = uitofp undef to + %nxv8i1_nxv8f16 = uitofp undef to + %nxv16i8_nxv16f16 = uitofp undef to + %nxv16i16_nxv16f16 = uitofp undef to + %nxv16i32_nxv16f16 = uitofp undef to + %nxv16i64_nxv16f16 = uitofp undef to + %nxv16i1_nxv16f16 = uitofp undef to + %nxv32i8_nxv32f16 = uitofp undef to + %nxv32i16_nxv32f16 = uitofp undef to + %nxv32i32_nxv32f16 = uitofp undef to + %nxv32i64_nxv32f16 = uitofp undef to + %nxv32i1_nxv32f16 = uitofp undef to + %nxv64i8_nxv64f16 = uitofp undef to + %nxv64i16_nxv64f16 = uitofp undef to + %nxv64i32_nxv64f16 = uitofp undef to + %nxv64i64_nxv64f16 = uitofp undef to + %nxv64i1_nxv64f16 = uitofp undef to + ret void +} diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll index e90fab9fbc8c46..ccc9101e7b0cdd 100644 --- a/llvm/test/Analysis/CostModel/RISCV/cast.ll +++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll @@ -1718,652 +1718,442 @@ define void @fptrunc() { define void @fptosi() { ; RV32-LABEL: 'fptosi' -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptosi <2 x half> undef to <2 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_v2i8 = fptosi <2 x float> undef to <2 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i8 = fptosi <2 x double> undef to <2 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptosi <2 x half> undef to <2 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i16 = fptosi <2 x float> undef to <2 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2i16 = fptosi <2 x double> undef to <2 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptosi <2 x half> undef to <2 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i32 = fptosi <2 x float> undef to <2 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i32 = fptosi <2 x double> undef to <2 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptosi <2 x half> undef to <2 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i64 = fptosi <2 x float> undef to <2 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i64 = fptosi <2 x double> undef to <2 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptosi <2 x half> undef to <2 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptosi <2 x float> undef to <2 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptosi <2 x double> undef to <2 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptosi <4 x float> undef to <4 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptosi <4 x double> undef to <4 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptosi <4 x half> 
undef to <4 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i16 = fptosi <4 x float> undef to <4 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i16 = fptosi <4 x double> undef to <4 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptosi <4 x half> undef to <4 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i32 = fptosi <4 x float> undef to <4 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i32 = fptosi <4 x double> undef to <4 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptosi <4 x half> undef to <4 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i64 = fptosi <4 x float> undef to <4 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i64 = fptosi <4 x double> undef to <4 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptosi <4 x float> undef to <4 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptosi <4 x double> undef to <4 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptosi <8 x float> undef to <8 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptosi <8 x half> undef to <8 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i16 = fptosi <8 x float> undef to <8 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8i16 = fptosi <8 x double> undef to <8 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptosi <8 x half> undef to <8 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i32 = fptosi <8 x float> undef to <8 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8i32 = fptosi <8 x double> undef to <8 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptosi <8 x half> undef to <8 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i64 = fptosi <8 x float> undef to <8 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8i64 = fptosi <8 x double> undef to <8 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptosi <8 x float> undef to <8 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptosi <16 x half> undef to <16 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i16 = fptosi <16 x float> undef to <16 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f64_v16i16 = fptosi <16 x double> undef to <16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptosi <16 x half> undef to <16 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i32 = fptosi <16 x float> undef to <16 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16i32 = fptosi <16 x double> undef to <16 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptosi <16 x half> undef to <16 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i64 = fptosi <16 x float> undef to <16 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16i64 = fptosi <16 x double> undef to <16 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptosi <32 x half> undef to <32 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32i16 = fptosi <32 x float> undef to <32 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f64_v32i16 = fptosi <32 x double> undef to <32 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptosi <32 x half> undef to <32 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32i32 = fptosi <32 x float> undef to <32 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f64_v32i32 = fptosi <32 x double> undef to <32 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptosi <32 x half> undef to <32 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i64 = fptosi <32 x float> undef to <32 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f64_v32i64 = fptosi <32 x double> undef to <32 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1> -; RV32-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptosi <64 x half> undef to <64 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f32_v64i16 = fptosi <64 x float> undef to <64 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f64_v64i16 = fptosi <64 x double> undef to <64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptosi <64 x half> undef to <64 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64f32_v64i32 = fptosi <64 x float> undef to <64 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f64_v64i32 = fptosi <64 x double> undef to <64 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptosi <64 x half> undef to <64 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f32_v64i64 = fptosi <64 x float> undef to <64 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64f64_v64i64 = fptosi <64 x double> undef to <64 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptosi <64 x half> undef to <64 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64f32_v64i1 = fptosi <64 x float> undef to <64 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i1 = fptosi <64 x double> undef to <64 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128f32_v128i8 = fptosi <128 x float> undef to <128 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i8 = fptosi <128 x double> undef to <128 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptosi <128 x half> undef to <128 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f32_v128i16 = fptosi <128 x float> undef to <128 x i16> ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f64_v128i16 = fptosi <128 x double> undef to <128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptosi <128 x half> undef to <128 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v128f32_v128i32 = fptosi <128 x float> undef to <128 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128f64_v128i32 = fptosi <128 x double> undef to <128 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptosi <128 x half> undef to <128 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128f32_v128i64 = fptosi <128 x float> undef to <128 x i64> ; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v128f64_v128i64 = fptosi <128 x double> undef to <128 x i64> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%v128f16_v128i1 = fptosi <128 x half> undef to <128 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128f32_v128i1 = fptosi <128 x float> undef to <128 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i1 = fptosi <128 x double> undef to <128 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1i16 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i32 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i64 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i16 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i32 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i64 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %nxv4f16_nxv4i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i8 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4i16 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f64_nxv4i32 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f64_nxv4i64 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i1 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i8 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f64_nxv8i16 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f64_nxv8i32 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f64_nxv8i64 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i1 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f32_nxv16i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i8 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptosi undef to 
; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f32_nxv16i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f64_nxv16i16 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f32_nxv16i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f64_nxv16i32 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f64_nxv16i64 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i1 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32f32_nxv32i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i8 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f32_nxv32i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f64_nxv32i16 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32f32_nxv32i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32f64_nxv32i32 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32f32_nxv32i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32f64_nxv32i64 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32f32_nxv32i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i1 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64f32_nxv64i8 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i8 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f32_nxv64i16 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f64_nxv64i16 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = 
fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv64f32_nxv64i32 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64f64_nxv64i32 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64f16_nxv64i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv64f32_nxv64i64 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv64f64_nxv64i64 = fptosi undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64f32_nxv64i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i1 = fptosi undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; RV64-LABEL: 'fptosi' -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptosi <2 x half> undef to <2 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_v2i8 = fptosi <2 x float> undef to <2 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i8 = fptosi <2 x double> undef to <2 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptosi <2 x half> undef to <2 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i16 = fptosi <2 x float> undef to <2 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2i16 = fptosi <2 x double> undef to <2 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptosi <2 x half> undef to <2 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i32 = fptosi <2 x float> undef to <2 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i32 = fptosi <2 x double> undef to <2 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptosi <2 x half> undef to <2 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i64 = fptosi <2 x float> undef to <2 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i64 = fptosi <2 x double> undef to <2 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptosi <2 x half> undef to <2 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptosi <2 x float> undef to <2 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptosi <2 x double> undef to <2 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptosi <4 x float> undef to <4 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptosi <4 x double> undef to <4 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptosi <4 x half> undef to <4 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i16 = fptosi <4 x float> undef to <4 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i16 = fptosi <4 x 
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptosi <4 x half> undef to <4 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i32 = fptosi <4 x float> undef to <4 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i32 = fptosi <4 x double> undef to <4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptosi <4 x half> undef to <4 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i64 = fptosi <4 x float> undef to <4 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i64 = fptosi <4 x double> undef to <4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptosi <4 x float> undef to <4 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptosi <4 x double> undef to <4 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptosi <8 x float> undef to <8 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptosi <8 x half> undef to <8 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i16 = fptosi <8 x float> undef to <8 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8i16 = fptosi <8 x double> undef to <8 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptosi <8 x half> undef to <8 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i32 = fptosi <8 x float> undef to <8 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8i32 = fptosi <8 x double> undef to <8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptosi <8 x half> undef to <8 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i64 = fptosi <8 x float> undef to <8 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8i64 = fptosi <8 x double> undef to <8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptosi <8 x float> undef to <8 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptosi <16 x half> undef to <16 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i16 = fptosi <16 x float> undef to <16 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f64_v16i16 = fptosi <16 x double> undef to <16 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptosi <16 x half> undef to <16 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i32 = fptosi <16 x float> undef to <16 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16i32 = fptosi <16 x double> undef to <16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptosi <16 x half> undef to <16 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i64 = fptosi <16 x float> undef to <16 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16i64 = fptosi <16 x double> undef to <16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptosi <32 x half> undef to <32 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32i16 = fptosi <32 x float> undef to <32 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f64_v32i16 = fptosi <32 x double> undef to <32 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptosi <32 x half> undef to <32 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32i32 = fptosi <32 x float> undef to <32 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f64_v32i32 = fptosi <32 x double> undef to <32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptosi <32 x half> undef to <32 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i64 = fptosi <32 x float> undef to <32 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f64_v32i64 = fptosi <32 x double> undef to <32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptosi <64 x half> undef to <64 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f32_v64i16 = fptosi <64 x float> undef to <64 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f64_v64i16 = fptosi <64 x double> undef to <64 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptosi <64 x half> undef to <64 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64f32_v64i32 = fptosi <64 x float> undef to <64 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f64_v64i32 = fptosi <64 x double> undef to <64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptosi <64 x half> undef to <64 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f32_v64i64 = fptosi <64 x float> undef to <64 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64f64_v64i64 = fptosi <64 x double> undef to <64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptosi <64 x half> undef to <64 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64f32_v64i1 = fptosi <64 x float> undef to <64 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i1 = fptosi <64 x double> undef to <64 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128f32_v128i8 = fptosi <128 x float> undef to <128 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i8 = fptosi <128 x double> undef to <128 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptosi <128 x half> undef to <128 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f32_v128i16 = fptosi <128 x float> undef to <128 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f64_v128i16 = fptosi <128 x double> undef to <128 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptosi <128 x half> undef to <128 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v128f32_v128i32 = fptosi <128 x float> undef to <128 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128f64_v128i32 = fptosi <128 x double> undef to <128 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptosi <128 x half> undef to <128 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128f32_v128i64 = fptosi <128 x float> undef to <128 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v128f64_v128i64 = fptosi <128 x double> undef to <128 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptosi <128 x half> undef to <128 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128f32_v128i1 = fptosi <128 x float> undef to <128 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i1 = fptosi <128 x double> undef to <128 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1i16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i32 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i64 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i16 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i16 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i16 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4i16 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f64_nxv4i32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f64_nxv4i64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i1 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i16 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f64_nxv8i16 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f64_nxv8i32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f64_nxv8i64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i1 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i1 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f32_nxv16i8 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i8 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f32_nxv16i16 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f64_nxv16i16 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f32_nxv16i32 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f64_nxv16i32 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i64 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f64_nxv16i64 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i1 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i1 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32f32_nxv32i8 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i8 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f32_nxv32i16 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f64_nxv32i16 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32f32_nxv32i32 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32f64_nxv32i32 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32f32_nxv32i64 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32f64_nxv32i64 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32f32_nxv32i1 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i1 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64f32_nxv64i8 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i8 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f32_nxv64i16 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f64_nxv64i16 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv64f32_nxv64i32 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64f64_nxv64i32 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f16_nxv64i64 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64f32_nxv64i64 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv64f64_nxv64i64 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64f32_nxv64i1 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i1 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-  %v2f16_v2i8 = fptosi <2 x half> undef to <2 x i8>
   %v2f32_v2i8 = fptosi <2 x float> undef to <2 x i8>
   %v2f64_v2i8 = fptosi <2 x double> undef to <2 x i8>
-  %v2f16_v2i16 = fptosi <2 x half> undef to <2 x i16>
   %v2f32_v2i16 = fptosi <2 x float> undef to <2 x i16>
   %v2f64_v2i16 = fptosi <2 x double> undef to <2 x i16>
-  %v2f16_v2i32 = fptosi <2 x half> undef to <2 x i32>
   %v2f32_v2i32 = fptosi <2 x float> undef to <2 x i32>
   %v2f64_v2i32 = fptosi <2 x double> undef to <2 x i32>
-  %v2f16_v2i64 = fptosi <2 x half> undef to <2 x i64>
   %v2f32_v2i64 = fptosi <2 x float> undef to <2 x i64>
   %v2f64_v2i64 = fptosi <2 x double> undef to <2 x i64>
-  %v2f16_v2i1 = fptosi <2 x half> undef to <2 x i1>
   %v2f32_v2i1 = fptosi <2 x float> undef to <2 x i1>
   %v2f64_v2i1 = fptosi <2 x double> undef to <2 x i1>
-  %v4f16_v4i8 = fptosi <4 x half> undef to <4 x i8>
   %v4f32_v4i8 = fptosi <4 x float> undef to <4 x i8>
   %v4f64_v4i8 = fptosi <4 x double> undef to <4 x i8>
-  %v4f16_v4i16 = fptosi <4 x half> undef to <4 x i16>
   %v4f32_v4i16 = fptosi <4 x float> undef to <4 x i16>
   %v4f64_v4i16 = fptosi <4 x double> undef to <4 x i16>
-  %v4f16_v4i32 = fptosi <4 x half> undef to <4 x i32>
   %v4f32_v4i32 = fptosi <4 x float> undef to <4 x i32>
   %v4f64_v4i32 = fptosi <4 x double> undef to <4 x i32>
-  %v4f16_v4i64 = fptosi <4 x half> undef to <4 x i64>
   %v4f32_v4i64 = fptosi <4 x float> undef to <4 x i64>
   %v4f64_v4i64 = fptosi <4 x double> undef to <4 x i64>
-  %v4f16_v4i1 = fptosi <4 x half> undef to <4 x i1>
   %v4f32_v4i1 = fptosi <4 x float> undef to <4 x i1>
   %v4f64_v4i1 = fptosi <4 x double> undef to <4 x i1>
-  %v8f16_v8i8 = fptosi <8 x half> undef to <8 x i8>
   %v8f32_v8i8 = fptosi <8 x float> undef to <8 x i8>
   %v8f64_v8i8 = fptosi <8 x double> undef to <8 x i8>
-  %v8f16_v8i16 = fptosi <8 x half> undef to <8 x i16>
   %v8f32_v8i16 = fptosi <8 x float> undef to <8 x i16>
   %v8f64_v8i16 = fptosi <8 x double> undef to <8 x i16>
-  %v8f16_v8i32 = fptosi <8 x half> undef to <8 x i32>
   %v8f32_v8i32 = fptosi <8 x float> undef to <8 x i32>
   %v8f64_v8i32 = fptosi <8 x double> undef to <8 x i32>
-  %v8f16_v8i64 = fptosi <8 x half> undef to <8 x i64>
   %v8f32_v8i64 = fptosi <8 x float> undef to <8 x i64>
   %v8f64_v8i64 = fptosi <8 x double> undef to <8 x i64>
-  %v8f16_v8i1 = fptosi <8 x half> undef to <8 x i1>
   %v8f32_v8i1 = fptosi <8 x float> undef to <8 x i1>
   %v8f64_v8i1 = fptosi <8 x double> undef to <8 x i1>
-  %v16f16_v16i8 = fptosi <16 x half> undef to <16 x i8>
   %v16f32_v16i8 = fptosi <16 x float> undef to <16 x i8>
   %v16f64_v16i8 = fptosi <16 x double> undef to <16 x i8>
-  %v16f16_v16i16 = fptosi <16 x half> undef to <16 x i16>
   %v16f32_v16i16 = fptosi <16 x float> undef to <16 x i16>
   %v16f64_v16i16 = fptosi <16 x double> undef to <16 x i16>
-  %v16f16_v16i32 = fptosi <16 x half> undef to <16 x i32>
   %v16f32_v16i32 = fptosi <16 x float> undef to <16 x i32>
   %v16f64_v16i32 = fptosi <16 x double> undef to <16 x i32>
-  %v16f16_v16i64 = fptosi <16 x half> undef to <16 x i64>
   %v16f32_v16i64 = fptosi <16 x float> undef to <16 x i64>
   %v16f64_v16i64 = fptosi <16 x double> undef to <16 x i64>
-  %v16f16_v16i1 = fptosi <16 x half> undef to <16 x i1>
   %v16f32_v16i1 = fptosi <16 x float> undef to <16 x i1>
   %v16f64_v16i1 = fptosi <16 x double> undef to <16 x i1>
-  %v32f16_v32i8 = fptosi <32 x half> undef to <32 x i8>
   %v32f32_v32i8 = fptosi <32 x float> undef to <32 x i8>
   %v32f64_v32i8 = fptosi <32 x double> undef to <32 x i8>
-  %v32f16_v32i16 = fptosi <32 x half> undef to <32 x i16>
   %v32f32_v32i16 = fptosi <32 x float> undef to <32 x i16>
   %v32f64_v32i16 = fptosi <32 x double> undef to <32 x i16>
-  %v32f16_v32i32 = fptosi <32 x half> undef to <32 x i32>
   %v32f32_v32i32 = fptosi <32 x float> undef to <32 x i32>
   %v32f64_v32i32 = fptosi <32 x double> undef to <32 x i32>
-  %v32f16_v32i64 = fptosi <32 x half> undef to <32 x i64>
   %v32f32_v32i64 = fptosi <32 x float> undef to <32 x i64>
   %v32f64_v32i64 = fptosi <32 x double> undef to <32 x i64>
-  %v32f16_v32i1 = fptosi <32 x half> undef to <32 x i1>
   %v32f32_v32i1 = fptosi <32 x float> undef to <32 x i1>
   %v32f64_v32i1 = fptosi <32 x double> undef to <32 x i1>
-  %v64f16_v64i8 = fptosi <64 x half> undef to <64 x i8>
   %v64f32_v64i8 = fptosi <64 x float> undef to <64 x i8>
   %v64f64_v64i8 = fptosi <64 x double> undef to <64 x i8>
-  %v64f16_v64i16 = fptosi <64 x half> undef to <64 x i16>
   %v64f32_v64i16 = fptosi <64 x float> undef to <64 x i16>
   %v64f64_v64i16 = fptosi <64 x double> undef to <64 x i16>
-  %v64f16_v64i32 = fptosi <64 x half> undef to <64 x i32>
   %v64f32_v64i32 = fptosi <64 x float> undef to <64 x i32>
   %v64f64_v64i32 = fptosi <64 x double> undef to <64 x i32>
-  %v64f16_v64i64 = fptosi <64 x half> undef to <64 x i64>
   %v64f32_v64i64 = fptosi <64 x float> undef to <64 x i64>
   %v64f64_v64i64 = fptosi <64 x double> undef to <64 x i64>
-  %v64f16_v64i1 = fptosi <64 x half> undef to <64 x i1>
   %v64f32_v64i1 = fptosi <64 x float> undef to <64 x i1>
   %v64f64_v64i1 = fptosi <64 x double> undef to <64 x i1>
-  %v128f16_v128i8 = fptosi <128 x half> undef to <128 x i8>
   %v128f32_v128i8 = fptosi <128 x float> undef to <128 x i8>
   %v128f64_v128i8 = fptosi <128 x double> undef to <128 x i8>
-  %v128f16_v128i16 = fptosi <128 x half> undef to <128 x i16>
   %v128f32_v128i16 = fptosi <128 x float> undef to <128 x i16>
   %v128f64_v128i16 = fptosi <128 x double> undef to <128 x i16>
-  %v128f16_v128i32 = fptosi <128 x half> undef to <128 x i32>
   %v128f32_v128i32 = fptosi <128 x float> undef to <128 x i32>
   %v128f64_v128i32 = fptosi <128 x double> undef to <128 x i32>
-  %v128f16_v128i64 = fptosi <128 x half> undef to <128 x i64>
   %v128f32_v128i64 = fptosi <128 x float> undef to <128 x i64>
   %v128f64_v128i64 = fptosi <128 x double> undef to <128 x i64>
-  %v128f16_v128i1 = fptosi <128 x half> undef to <128 x i1>
   %v128f32_v128i1 = fptosi <128 x float> undef to <128 x i1>
   %v128f64_v128i1 = fptosi <128 x double> undef to <128 x i1>
-  %nxv1f16_nxv1i8 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i8>
   %nxv1f32_nxv1i8 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i8>
   %nxv1f64_nxv1i8 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i8>
-  %nxv1f16_nxv1i16 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i16>
   %nxv1f32_nxv1i16 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i16>
   %nxv1f64_nxv1i16 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i16>
-  %nxv1f16_nxv1i32 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i32>
   %nxv1f32_nxv1i32 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i32>
   %nxv1f64_nxv1i32 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i32>
-  %nxv1f16_nxv1i64 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i64>
   %nxv1f32_nxv1i64 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i64>
   %nxv1f64_nxv1i64 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i64>
-  %nxv1f16_nxv1i1 = fptosi <vscale x 1 x half> undef to <vscale x 1 x i1>
   %nxv1f32_nxv1i1 = fptosi <vscale x 1 x float> undef to <vscale x 1 x i1>
   %nxv1f64_nxv1i1 = fptosi <vscale x 1 x double> undef to <vscale x 1 x i1>
-  %nxv2f16_nxv2i8 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i8>
   %nxv2f32_nxv2i8 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i8>
   %nxv2f64_nxv2i8 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i8>
-  %nxv2f16_nxv2i16 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i16>
   %nxv2f32_nxv2i16 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i16>
   %nxv2f64_nxv2i16 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i16>
-  %nxv2f16_nxv2i32 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i32>
   %nxv2f32_nxv2i32 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i32>
   %nxv2f64_nxv2i32 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i32>
-  %nxv2f16_nxv2i64 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i64>
   %nxv2f32_nxv2i64 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i64>
   %nxv2f64_nxv2i64 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i64>
-  %nxv2f16_nxv2i1 = fptosi <vscale x 2 x half> undef to <vscale x 2 x i1>
   %nxv2f32_nxv2i1 = fptosi <vscale x 2 x float> undef to <vscale x 2 x i1>
   %nxv2f64_nxv2i1 = fptosi <vscale x 2 x double> undef to <vscale x 2 x i1>
-  %nxv4f16_nxv4i8 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i8>
   %nxv4f32_nxv4i8 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i8>
   %nxv4f64_nxv4i8 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i8>
-  %nxv4f16_nxv4i16 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i16>
   %nxv4f32_nxv4i16 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i16>
   %nxv4f64_nxv4i16 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i16>
-  %nxv4f16_nxv4i32 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i32>
   %nxv4f32_nxv4i32 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i32>
   %nxv4f64_nxv4i32 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i32>
-  %nxv4f16_nxv4i64 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i64>
   %nxv4f32_nxv4i64 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i64>
   %nxv4f64_nxv4i64 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i64>
-  %nxv4f16_nxv4i1 = fptosi <vscale x 4 x half> undef to <vscale x 4 x i1>
   %nxv4f32_nxv4i1 = fptosi <vscale x 4 x float> undef to <vscale x 4 x i1>
   %nxv4f64_nxv4i1 = fptosi <vscale x 4 x double> undef to <vscale x 4 x i1>
-  %nxv8f16_nxv8i8 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i8>
   %nxv8f32_nxv8i8 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i8>
   %nxv8f64_nxv8i8 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i8>
-  %nxv8f16_nxv8i16 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i16>
   %nxv8f32_nxv8i16 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i16>
   %nxv8f64_nxv8i16 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i16>
-  %nxv8f16_nxv8i32 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i32>
   %nxv8f32_nxv8i32 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i32>
   %nxv8f64_nxv8i32 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i32>
-  %nxv8f16_nxv8i64 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i64>
   %nxv8f32_nxv8i64 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i64>
   %nxv8f64_nxv8i64 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i64>
-  %nxv8f16_nxv8i1 = fptosi <vscale x 8 x half> undef to <vscale x 8 x i1>
   %nxv8f32_nxv8i1 = fptosi <vscale x 8 x float> undef to <vscale x 8 x i1>
   %nxv8f64_nxv8i1 = fptosi <vscale x 8 x double> undef to <vscale x 8 x i1>
-  %nxv16f16_nxv16i8 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i8>
   %nxv16f32_nxv16i8 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i8>
   %nxv16f64_nxv16i8 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i8>
-  %nxv16f16_nxv16i16 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i16>
   %nxv16f32_nxv16i16 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i16>
   %nxv16f64_nxv16i16 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i16>
-  %nxv16f16_nxv16i32 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i32>
   %nxv16f32_nxv16i32 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i32>
   %nxv16f64_nxv16i32 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i32>
-  %nxv16f16_nxv16i64 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i64>
   %nxv16f32_nxv16i64 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i64>
   %nxv16f64_nxv16i64 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i64>
-  %nxv16f16_nxv16i1 = fptosi <vscale x 16 x half> undef to <vscale x 16 x i1>
   %nxv16f32_nxv16i1 = fptosi <vscale x 16 x float> undef to <vscale x 16 x i1>
   %nxv16f64_nxv16i1 = fptosi <vscale x 16 x double> undef to <vscale x 16 x i1>
-  %nxv32f16_nxv32i8 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i8>
   %nxv32f32_nxv32i8 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i8>
   %nxv32f64_nxv32i8 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i8>
-  %nxv32f16_nxv32i16 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i16>
   %nxv32f32_nxv32i16 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i16>
   %nxv32f64_nxv32i16 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i16>
-  %nxv32f16_nxv32i32 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i32>
   %nxv32f32_nxv32i32 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i32>
   %nxv32f64_nxv32i32 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i32>
-  %nxv32f16_nxv32i64 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i64>
   %nxv32f32_nxv32i64 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i64>
   %nxv32f64_nxv32i64 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i64>
-  %nxv32f16_nxv32i1 = fptosi <vscale x 32 x half> undef to <vscale x 32 x i1>
   %nxv32f32_nxv32i1 = fptosi <vscale x 32 x float> undef to <vscale x 32 x i1>
   %nxv32f64_nxv32i1 = fptosi <vscale x 32 x double> undef to <vscale x 32 x i1>
-  %nxv64f16_nxv64i8 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i8>
   %nxv64f32_nxv64i8 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i8>
   %nxv64f64_nxv64i8 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i8>
-  %nxv64f16_nxv64i16 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i16>
   %nxv64f32_nxv64i16 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i16>
   %nxv64f64_nxv64i16 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i16>
-  %nxv64f16_nxv64i32 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i32>
   %nxv64f32_nxv64i32 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i32>
   %nxv64f64_nxv64i32 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i32>
-  %nxv64f16_nxv64i64 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i64>
   %nxv64f32_nxv64i64 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i64>
   %nxv64f64_nxv64i64 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i64>
-  %nxv64f16_nxv64i1 = fptosi <vscale x 64 x half> undef to <vscale x 64 x i1>
   %nxv64f32_nxv64i1 = fptosi <vscale x 64 x float> undef to <vscale x 64 x i1>
   %nxv64f64_nxv64i1 = fptosi <vscale x 64 x double> undef to <vscale x 64 x i1>
@@ -2372,652 +2162,442 @@ define void @fptosi() {
 define void @fptoui() {
 ; RV32-LABEL: 'fptoui'
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptoui <2 x half> undef to <2 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_v2i8 = fptoui <2 x float> undef to <2 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i8 = fptoui <2 x double> undef to <2 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptoui <2 x half> undef to <2 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i16 = fptoui <2 x float> undef to <2 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2i16 = fptoui <2 x double> undef to <2 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptoui <2 x half> undef to <2 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i32 = fptoui <2 x float> undef to <2 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i32 = fptoui <2 x double> undef to <2 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptoui <2 x half> undef to <2 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i64 = fptoui <2 x float> undef to <2 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i64 = fptoui <2 x double> undef to <2 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptoui <2 x half> undef to <2 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptoui <2 x float> undef to <2 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptoui <2 x double> undef to <2 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptoui <4 x float> undef to <4 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptoui <4 x double> undef to <4 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptoui <4 x half> undef to <4 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i16 = fptoui <4 x float> undef to <4 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i16 = fptoui <4 x double> undef to <4 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptoui <4 x half> undef to <4 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i32 = fptoui <4 x float> undef to <4 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i32 = fptoui <4 x double> undef to <4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptoui <4 x half> undef to <4 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i64 = fptoui <4 x float> undef to <4 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i64 = fptoui <4 x double> undef to <4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptoui <4 x half> undef to <4 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptoui <4 x float> undef to <4 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptoui <4 x double> undef to <4 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptoui <8 x float> undef to <8 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i8 = fptoui <8 x double> undef to <8 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptoui <8 x half> undef to <8 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i16 = fptoui <8 x float> undef to <8 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8i16 = fptoui <8 x double> undef to <8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptoui <8 x half> undef to <8 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i32 = fptoui <8 x float> undef to <8 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8i32 = fptoui <8 x double> undef to <8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptoui <8 x half> undef to <8 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i64 = fptoui <8 x float> undef to <8 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8i64 = fptoui <8 x double> undef to <8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptoui <8 x half> undef to <8 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptoui <8 x float> undef to <8 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i1 = fptoui <8 x double> undef to <8 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i8 = fptoui <16 x float> undef to <16 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i8 = fptoui <16 x double> undef to <16 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptoui <16 x half> undef to <16 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i16 = fptoui <16 x float> undef to <16 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f64_v16i16 = fptoui <16 x double> undef to <16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptoui <16 x half> undef to <16 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i32 = fptoui <16 x float> undef to <16 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16i32 = fptoui <16 x double> undef to <16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptoui <16 x half> undef to <16 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i64 = fptoui <16 x float> undef to <16 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16i64 = fptoui <16 x double> undef to <16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptoui <16 x half> undef to <16 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i1 = fptoui <16 x float> undef to <16 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i1 = fptoui <16 x double> undef to <16 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f32_v32i8 = fptoui <32 x float> undef to <32 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i8 = fptoui <32 x double> undef to <32 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptoui <32 x half> undef to <32 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32i16 = fptoui <32 x float> undef to <32 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f64_v32i16 = fptoui <32 x double> undef to <32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptoui <32 x half> undef to <32 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32i32 = fptoui <32 x float> undef to <32 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f64_v32i32 = fptoui <32 x double> undef to <32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptoui <32 x half> undef to <32 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i64 = fptoui <32 x float> undef to <32 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f64_v32i64 = fptoui <32 x double> undef to <32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptoui <32 x half> undef to <32 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i1 = fptoui <32 x float> undef to <32 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i1 = fptoui <32 x double> undef to <32 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64f32_v64i8 = fptoui <64 x float> undef to <64 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i8 = fptoui <64 x double> undef to <64 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptoui <64 x half> undef to <64 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f32_v64i16 = fptoui <64 x float> undef to <64 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f64_v64i16 = fptoui <64 x double> undef to <64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptoui <64 x half> undef to <64 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64f32_v64i32 = fptoui <64 x float> undef to <64 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f64_v64i32 = fptoui <64 x double> undef to <64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptoui <64 x half> undef to <64 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f32_v64i64 = fptoui <64 x float> undef to <64 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64f64_v64i64 = fptoui <64 x double> undef to <64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptoui <64 x half> undef to <64 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64f32_v64i1 = fptoui <64 x float> undef to <64 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i1 = fptoui <64 x double> undef to <64 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128f32_v128i8 = fptoui <128 x float> undef to <128 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i8 = fptoui <128 x double> undef to <128 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptoui <128 x half> undef to <128 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f32_v128i16 = fptoui <128 x float> undef to <128 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f64_v128i16 = fptoui <128 x double> undef to <128 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptoui <128 x half> undef to <128 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v128f32_v128i32 = fptoui <128 x float> undef to <128 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128f64_v128i32 = fptoui <128 x double> undef to <128 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptoui <128 x half> undef to <128 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128f32_v128i64 = fptoui <128 x float> undef to <128 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v128f64_v128i64 = fptoui <128 x double> undef to <128 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptoui <128 x half> undef to <128 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128f32_v128i1 = fptoui <128 x float> undef to <128 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i1 = fptoui <128 x double> undef to <128 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i16 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1i16 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i32 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i32 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i64 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i64 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = fptoui <vscale x 1 x half> undef to <vscale x 1 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptoui <vscale x 1 x float> undef to <vscale x 1 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptoui <vscale x 1 x double> undef to <vscale x 1 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i16 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i16 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i32 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i32 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i64 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i64 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptoui <vscale x 2 x half> undef to <vscale x 2 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptoui <vscale x 2 x float> undef to <vscale x 2 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptoui <vscale x 2 x double> undef to <vscale x 2 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i8 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i16 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4i16 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i32 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f64_nxv4i32 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i64 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f64_nxv4i64 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptoui <vscale x 4 x half> undef to <vscale x 4 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptoui <vscale x 4 x float> undef to <vscale x 4 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i1 = fptoui <vscale x 4 x double> undef to <vscale x 4 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8i8 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i8 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i16 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f64_nxv8i16 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i32 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f64_nxv8i32 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i64 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f64_nxv8i64 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptoui <vscale x 8 x half> undef to <vscale x 8 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i1 = fptoui <vscale x 8 x float> undef to <vscale x 8 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i1 = fptoui <vscale x 8 x double> undef to <vscale x 8 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f32_nxv16i8 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i8 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f32_nxv16i16 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f64_nxv16i16 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f32_nxv16i32 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f64_nxv16i32 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i64 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f64_nxv16i64 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptoui <vscale x 16 x half> undef to <vscale x 16 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i1 = fptoui <vscale x 16 x float> undef to <vscale x 16 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i1 = fptoui <vscale x 16 x double> undef to <vscale x 16 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32f32_nxv32i8 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i8 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f32_nxv32i16 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f64_nxv32i16 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32f32_nxv32i32 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32f64_nxv32i32 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32f32_nxv32i64 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32f64_nxv32i64 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptoui <vscale x 32 x half> undef to <vscale x 32 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32f32_nxv32i1 = fptoui <vscale x 32 x float> undef to <vscale x 32 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i1 = fptoui <vscale x 32 x double> undef to <vscale x 32 x i1>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64f32_nxv64i8 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i8>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i8 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i8>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f32_nxv64i16 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i16>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f64_nxv64i16 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i16>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv64f32_nxv64i32 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i32>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64f64_nxv64i32 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i32>
-; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64f16_nxv64i64 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv64f32_nxv64i64 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i64>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv64f64_nxv64i64 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i64>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptoui <vscale x 64 x half> undef to <vscale x 64 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64f32_nxv64i1 = fptoui <vscale x 64 x float> undef to <vscale x 64 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i1 = fptoui <vscale x 64 x double> undef to <vscale x 64 x i1>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64-LABEL: 'fptoui'
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i8 = fptoui <2 x half> undef to <2 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_v2i8 = fptoui <2 x float> undef to <2 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i8 = fptoui <2 x double> undef to <2 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i16 = fptoui <2 x half> undef to <2 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i16 = fptoui <2 x float> undef to <2 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64_v2i16 = fptoui <2 x double> undef to <2 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16_v2i32 = fptoui <2 x half> undef to <2 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i32 = fptoui <2 x float> undef to <2 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i32 = fptoui <2 x double> undef to <2 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2i64 = fptoui <2 x half> undef to <2 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2i64 = fptoui <2 x float> undef to <2 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_v2i64 = fptoui <2 x double> undef to <2 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16_v2i1 = fptoui <2 x half> undef to <2 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_v2i1 = fptoui <2 x float> undef to <2 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_v2i1 = fptoui <2 x double> undef to <2 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4i8 = fptoui <4 x float> undef to <4 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i8 = fptoui <4 x double> undef to <4 x i8>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i16 = fptoui <4 x half> undef to <4 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i16 = fptoui <4 x float> undef to <4 x i16>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4i16 = fptoui <4 x double> undef to <4 x i16>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4i32 = fptoui <4 x half> undef to <4 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i32 = fptoui <4 x float> undef to <4 x i32>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i32 = fptoui <4 x double> undef to <4 x i32>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4i64 = fptoui <4 x half> undef to <4 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4i64 = fptoui <4 x float> undef to <4 x i64>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4i64 = fptoui <4 x double> undef to <4 x i64>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4i1 = fptoui <4 x half> undef to <4 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_v4i1 = fptoui <4 x float> undef to <4 x i1>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_v4i1 = fptoui <4 x double> undef to <4 x i1>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_v8i8 = fptoui <8 x float> undef to <8 x i8>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i8 = fptoui <8 x double> undef to <8 x i8>
cost of 3 for instruction: %v8f64_v8i8 = fptoui <8 x double> undef to <8 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i16 = fptoui <8 x half> undef to <8 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i16 = fptoui <8 x float> undef to <8 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8i16 = fptoui <8 x double> undef to <8 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8i32 = fptoui <8 x half> undef to <8 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i32 = fptoui <8 x float> undef to <8 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8i32 = fptoui <8 x double> undef to <8 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8i64 = fptoui <8 x half> undef to <8 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8i64 = fptoui <8 x float> undef to <8 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8i64 = fptoui <8 x double> undef to <8 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16_v8i1 = fptoui <8 x half> undef to <8 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_v8i1 = fptoui <8 x float> undef to <8 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8i1 = fptoui <8 x double> undef to <8 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16i8 = fptoui <16 x float> undef to <16 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i8 = fptoui <16 x double> undef to <16 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i16 = fptoui <16 x half> undef to <16 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i16 = fptoui <16 x float> undef to <16 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f64_v16i16 = fptoui <16 x double> undef to <16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16i32 = fptoui <16 x half> undef to <16 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i32 = fptoui <16 x float> undef to <16 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16i32 = fptoui <16 x double> undef to <16 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16i64 = fptoui <16 x half> undef to <16 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16i64 = fptoui <16 x float> undef to <16 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16i64 = fptoui <16 x double> undef to <16 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f16_v16i1 = fptoui <16 x half> undef to <16 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_v16i1 = fptoui <16 x float> undef to <16 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f64_v16i1 = fptoui <16 x double> undef to <16 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f32_v32i8 = fptoui <32 x float> undef to <32 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i8 = fptoui <32 x double> undef to <32 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i16 = fptoui <32 x half> undef to <32 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32i16 = fptoui <32 x float> undef to <32 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f64_v32i16 = fptoui <32 x double> undef to <32 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32i32 = fptoui <32 x half> undef to <32 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32i32 = fptoui <32 x float> undef to <32 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f64_v32i32 = fptoui <32 x double> undef to <32 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32i64 = fptoui <32 x half> undef to <32 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i64 = fptoui <32 x float> undef to <32 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32f64_v32i64 = fptoui <32 x double> undef to <32 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f16_v32i1 = fptoui <32 x half> undef to <32 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32i1 = fptoui <32 x float> undef to <32 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32f64_v32i1 = fptoui <32 x double> undef to <32 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64f32_v64i8 = fptoui <64 x float> undef to <64 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i8 = fptoui <64 x double> undef to <64 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64f16_v64i16 = fptoui <64 x half> undef to <64 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f32_v64i16 = fptoui <64 x float> undef to <64 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f64_v64i16 = fptoui <64 x double> undef to <64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i32 = fptoui <64 x half> undef to <64 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64f32_v64i32 = fptoui <64 x float> undef to <64 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f64_v64i32 = fptoui <64 x double> undef to <64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64i64 = fptoui <64 x half> undef to <64 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f32_v64i64 = fptoui <64 x float> undef to <64 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64f64_v64i64 = fptoui <64 x double> undef to <64 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64i1 = fptoui <64 x half> undef to <64 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost 
of 7 for instruction: %v64f32_v64i1 = fptoui <64 x float> undef to <64 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64f64_v64i1 = fptoui <64 x double> undef to <64 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128f32_v128i8 = fptoui <128 x float> undef to <128 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i8 = fptoui <128 x double> undef to <128 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128f16_v128i16 = fptoui <128 x half> undef to <128 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f32_v128i16 = fptoui <128 x float> undef to <128 x i16> ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f64_v128i16 = fptoui <128 x double> undef to <128 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128i32 = fptoui <128 x half> undef to <128 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v128f32_v128i32 = fptoui <128 x float> undef to <128 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128f64_v128i32 = fptoui <128 x double> undef to <128 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128i64 = fptoui <128 x half> undef to <128 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128f32_v128i64 = fptoui <128 x float> undef to <128 x i64> ; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v128f64_v128i64 = fptoui <128 x double> undef to <128 x i64> -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128f16_v128i1 = fptoui <128 x half> undef to <128 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128f32_v128i1 = fptoui <128 x float> undef to <128 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128f64_v128i1 = fptoui <128 x double> undef to <128 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f32_nxv1i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i8 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1i16 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i32 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f16_nxv1i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1i64 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f16_nxv1i1 = 
fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f32_nxv1i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1f64_nxv1i1 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i8 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv2i16 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i32 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv2i64 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f32_nxv2i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f64_nxv2i1 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f32_nxv4i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i8 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4i16 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f64_nxv4i32 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f64_nxv4i64 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f16_nxv4i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f32_nxv4i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4i1 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %nxv8f32_nxv8i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i8 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f64_nxv8i16 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f64_nxv8i32 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f64_nxv8i64 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f16_nxv8i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f32_nxv8i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8f64_nxv8i1 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f32_nxv16i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i8 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f32_nxv16i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f64_nxv16i16 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f32_nxv16i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f64_nxv16i32 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16f64_nxv16i64 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f16_nxv16i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16f64_nxv16i1 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32f32_nxv32i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i8 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32f16_nxv32i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %nxv32f32_nxv32i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f64_nxv32i16 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32f32_nxv32i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32f64_nxv32i32 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32f32_nxv32i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32f64_nxv32i64 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32f32_nxv32i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32f64_nxv32i1 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64f16_nxv64i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64f32_nxv64i8 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i8 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64f16_nxv64i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f32_nxv64i16 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f64_nxv64i16 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv64f32_nxv64i32 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64f64_nxv64i32 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f16_nxv64i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64f32_nxv64i64 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv64f64_nxv64i64 = fptoui undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64f16_nxv64i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64f32_nxv64i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64f64_nxv64i1 = fptoui undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %v2f16_v2i8 = fptoui <2 x half> undef to <2 x i8> %v2f32_v2i8 = fptoui <2 x float> undef to <2 x i8> %v2f64_v2i8 = fptoui <2 x double> undef to <2 x i8> - %v2f16_v2i16 = fptoui <2 x half> undef to <2 x i16> %v2f32_v2i16 = fptoui <2 x float> undef to <2 x i16> %v2f64_v2i16 = fptoui <2 x double> undef to <2 x i16> - %v2f16_v2i32 = fptoui <2 x half> undef to <2 x i32> %v2f32_v2i32 = fptoui <2 x float> undef to <2 x i32> %v2f64_v2i32 = fptoui <2 x double> undef to <2 x i32> - %v2f16_v2i64 = fptoui <2 x half> undef to <2 x i64> %v2f32_v2i64 = fptoui <2 x float> undef to <2 x i64> %v2f64_v2i64 = fptoui <2 x double> undef to <2 x i64> - %v2f16_v2i1 = fptoui <2 x half> undef to <2 x i1> %v2f32_v2i1 = fptoui 
<2 x float> undef to <2 x i1> %v2f64_v2i1 = fptoui <2 x double> undef to <2 x i1> - %v4f16_v4i8 = fptoui <4 x half> undef to <4 x i8> %v4f32_v4i8 = fptoui <4 x float> undef to <4 x i8> %v4f64_v4i8 = fptoui <4 x double> undef to <4 x i8> - %v4f16_v4i16 = fptoui <4 x half> undef to <4 x i16> %v4f32_v4i16 = fptoui <4 x float> undef to <4 x i16> %v4f64_v4i16 = fptoui <4 x double> undef to <4 x i16> - %v4f16_v4i32 = fptoui <4 x half> undef to <4 x i32> %v4f32_v4i32 = fptoui <4 x float> undef to <4 x i32> %v4f64_v4i32 = fptoui <4 x double> undef to <4 x i32> - %v4f16_v4i64 = fptoui <4 x half> undef to <4 x i64> %v4f32_v4i64 = fptoui <4 x float> undef to <4 x i64> %v4f64_v4i64 = fptoui <4 x double> undef to <4 x i64> - %v4f16_v4i1 = fptoui <4 x half> undef to <4 x i1> %v4f32_v4i1 = fptoui <4 x float> undef to <4 x i1> %v4f64_v4i1 = fptoui <4 x double> undef to <4 x i1> - %v8f16_v8i8 = fptoui <8 x half> undef to <8 x i8> %v8f32_v8i8 = fptoui <8 x float> undef to <8 x i8> %v8f64_v8i8 = fptoui <8 x double> undef to <8 x i8> - %v8f16_v8i16 = fptoui <8 x half> undef to <8 x i16> %v8f32_v8i16 = fptoui <8 x float> undef to <8 x i16> %v8f64_v8i16 = fptoui <8 x double> undef to <8 x i16> - %v8f16_v8i32 = fptoui <8 x half> undef to <8 x i32> %v8f32_v8i32 = fptoui <8 x float> undef to <8 x i32> %v8f64_v8i32 = fptoui <8 x double> undef to <8 x i32> - %v8f16_v8i64 = fptoui <8 x half> undef to <8 x i64> %v8f32_v8i64 = fptoui <8 x float> undef to <8 x i64> %v8f64_v8i64 = fptoui <8 x double> undef to <8 x i64> - %v8f16_v8i1 = fptoui <8 x half> undef to <8 x i1> %v8f32_v8i1 = fptoui <8 x float> undef to <8 x i1> %v8f64_v8i1 = fptoui <8 x double> undef to <8 x i1> - %v16f16_v16i8 = fptoui <16 x half> undef to <16 x i8> %v16f32_v16i8 = fptoui <16 x float> undef to <16 x i8> %v16f64_v16i8 = fptoui <16 x double> undef to <16 x i8> - %v16f16_v16i16 = fptoui <16 x half> undef to <16 x i16> %v16f32_v16i16 = fptoui <16 x float> undef to <16 x i16> %v16f64_v16i16 = fptoui <16 x double> undef to <16 x i16> - %v16f16_v16i32 = fptoui <16 x half> undef to <16 x i32> %v16f32_v16i32 = fptoui <16 x float> undef to <16 x i32> %v16f64_v16i32 = fptoui <16 x double> undef to <16 x i32> - %v16f16_v16i64 = fptoui <16 x half> undef to <16 x i64> %v16f32_v16i64 = fptoui <16 x float> undef to <16 x i64> %v16f64_v16i64 = fptoui <16 x double> undef to <16 x i64> - %v16f16_v16i1 = fptoui <16 x half> undef to <16 x i1> %v16f32_v16i1 = fptoui <16 x float> undef to <16 x i1> %v16f64_v16i1 = fptoui <16 x double> undef to <16 x i1> - %v32f16_v32i8 = fptoui <32 x half> undef to <32 x i8> %v32f32_v32i8 = fptoui <32 x float> undef to <32 x i8> %v32f64_v32i8 = fptoui <32 x double> undef to <32 x i8> - %v32f16_v32i16 = fptoui <32 x half> undef to <32 x i16> %v32f32_v32i16 = fptoui <32 x float> undef to <32 x i16> %v32f64_v32i16 = fptoui <32 x double> undef to <32 x i16> - %v32f16_v32i32 = fptoui <32 x half> undef to <32 x i32> %v32f32_v32i32 = fptoui <32 x float> undef to <32 x i32> %v32f64_v32i32 = fptoui <32 x double> undef to <32 x i32> - %v32f16_v32i64 = fptoui <32 x half> undef to <32 x i64> %v32f32_v32i64 = fptoui <32 x float> undef to <32 x i64> %v32f64_v32i64 = fptoui <32 x double> undef to <32 x i64> - %v32f16_v32i1 = fptoui <32 x half> undef to <32 x i1> %v32f32_v32i1 = fptoui <32 x float> undef to <32 x i1> %v32f64_v32i1 = fptoui <32 x double> undef to <32 x i1> - %v64f16_v64i8 = fptoui <64 x half> undef to <64 x i8> %v64f32_v64i8 = fptoui <64 x float> undef to <64 x i8> %v64f64_v64i8 = fptoui <64 x double> undef to <64 x i8> - 
%v64f16_v64i16 = fptoui <64 x half> undef to <64 x i16> %v64f32_v64i16 = fptoui <64 x float> undef to <64 x i16> %v64f64_v64i16 = fptoui <64 x double> undef to <64 x i16> - %v64f16_v64i32 = fptoui <64 x half> undef to <64 x i32> %v64f32_v64i32 = fptoui <64 x float> undef to <64 x i32> %v64f64_v64i32 = fptoui <64 x double> undef to <64 x i32> - %v64f16_v64i64 = fptoui <64 x half> undef to <64 x i64> %v64f32_v64i64 = fptoui <64 x float> undef to <64 x i64> %v64f64_v64i64 = fptoui <64 x double> undef to <64 x i64> - %v64f16_v64i1 = fptoui <64 x half> undef to <64 x i1> %v64f32_v64i1 = fptoui <64 x float> undef to <64 x i1> %v64f64_v64i1 = fptoui <64 x double> undef to <64 x i1> - %v128f16_v128i8 = fptoui <128 x half> undef to <128 x i8> %v128f32_v128i8 = fptoui <128 x float> undef to <128 x i8> %v128f64_v128i8 = fptoui <128 x double> undef to <128 x i8> - %v128f16_v128i16 = fptoui <128 x half> undef to <128 x i16> %v128f32_v128i16 = fptoui <128 x float> undef to <128 x i16> %v128f64_v128i16 = fptoui <128 x double> undef to <128 x i16> - %v128f16_v128i32 = fptoui <128 x half> undef to <128 x i32> %v128f32_v128i32 = fptoui <128 x float> undef to <128 x i32> %v128f64_v128i32 = fptoui <128 x double> undef to <128 x i32> - %v128f16_v128i64 = fptoui <128 x half> undef to <128 x i64> %v128f32_v128i64 = fptoui <128 x float> undef to <128 x i64> %v128f64_v128i64 = fptoui <128 x double> undef to <128 x i64> - %v128f16_v128i1 = fptoui <128 x half> undef to <128 x i1> %v128f32_v128i1 = fptoui <128 x float> undef to <128 x i1> %v128f64_v128i1 = fptoui <128 x double> undef to <128 x i1> - %nxv1f16_nxv1i8 = fptoui undef to %nxv1f32_nxv1i8 = fptoui undef to %nxv1f64_nxv1i8 = fptoui undef to - %nxv1f16_nxv1i16 = fptoui undef to %nxv1f32_nxv1i16 = fptoui undef to %nxv1f64_nxv1i16 = fptoui undef to - %nxv1f16_nxv1i32 = fptoui undef to %nxv1f32_nxv1i32 = fptoui undef to %nxv1f64_nxv1i32 = fptoui undef to - %nxv1f16_nxv1i64 = fptoui undef to %nxv1f32_nxv1i64 = fptoui undef to %nxv1f64_nxv1i64 = fptoui undef to - %nxv1f16_nxv1i1 = fptoui undef to %nxv1f32_nxv1i1 = fptoui undef to %nxv1f64_nxv1i1 = fptoui undef to - %nxv2f16_nxv2i8 = fptoui undef to %nxv2f32_nxv2i8 = fptoui undef to %nxv2f64_nxv2i8 = fptoui undef to - %nxv2f16_nxv2i16 = fptoui undef to %nxv2f32_nxv2i16 = fptoui undef to %nxv2f64_nxv2i16 = fptoui undef to - %nxv2f16_nxv2i32 = fptoui undef to %nxv2f32_nxv2i32 = fptoui undef to %nxv2f64_nxv2i32 = fptoui undef to - %nxv2f16_nxv2i64 = fptoui undef to %nxv2f32_nxv2i64 = fptoui undef to %nxv2f64_nxv2i64 = fptoui undef to - %nxv2f16_nxv2i1 = fptoui undef to %nxv2f32_nxv2i1 = fptoui undef to %nxv2f64_nxv2i1 = fptoui undef to - %nxv4f16_nxv4i8 = fptoui undef to %nxv4f32_nxv4i8 = fptoui undef to %nxv4f64_nxv4i8 = fptoui undef to - %nxv4f16_nxv4i16 = fptoui undef to %nxv4f32_nxv4i16 = fptoui undef to %nxv4f64_nxv4i16 = fptoui undef to - %nxv4f16_nxv4i32 = fptoui undef to %nxv4f32_nxv4i32 = fptoui undef to %nxv4f64_nxv4i32 = fptoui undef to - %nxv4f16_nxv4i64 = fptoui undef to %nxv4f32_nxv4i64 = fptoui undef to %nxv4f64_nxv4i64 = fptoui undef to - %nxv4f16_nxv4i1 = fptoui undef to %nxv4f32_nxv4i1 = fptoui undef to %nxv4f64_nxv4i1 = fptoui undef to - %nxv8f16_nxv8i8 = fptoui undef to %nxv8f32_nxv8i8 = fptoui undef to %nxv8f64_nxv8i8 = fptoui undef to - %nxv8f16_nxv8i16 = fptoui undef to %nxv8f32_nxv8i16 = fptoui undef to %nxv8f64_nxv8i16 = fptoui undef to - %nxv8f16_nxv8i32 = fptoui undef to %nxv8f32_nxv8i32 = fptoui undef to %nxv8f64_nxv8i32 = fptoui undef to - %nxv8f16_nxv8i64 = fptoui undef to 
%nxv8f32_nxv8i64 = fptoui undef to %nxv8f64_nxv8i64 = fptoui undef to - %nxv8f16_nxv8i1 = fptoui undef to %nxv8f32_nxv8i1 = fptoui undef to %nxv8f64_nxv8i1 = fptoui undef to - %nxv16f16_nxv16i8 = fptoui undef to %nxv16f32_nxv16i8 = fptoui undef to %nxv16f64_nxv16i8 = fptoui undef to - %nxv16f16_nxv16i16 = fptoui undef to %nxv16f32_nxv16i16 = fptoui undef to %nxv16f64_nxv16i16 = fptoui undef to - %nxv16f16_nxv16i32 = fptoui undef to %nxv16f32_nxv16i32 = fptoui undef to %nxv16f64_nxv16i32 = fptoui undef to - %nxv16f16_nxv16i64 = fptoui undef to %nxv16f32_nxv16i64 = fptoui undef to %nxv16f64_nxv16i64 = fptoui undef to - %nxv16f16_nxv16i1 = fptoui undef to %nxv16f32_nxv16i1 = fptoui undef to %nxv16f64_nxv16i1 = fptoui undef to - %nxv32f16_nxv32i8 = fptoui undef to %nxv32f32_nxv32i8 = fptoui undef to %nxv32f64_nxv32i8 = fptoui undef to - %nxv32f16_nxv32i16 = fptoui undef to %nxv32f32_nxv32i16 = fptoui undef to %nxv32f64_nxv32i16 = fptoui undef to - %nxv32f16_nxv32i32 = fptoui undef to %nxv32f32_nxv32i32 = fptoui undef to %nxv32f64_nxv32i32 = fptoui undef to - %nxv32f16_nxv32i64 = fptoui undef to %nxv32f32_nxv32i64 = fptoui undef to %nxv32f64_nxv32i64 = fptoui undef to - %nxv32f16_nxv32i1 = fptoui undef to %nxv32f32_nxv32i1 = fptoui undef to %nxv32f64_nxv32i1 = fptoui undef to - %nxv64f16_nxv64i8 = fptoui undef to %nxv64f32_nxv64i8 = fptoui undef to %nxv64f64_nxv64i8 = fptoui undef to - %nxv64f16_nxv64i16 = fptoui undef to %nxv64f32_nxv64i16 = fptoui undef to %nxv64f64_nxv64i16 = fptoui undef to - %nxv64f16_nxv64i32 = fptoui undef to %nxv64f32_nxv64i32 = fptoui undef to %nxv64f64_nxv64i32 = fptoui undef to - %nxv64f16_nxv64i64 = fptoui undef to %nxv64f32_nxv64i64 = fptoui undef to %nxv64f64_nxv64i64 = fptoui undef to - %nxv64f16_nxv64i1 = fptoui undef to %nxv64f32_nxv64i1 = fptoui undef to %nxv64f64_nxv64i1 = fptoui undef to @@ -3026,652 +2606,442 @@ define void @fptoui() { define void @sitofp() { ; RV32-LABEL: 'sitofp' -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = sitofp <2 x i8> undef to <2 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = sitofp <2 x i16> undef to <2 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = sitofp <2 x i32> undef to <2 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = sitofp <2 x i64> undef to <2 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f64 = sitofp <2 x i16> undef to <2 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = sitofp <2 x i1> undef to <2 x 
half> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = sitofp <2 x i1> undef to <2 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = sitofp <2 x i1> undef to <2 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = sitofp <4 x i16> undef to <4 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = sitofp <4 x i32> undef to <4 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = sitofp <4 x i64> undef to <4 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f64 = sitofp <4 x i16> undef to <4 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = sitofp <4 x i1> undef to <4 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = sitofp <4 x i1> undef to <4 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f64 = sitofp <4 x i1> undef to <4 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = sitofp <8 x i16> undef to <8 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = sitofp <8 x i32> undef to <8 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = sitofp <8 x i64> undef to <8 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f64 = 
sitofp <8 x i16> undef to <8 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = sitofp <8 x i1> undef to <8 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f32 = sitofp <8 x i1> undef to <8 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f64 = sitofp <8 x i1> undef to <8 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f64 = sitofp <16 x i8> undef to <16 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = sitofp <16 x i16> undef to <16 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_v16f64 = sitofp <16 x i16> undef to <16 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = sitofp <16 x i32> undef to <16 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f64 = sitofp <16 x i32> undef to <16 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = sitofp <16 x i64> undef to <16 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f64 = sitofp <16 x i16> undef to <16 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = sitofp <16 x i1> undef to <16 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f32 = sitofp <16 x i1> undef to <16 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f64 = sitofp <16 x i1> undef to <16 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32f32 = sitofp <32 x i8> undef to <32 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_v32f64 = sitofp <32 x i8> undef to <32 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = sitofp <32 x i16> undef to <32 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f32 = sitofp <32 x i16> undef to <32 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i16_v32f64 = sitofp <32 x i16> undef to <32 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = sitofp <32 x i32> undef to <32 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f32 = sitofp <32 x i32> undef to <32 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32f64 = sitofp <32 x i32> undef to <32 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 
= sitofp <32 x i64> undef to <32 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i64_v32f32 = sitofp <32 x i64> undef to <32 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f64 = sitofp <32 x i16> undef to <32 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = sitofp <32 x i1> undef to <32 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f32 = sitofp <32 x i1> undef to <32 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_v32f64 = sitofp <32 x i1> undef to <32 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i8_v64f32 = sitofp <64 x i8> undef to <64 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_v64f64 = sitofp <64 x i8> undef to <64 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = sitofp <64 x i16> undef to <64 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64f32 = sitofp <64 x i16> undef to <64 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i16_v64f64 = sitofp <64 x i16> undef to <64 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = sitofp <64 x i32> undef to <64 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i32_v64f32 = sitofp <64 x i32> undef to <64 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64f64 = sitofp <64 x i32> undef to <64 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i64_v64f32 = sitofp <64 x i64> undef to <64 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f64 = sitofp <64 x i16> undef to <64 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i1_v64f32 = sitofp <64 x i1> undef to <64 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64i1_v64f64 = sitofp <64 x i1> undef to <64 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i8_v128f32 = sitofp <128 x i8> undef to <128 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i8_v128f64 = sitofp <128 x i8> undef to <128 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128f32 = sitofp <128 x i16> undef to <128 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i16_v128f64 = sitofp <128 x i16> undef to <128 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half> ; RV32-NEXT: Cost Model: Found an 
estimated cost of 4 for instruction: %v128i32_v128f32 = sitofp <128 x i32> undef to <128 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128f64 = sitofp <128 x i32> undef to <128 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i64_v128f32 = sitofp <128 x i64> undef to <128 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f64 = sitofp <128 x i16> undef to <128 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half> ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i1_v128f32 = sitofp <128 x i1> undef to <128 x float> ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128i1_v128f64 = sitofp <128 x i1> undef to <128 x double> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i16_nxv1f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f64 = 
sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64_nxv2f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64_nxv4f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_nxv8f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64_nxv8f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i8_nxv16f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i16_nxv16f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i64_nxv16f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_nxv16f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i8_nxv32f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i8_nxv32f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i16_nxv32f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_nxv32f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i64_nxv32f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_nxv32f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32i1_nxv32f64 = sitofp undef to -; RV32-NEXT: Cost Model: 
Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i8_nxv64f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i8_nxv64f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i16_nxv64f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv64i32_nxv64f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i32_nxv64f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i64_nxv64f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv64i64_nxv64f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64f64 = sitofp undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i1_nxv64f32 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64i1_nxv64f64 = sitofp undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; RV64-LABEL: 'sitofp' -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = sitofp <2 x i8> undef to <2 x half> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = sitofp <2 x i16> undef to <2 x half> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = sitofp <2 x i32> undef to <2 x half> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = sitofp <2 x i64> undef to <2 x half> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f64 = sitofp <2 x i16> undef to <2 x double> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = sitofp <2 x i1> undef to <2 x half> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = sitofp <2 x i1> undef to <2 x float> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = sitofp <2 x i1> undef to <2 x 
double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = sitofp <4 x i16> undef to <4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = sitofp <4 x i32> undef to <4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = sitofp <4 x i64> undef to <4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f64 = sitofp <4 x i16> undef to <4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = sitofp <4 x i1> undef to <4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = sitofp <4 x i1> undef to <4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f64 = sitofp <4 x i1> undef to <4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = sitofp <8 x i16> undef to <8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = sitofp <8 x i32> undef to <8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = sitofp <8 x i64> undef to <8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f64 = sitofp <8 x i16> undef to <8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = sitofp <8 x i1> undef to <8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f32 = sitofp <8 x i1> undef to <8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f64 = sitofp <8 x i1> undef to <8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f64 = sitofp <16 x i8> undef to <16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = sitofp <16 x i16> undef to <16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_v16f64 = sitofp <16 x i16> undef to <16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = sitofp <16 x i32> undef to <16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f64 = sitofp <16 x i32> undef to <16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = sitofp <16 x i64> undef to <16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f64 = sitofp <16 x i16> undef to <16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = sitofp <16 x i1> undef to <16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f32 = sitofp <16 x i1> undef to <16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f64 = sitofp <16 x i1> undef to <16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32f32 = sitofp <32 x i8> undef to <32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_v32f64 = sitofp <32 x i8> undef to <32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = sitofp <32 x i16> undef to <32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f32 = sitofp <32 x i16> undef to <32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i16_v32f64 = sitofp <32 x i16> undef to <32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = sitofp <32 x i32> undef to <32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f32 = sitofp <32 x i32> undef to <32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32f64 = sitofp <32 x i32> undef to <32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = sitofp <32 x i64> undef to <32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i64_v32f32 = sitofp <32 x i64> undef to <32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f64 = sitofp <32 x i16> undef to <32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = sitofp <32 x i1> undef to <32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f32 = sitofp <32 x i1> undef to <32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_v32f64 = sitofp <32 x i1> undef to <32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i8_v64f32 = sitofp <64 x i8> undef to <64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_v64f64 = sitofp <64 x i8> undef to <64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = sitofp <64 x i16> undef to <64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64f32 = sitofp <64 x i16> undef to <64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i16_v64f64 = sitofp <64 x i16> undef to <64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = sitofp <64 x i32> undef to <64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i32_v64f32 = sitofp <64 x i32> undef to <64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64f64 = sitofp <64 x i32> undef to <64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i64_v64f32 = sitofp <64 x i64> undef to <64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f64 = sitofp <64 x i16> undef to <64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i1_v64f32 = sitofp <64 x i1> undef to <64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64i1_v64f64 = sitofp <64 x i1> undef to <64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i8_v128f32 = sitofp <128 x i8> undef to <128 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i8_v128f64 = sitofp <128 x i8> undef to <128 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128f32 = sitofp <128 x i16> undef to <128 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i16_v128f64 = sitofp <128 x i16> undef to <128 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v128i32_v128f32 = sitofp <128 x i32> undef to <128 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128f64 = sitofp <128 x i32> undef to <128 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i64_v128f32 = sitofp <128 x i64> undef to <128 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f64 = sitofp <128 x i16> undef to <128 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i1_v128f32 = sitofp <128 x i1> undef to <128 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128i1_v128f64 = sitofp <128 x i1> undef to <128 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f32 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i16_nxv1f64 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f32 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f64 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f32 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f64 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f64 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f32 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2f64 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f32 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f64 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64_nxv2f32 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f64 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f64 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f32 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f64 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f32 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4f64 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f32 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f64 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64_nxv4f32 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f64 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f32 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f64 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f32 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f64 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f32 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_nxv8f64 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f32 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f64 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64_nxv8f32 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f64 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f32 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f64 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16f32 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i8_nxv16f64 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f32 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i16_nxv16f64 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f32 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16f64 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i64_nxv16f32 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f64 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f32 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_nxv16f64 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i8_nxv32f32 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i8_nxv32f64 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32f32 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i16_nxv32f64 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_nxv32f32 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32f64 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i64_nxv32f32 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f64 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_nxv32f32 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32i1_nxv32f64 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i8_nxv64f32 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i8_nxv64f64 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64f32 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i16_nxv64f64 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv64i32_nxv64f32 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i32_nxv64f64 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64f16 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i64_nxv64f32 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64f64 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i1_nxv64f32 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64i1_nxv64f64 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
- %v2i8_v2f16 = sitofp <2 x i8> undef to <2 x half>
  %v2i8_v2f32 = sitofp <2 x i8> undef to <2 x float>
  %v2i8_v2f64 = sitofp <2 x i8> undef to <2 x double>
- %v2i16_v2f16 = sitofp <2 x i16> undef to <2 x half>
  %v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
  %v2i16_v2f64 = sitofp <2 x i16> undef to <2 x double>
- %v2i32_v2f16 = sitofp <2 x i32> undef to <2 x half>
  %v2i32_v2f32 = sitofp <2 x i32> undef to <2 x float>
  %v2i32_v2f64 = sitofp <2 x i32> undef to <2 x double>
- %v2i64_v2f16 = sitofp <2 x i64> undef to <2 x half>
  %v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
  %v2i64_v2f64 = sitofp <2 x i16> undef to <2 x double>
- %v2i1_v2f16 = sitofp <2 x i1> undef to <2 x half>
  %v2i1_v2f32 = sitofp <2 x i1> undef to <2 x float>
  %v2i1_v2f64 = sitofp <2 x i1> undef to <2 x double>
- %v4i8_v4f16 = sitofp <4 x i8> undef to <4 x half>
  %v4i8_v4f32 = sitofp <4 x i8> undef to <4 x float>
  %v4i8_v4f64 = sitofp <4 x i8> undef to <4 x double>
- %v4i16_v4f16 = sitofp <4 x i16> undef to <4 x half>
  %v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
  %v4i16_v4f64 = sitofp <4 x i16> undef to <4 x double>
- %v4i32_v4f16 = sitofp <4 x i32> undef to <4 x half>
  %v4i32_v4f32 = sitofp <4 x i32> undef to <4 x float>
  %v4i32_v4f64 = sitofp <4 x i32> undef to <4 x double>
- %v4i64_v4f16 = sitofp <4 x i64> undef to <4 x half>
  %v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
  %v4i64_v4f64 = sitofp <4 x i16> undef to <4 x double>
- %v4i1_v4f16 = sitofp <4 x i1> undef to <4 x half>
  %v4i1_v4f32 = sitofp <4 x i1> undef to <4 x float>
  %v4i1_v4f64 = sitofp <4 x i1> undef to <4 x double>
- %v8i8_v8f16 = sitofp <8 x i8> undef to <8 x half>
  %v8i8_v8f32 = sitofp <8 x i8> undef to <8 x float>
  %v8i8_v8f64 = sitofp <8 x i8> undef to <8 x double>
- %v8i16_v8f16 = sitofp <8 x i16> undef to <8 x half>
  %v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
  %v8i16_v8f64 = sitofp <8 x i16> undef to <8 x double>
- %v8i32_v8f16 = sitofp <8 x i32> undef to <8 x half>
  %v8i32_v8f32 = sitofp <8 x i32> undef to <8 x float>
  %v8i32_v8f64 = sitofp <8 x i32> undef to <8 x double>
- %v8i64_v8f16 = sitofp <8 x i64> undef to <8 x half>
  %v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
  %v8i64_v8f64 = sitofp <8 x i16> undef to <8 x double>
- %v8i1_v8f16 = sitofp <8 x i1> undef to <8 x half>
  %v8i1_v8f32 = sitofp <8 x i1> undef to <8 x float>
  %v8i1_v8f64 = sitofp <8 x i1> undef to <8 x double>
- %v16i8_v16f16 = sitofp <16 x i8> undef to <16 x half>
  %v16i8_v16f32 = sitofp <16 x i8> undef to <16 x float>
  %v16i8_v16f64 = sitofp <16 x i8> undef to <16 x double>
- %v16i16_v16f16 = sitofp <16 x i16> undef to <16 x half>
  %v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
  %v16i16_v16f64 = sitofp <16 x i16> undef to <16 x double>
- %v16i32_v16f16 = sitofp <16 x i32> undef to <16 x half>
  %v16i32_v16f32 = sitofp <16 x i32> undef to <16 x float>
  %v16i32_v16f64 = sitofp <16 x i32> undef to <16 x double>
- %v16i64_v16f16 = sitofp <16 x i64> undef to <16 x half>
  %v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
  %v16i64_v16f64 = sitofp <16 x i16> undef to <16 x double>
- %v16i1_v16f16 = sitofp <16 x i1> undef to <16 x half>
  %v16i1_v16f32 = sitofp <16 x i1> undef to <16 x float>
  %v16i1_v16f64 = sitofp <16 x i1> undef to <16 x double>
- %v32i8_v32f16 = sitofp <32 x i8> undef to <32 x half>
  %v32i8_v32f32 = sitofp <32 x i8> undef to <32 x float>
  %v32i8_v32f64 = sitofp <32 x i8> undef to <32 x double>
- %v32i16_v32f16 = sitofp <32 x i16> undef to <32 x half>
  %v32i16_v32f32 = sitofp <32 x i16> undef to <32 x float>
  %v32i16_v32f64 = sitofp <32 x i16> undef to <32 x double>
- %v32i32_v32f16 = sitofp <32 x i32> undef to <32 x half>
  %v32i32_v32f32 = sitofp <32 x i32> undef to <32 x float>
  %v32i32_v32f64 = sitofp <32 x i32> undef to <32 x double>
- %v32i64_v32f16 = sitofp <32 x i64> undef to <32 x half>
  %v32i64_v32f32 = sitofp <32 x i64> undef to <32 x float>
  %v32i64_v32f64 = sitofp <32 x i16> undef to <32 x double>
- %v32i1_v32f16 = sitofp <32 x i1> undef to <32 x half>
  %v32i1_v32f32 = sitofp <32 x i1> undef to <32 x float>
  %v32i1_v32f64 = sitofp <32 x i1> undef to <32 x double>
- %v64i8_v64f16 = sitofp <64 x i8> undef to <64 x half>
  %v64i8_v64f32 = sitofp <64 x i8> undef to <64 x float>
  %v64i8_v64f64 = sitofp <64 x i8> undef to <64 x double>
- %v64i16_v64f16 = sitofp <64 x i16> undef to <64 x half>
  %v64i16_v64f32 = sitofp <64 x i16> undef to <64 x float>
  %v64i16_v64f64 = sitofp <64 x i16> undef to <64 x double>
- %v64i32_v64f16 = sitofp <64 x i32> undef to <64 x half>
  %v64i32_v64f32 = sitofp <64 x i32> undef to <64 x float>
  %v64i32_v64f64 = sitofp <64 x i32> undef to <64 x double>
- %v64i64_v64f16 = sitofp <64 x i64> undef to <64 x half>
  %v64i64_v64f32 = sitofp <64 x i64> undef to <64 x float>
  %v64i64_v64f64 = sitofp <64 x i16> undef to <64 x double>
- %v64i1_v64f16 = sitofp <64 x i1> undef to <64 x half>
  %v64i1_v64f32 = sitofp <64 x i1> undef to <64 x float>
  %v64i1_v64f64 = sitofp <64 x i1> undef to <64 x double>
- %v128i8_v128f16 = sitofp <128 x i8> undef to <128 x half>
  %v128i8_v128f32 = sitofp <128 x i8> undef to <128 x float>
  %v128i8_v128f64 = sitofp <128 x i8> undef to <128 x double>
- %v128i16_v128f16 = sitofp <128 x i16> undef to <128 x half>
  %v128i16_v128f32 = sitofp <128 x i16> undef to <128 x float>
  %v128i16_v128f64 = sitofp <128 x i16> undef to <128 x double>
- %v128i32_v128f16 = sitofp <128 x i32> undef to <128 x half>
  %v128i32_v128f32 = sitofp <128 x i32> undef to <128 x float>
  %v128i32_v128f64 = sitofp <128 x i32> undef to <128 x double>
- %v128i64_v128f16 = sitofp <128 x i64> undef to <128 x half>
  %v128i64_v128f32 = sitofp <128 x i64> undef to <128 x float>
  %v128i64_v128f64 = sitofp <128 x i16> undef to <128 x double>
- %v128i1_v128f16 = sitofp <128 x i1> undef to <128 x half>
  %v128i1_v128f32 = sitofp <128 x i1> undef to <128 x float>
  %v128i1_v128f64 = sitofp <128 x i1> undef to <128 x double>
- %nxv1i8_nxv1f16 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
  %nxv1i8_nxv1f32 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
  %nxv1i8_nxv1f64 = sitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
- %nxv1i16_nxv1f16 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x half>
  %nxv1i16_nxv1f32 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x float>
  %nxv1i16_nxv1f64 = sitofp <vscale x 1 x i16> undef to <vscale x 1 x double>
- %nxv1i32_nxv1f16 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x half>
  %nxv1i32_nxv1f32 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x float>
  %nxv1i32_nxv1f64 = sitofp <vscale x 1 x i32> undef to <vscale x 1 x double>
- %nxv1i64_nxv1f16 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x half>
  %nxv1i64_nxv1f32 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x float>
  %nxv1i64_nxv1f64 = sitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
- %nxv1i1_nxv1f16 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
  %nxv1i1_nxv1f32 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
  %nxv1i1_nxv1f64 = sitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
- %nxv2i8_nxv2f16 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
  %nxv2i8_nxv2f32 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
  %nxv2i8_nxv2f64 = sitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
- %nxv2i16_nxv2f16 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x half>
  %nxv2i16_nxv2f32 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x float>
  %nxv2i16_nxv2f64 = sitofp <vscale x 2 x i16> undef to <vscale x 2 x double>
- %nxv2i32_nxv2f16 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x half>
  %nxv2i32_nxv2f32 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x float>
  %nxv2i32_nxv2f64 = sitofp <vscale x 2 x i32> undef to <vscale x 2 x double>
- %nxv2i64_nxv2f16 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x half>
  %nxv2i64_nxv2f32 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x float>
  %nxv2i64_nxv2f64 = sitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
- %nxv2i1_nxv2f16 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
  %nxv2i1_nxv2f32 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
  %nxv2i1_nxv2f64 = sitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
- %nxv4i8_nxv4f16 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
  %nxv4i8_nxv4f32 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
  %nxv4i8_nxv4f64 = sitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
- %nxv4i16_nxv4f16 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x half>
  %nxv4i16_nxv4f32 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x float>
  %nxv4i16_nxv4f64 = sitofp <vscale x 4 x i16> undef to <vscale x 4 x double>
- %nxv4i32_nxv4f16 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x half>
  %nxv4i32_nxv4f32 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x float>
  %nxv4i32_nxv4f64 = sitofp <vscale x 4 x i32> undef to <vscale x 4 x double>
- %nxv4i64_nxv4f16 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x half>
  %nxv4i64_nxv4f32 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x float>
  %nxv4i64_nxv4f64 = sitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
- %nxv4i1_nxv4f16 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
  %nxv4i1_nxv4f32 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
  %nxv4i1_nxv4f64 = sitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
- %nxv8i8_nxv8f16 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
  %nxv8i8_nxv8f32 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
  %nxv8i8_nxv8f64 = sitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
- %nxv8i16_nxv8f16 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x half>
  %nxv8i16_nxv8f32 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x float>
  %nxv8i16_nxv8f64 = sitofp <vscale x 8 x i16> undef to <vscale x 8 x double>
- %nxv8i32_nxv8f16 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x half>
  %nxv8i32_nxv8f32 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x float>
  %nxv8i32_nxv8f64 = sitofp <vscale x 8 x i32> undef to <vscale x 8 x double>
- %nxv8i64_nxv8f16 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x half>
  %nxv8i64_nxv8f32 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x float>
  %nxv8i64_nxv8f64 = sitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
- %nxv8i1_nxv8f16 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
  %nxv8i1_nxv8f32 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
  %nxv8i1_nxv8f64 = sitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
- %nxv16i8_nxv16f16 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
  %nxv16i8_nxv16f32 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
  %nxv16i8_nxv16f64 = sitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
- %nxv16i16_nxv16f16 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x half>
  %nxv16i16_nxv16f32 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x float>
  %nxv16i16_nxv16f64 = sitofp <vscale x 16 x i16> undef to <vscale x 16 x double>
- %nxv16i32_nxv16f16 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x half>
  %nxv16i32_nxv16f32 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x float>
  %nxv16i32_nxv16f64 = sitofp <vscale x 16 x i32> undef to <vscale x 16 x double>
- %nxv16i64_nxv16f16 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x half>
  %nxv16i64_nxv16f32 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x float>
  %nxv16i64_nxv16f64 = sitofp <vscale x 16 x i64> undef to <vscale x 16 x double>
- %nxv16i1_nxv16f16 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
  %nxv16i1_nxv16f32 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
  %nxv16i1_nxv16f64 = sitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
- %nxv32i8_nxv32f16 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
  %nxv32i8_nxv32f32 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
  %nxv32i8_nxv32f64 = sitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
- %nxv32i16_nxv32f16 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x half>
  %nxv32i16_nxv32f32 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x float>
  %nxv32i16_nxv32f64 = sitofp <vscale x 32 x i16> undef to <vscale x 32 x double>
- %nxv32i32_nxv32f16 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x half>
  %nxv32i32_nxv32f32 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x float>
  %nxv32i32_nxv32f64 = sitofp <vscale x 32 x i32> undef to <vscale x 32 x double>
- %nxv32i64_nxv32f16 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x half>
  %nxv32i64_nxv32f32 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x float>
  %nxv32i64_nxv32f64 = sitofp <vscale x 32 x i64> undef to <vscale x 32 x double>
- %nxv32i1_nxv32f16 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
  %nxv32i1_nxv32f32 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
  %nxv32i1_nxv32f64 = sitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
- %nxv64i8_nxv64f16 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
  %nxv64i8_nxv64f32 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
  %nxv64i8_nxv64f64 = sitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
- %nxv64i16_nxv64f16 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x half>
  %nxv64i16_nxv64f32 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x float>
  %nxv64i16_nxv64f64 = sitofp <vscale x 64 x i16> undef to <vscale x 64 x double>
- %nxv64i32_nxv64f16 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x half>
  %nxv64i32_nxv64f32 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x float>
  %nxv64i32_nxv64f64 = sitofp <vscale x 64 x i32> undef to <vscale x 64 x double>
- %nxv64i64_nxv64f16 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x half>
  %nxv64i64_nxv64f32 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x float>
  %nxv64i64_nxv64f64 = sitofp <vscale x 64 x i64> undef to <vscale x 64 x double>
- %nxv64i1_nxv64f16 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
  %nxv64i1_nxv64f32 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
  %nxv64i1_nxv64f64 = sitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
@@ -3680,652 +3050,442 @@ define void @sitofp() {
 define void @uitofp() {
 ; RV32-LABEL: 'uitofp'
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = uitofp <2 x i8> undef to <2 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = uitofp <2 x i16> undef to <2 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = uitofp <2 x i32> undef to <2 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = uitofp <2 x i64> undef to <2 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f64 = uitofp <2 x i16> undef to <2 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = uitofp <2 x i1> undef to <2 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = uitofp <2 x i1> undef to <2 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = uitofp <2 x i1> undef to <2 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = uitofp <4 x i16> undef to <4 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = uitofp <4 x i32> undef to <4 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = uitofp <4 x i64> undef to <4 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f64 = uitofp <4 x i16> undef to <4 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = uitofp <4 x i1> undef to <4 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = uitofp <4 x i1> undef to <4 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f64 = uitofp <4 x i1> undef to <4 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = uitofp <8 x i16> undef to <8 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = uitofp <8 x i32> undef to <8 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = uitofp <8 x i64> undef to <8 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f64 = uitofp <8 x i16> undef to <8 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = uitofp <8 x i1> undef to <8 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f32 = uitofp <8 x i1> undef to <8 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f64 = uitofp <8 x i1> undef to <8 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f64 = uitofp <16 x i8> undef to <16 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = uitofp <16 x i16> undef to <16 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_v16f64 = uitofp <16 x i16> undef to <16 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = uitofp <16 x i32> undef to <16 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f64 = uitofp <16 x i32> undef to <16 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = uitofp <16 x i64> undef to <16 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f64 = uitofp <16 x i16> undef to <16 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = uitofp <16 x i1> undef to <16 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f32 = uitofp <16 x i1> undef to <16 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f64 = uitofp <16 x i1> undef to <16 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32f32 = uitofp <32 x i8> undef to <32 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_v32f64 = uitofp <32 x i8> undef to <32 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = uitofp <32 x i16> undef to <32 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f32 = uitofp <32 x i16> undef to <32 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i16_v32f64 = uitofp <32 x i16> undef to <32 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = uitofp <32 x i32> undef to <32 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f32 = uitofp <32 x i32> undef to <32 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32f64 = uitofp <32 x i32> undef to <32 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = uitofp <32 x i64> undef to <32 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i64_v32f32 = uitofp <32 x i64> undef to <32 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f64 = uitofp <32 x i16> undef to <32 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = uitofp <32 x i1> undef to <32 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f32 = uitofp <32 x i1> undef to <32 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_v32f64 = uitofp <32 x i1> undef to <32 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i8_v64f32 = uitofp <64 x i8> undef to <64 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_v64f64 = uitofp <64 x i8> undef to <64 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = uitofp <64 x i16> undef to <64 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64f32 = uitofp <64 x i16> undef to <64 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i16_v64f64 = uitofp <64 x i16> undef to <64 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = uitofp <64 x i32> undef to <64 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i32_v64f32 = uitofp <64 x i32> undef to <64 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64f64 = uitofp <64 x i32> undef to <64 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i64_v64f32 = uitofp <64 x i64> undef to <64 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f64 = uitofp <64 x i16> undef to <64 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i1_v64f32 = uitofp <64 x i1> undef to <64 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64i1_v64f64 = uitofp <64 x i1> undef to <64 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i8_v128f32 = uitofp <128 x i8> undef to <128 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i8_v128f64 = uitofp <128 x i8> undef to <128 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128f32 = uitofp <128 x i16> undef to <128 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i16_v128f64 = uitofp <128 x i16> undef to <128 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v128i32_v128f32 = uitofp <128 x i32> undef to <128 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128f64 = uitofp <128 x i32> undef to <128 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i64_v128f32 = uitofp <128 x i64> undef to <128 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f64 = uitofp <128 x i16> undef to <128 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i1_v128f32 = uitofp <128 x i1> undef to <128 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128i1_v128f64 = uitofp <128 x i1> undef to <128 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f32 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i16_nxv1f64 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = uitofp <vscale x 1 x i32> undef to <vscale x 1 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f32 = uitofp <vscale x 1 x i32> undef to <vscale x 1 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f64 = uitofp <vscale x 1 x i32> undef to <vscale x 1 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f32 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f64 = uitofp <vscale x 1 x i64> undef to <vscale x 1 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = uitofp <vscale x 1 x i1> undef to <vscale x 1 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f64 = uitofp <vscale x 2 x i8> undef to <vscale x 2 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = uitofp <vscale x 2 x i16> undef to <vscale x 2 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f32 = uitofp <vscale x 2 x i16> undef to <vscale x 2 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2f64 = uitofp <vscale x 2 x i16> undef to <vscale x 2 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = uitofp <vscale x 2 x i32> undef to <vscale x 2 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f32 = uitofp <vscale x 2 x i32> undef to <vscale x 2 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f64 = uitofp <vscale x 2 x i32> undef to <vscale x 2 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64_nxv2f32 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f64 = uitofp <vscale x 2 x i64> undef to <vscale x 2 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f64 = uitofp <vscale x 2 x i1> undef to <vscale x 2 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f32 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f64 = uitofp <vscale x 4 x i8> undef to <vscale x 4 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = uitofp <vscale x 4 x i16> undef to <vscale x 4 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f32 = uitofp <vscale x 4 x i16> undef to <vscale x 4 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4f64 = uitofp <vscale x 4 x i16> undef to <vscale x 4 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = uitofp <vscale x 4 x i32> undef to <vscale x 4 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f32 = uitofp <vscale x 4 x i32> undef to <vscale x 4 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f64 = uitofp <vscale x 4 x i32> undef to <vscale x 4 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f16 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64_nxv4f32 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f64 = uitofp <vscale x 4 x i64> undef to <vscale x 4 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f32 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f64 = uitofp <vscale x 4 x i1> undef to <vscale x 4 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f32 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f64 = uitofp <vscale x 8 x i8> undef to <vscale x 8 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = uitofp <vscale x 8 x i16> undef to <vscale x 8 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f32 = uitofp <vscale x 8 x i16> undef to <vscale x 8 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_nxv8f64 = uitofp <vscale x 8 x i16> undef to <vscale x 8 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = uitofp <vscale x 8 x i32> undef to <vscale x 8 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f32 = uitofp <vscale x 8 x i32> undef to <vscale x 8 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f64 = uitofp <vscale x 8 x i32> undef to <vscale x 8 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64_nxv8f32 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f64 = uitofp <vscale x 8 x i64> undef to <vscale x 8 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f32 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f64 = uitofp <vscale x 8 x i1> undef to <vscale x 8 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16f32 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i8_nxv16f64 = uitofp <vscale x 16 x i8> undef to <vscale x 16 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = uitofp <vscale x 16 x i16> undef to <vscale x 16 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f32 = uitofp <vscale x 16 x i16> undef to <vscale x 16 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i16_nxv16f64 = uitofp <vscale x 16 x i16> undef to <vscale x 16 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = uitofp <vscale x 16 x i32> undef to <vscale x 16 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f32 = uitofp <vscale x 16 x i32> undef to <vscale x 16 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16f64 = uitofp <vscale x 16 x i32> undef to <vscale x 16 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = uitofp <vscale x 16 x i64> undef to <vscale x 16 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i64_nxv16f32 = uitofp <vscale x 16 x i64> undef to <vscale x 16 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f64 = uitofp <vscale x 16 x i64> undef to <vscale x 16 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f32 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_nxv16f64 = uitofp <vscale x 16 x i1> undef to <vscale x 16 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i8_nxv32f32 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i8_nxv32f64 = uitofp <vscale x 32 x i8> undef to <vscale x 32 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = uitofp <vscale x 32 x i16> undef to <vscale x 32 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32f32 = uitofp <vscale x 32 x i16> undef to <vscale x 32 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i16_nxv32f64 = uitofp <vscale x 32 x i16> undef to <vscale x 32 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = uitofp <vscale x 32 x i32> undef to <vscale x 32 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_nxv32f32 = uitofp <vscale x 32 x i32> undef to <vscale x 32 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32f64 = uitofp <vscale x 32 x i32> undef to <vscale x 32 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = uitofp <vscale x 32 x i64> undef to <vscale x 32 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i64_nxv32f32 = uitofp <vscale x 32 x i64> undef to <vscale x 32 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f64 = uitofp <vscale x 32 x i64> undef to <vscale x 32 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_nxv32f32 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32i1_nxv32f64 = uitofp <vscale x 32 x i1> undef to <vscale x 32 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i8_nxv64f32 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i8_nxv64f64 = uitofp <vscale x 64 x i8> undef to <vscale x 64 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = uitofp <vscale x 64 x i16> undef to <vscale x 64 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64f32 = uitofp <vscale x 64 x i16> undef to <vscale x 64 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i16_nxv64f64 = uitofp <vscale x 64 x i16> undef to <vscale x 64 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = uitofp <vscale x 64 x i32> undef to <vscale x 64 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv64i32_nxv64f32 = uitofp <vscale x 64 x i32> undef to <vscale x 64 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i32_nxv64f64 = uitofp <vscale x 64 x i32> undef to <vscale x 64 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i64_nxv64f16 = uitofp <vscale x 64 x i64> undef to <vscale x 64 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv64i64_nxv64f32 = uitofp <vscale x 64 x i64> undef to <vscale x 64 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64f64 = uitofp <vscale x 64 x i64> undef to <vscale x 64 x double>
-; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x half>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i1_nxv64f32 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x float>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64i1_nxv64f64 = uitofp <vscale x 64 x i1> undef to <vscale x 64 x double>
 ; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; RV64-LABEL: 'uitofp'
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_v2f16 = uitofp <2 x i8> undef to <2 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f16 = uitofp <2 x i16> undef to <2 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f16 = uitofp <2 x i32> undef to <2 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f16 = uitofp <2 x i64> undef to <2 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_v2f64 = uitofp <2 x i16> undef to <2 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f16 = uitofp <2 x i1> undef to <2 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f32 = uitofp <2 x i1> undef to <2 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i1_v2f64 = uitofp <2 x i1> undef to <2 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f16 = uitofp <4 x i16> undef to <4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f16 = uitofp <4 x i32> undef to <4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f16 = uitofp <4 x i64> undef to <4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4f64 = uitofp <4 x i16> undef to <4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f16 = uitofp <4 x i1> undef to <4 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f32 = uitofp <4 x i1> undef to <4 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i1_v4f64 = uitofp <4 x i1> undef to <4 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f16 = uitofp <8 x i16> undef to <8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f16 = uitofp <8 x i32> undef to <8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f16 = uitofp <8 x i64> undef to <8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8f64 = uitofp <8 x i16> undef to <8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f16 = uitofp <8 x i1> undef to <8 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f32 = uitofp <8 x i1> undef to <8 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i1_v8f64 = uitofp <8 x i1> undef to <8 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i8_v16f64 = uitofp <16 x i8> undef to <16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f16 = uitofp <16 x i16> undef to <16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16_v16f64 = uitofp <16 x i16> undef to <16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f16 = uitofp <16 x i32> undef to <16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32_v16f64 = uitofp <16 x i32> undef to <16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f16 = uitofp <16 x i64> undef to <16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16f64 = uitofp <16 x i16> undef to <16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f16 = uitofp <16 x i1> undef to <16 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f32 = uitofp <16 x i1> undef to <16 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i1_v16f64 = uitofp <16 x i1> undef to <16 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32f32 = uitofp <32 x i8> undef to <32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i8_v32f64 = uitofp <32 x i8> undef to <32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f16 = uitofp <32 x i16> undef to <32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32f32 = uitofp <32 x i16> undef to <32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i16_v32f64 = uitofp <32 x i16> undef to <32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f16 = uitofp <32 x i32> undef to <32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32f32 = uitofp <32 x i32> undef to <32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32f64 = uitofp <32 x i32> undef to <32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f16 = uitofp <32 x i64> undef to <32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i64_v32f32 = uitofp <32 x i64> undef to <32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32i64_v32f64 = uitofp <32 x i16> undef to <32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f16 = uitofp <32 x i1> undef to <32 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i1_v32f32 = uitofp <32 x i1> undef to <32 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_v32f64 = uitofp <32 x i1> undef to <32 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i8_v64f32 = uitofp <64 x i8> undef to <64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i8_v64f64 = uitofp <64 x i8> undef to <64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64f16 = uitofp <64 x i16> undef to <64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i16_v64f32 = uitofp <64 x i16> undef to <64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i16_v64f64 = uitofp <64 x i16> undef to <64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64f16 = uitofp <64 x i32> undef to <64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i32_v64f32 = uitofp <64 x i32> undef to <64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i32_v64f64 = uitofp <64 x i32> undef to <64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i64_v64f32 = uitofp <64 x i64> undef to <64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64f64 = uitofp <64 x i16> undef to <64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v64i1_v64f32 = uitofp <64 x i1> undef to <64 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64i1_v64f64 = uitofp <64 x i1> undef to <64 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i8_v128f32 = uitofp <128 x i8> undef to <128 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v128i8_v128f64 = uitofp <128 x i8> undef to <128 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i16_v128f32 = uitofp <128 x i16> undef to <128 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i16_v128f64 = uitofp <128 x i16> undef to <128 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v128i32_v128f32 = uitofp <128 x i32> undef to <128 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i32_v128f64 = uitofp <128 x i32> undef to <128 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i64_v128f32 = uitofp <128 x i64> undef to <128 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128f64 = uitofp <128 x i16> undef to <128 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v128i1_v128f32 = uitofp <128 x i1> undef to <128 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128i1_v128f64 = uitofp <128 x i1> undef to <128 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8_nxv1f16 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f32 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i8_nxv1f64 = uitofp <vscale x 1 x i8> undef to <vscale x 1 x double>
-; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f16 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x half>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1f32 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x float>
 ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i16_nxv1f64 = uitofp <vscale x 1 x i16> undef to <vscale x 1 x double>
-;
RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32_nxv1f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64_nxv1f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i64_nxv1f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv1i1_nxv1f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8_nxv2f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16_nxv2f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32_nxv2f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64_nxv2f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2i1_nxv2f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8_nxv4f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %nxv4i64_nxv4f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64_nxv4f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i1_nxv4f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8_nxv8f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_nxv8f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64_nxv8f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i1_nxv8f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8_nxv16f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i8_nxv16f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i16_nxv16f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i32_nxv16f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i64_nxv16f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f16 = 
uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i1_nxv16f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_nxv16f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8_nxv32f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i8_nxv32f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i8_nxv32f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i16_nxv32f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i16_nxv32f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i32_nxv32f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i32_nxv32f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i64_nxv32f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i1_nxv32f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv32i1_nxv32f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32i1_nxv32f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i8_nxv64f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i8_nxv64f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i8_nxv64f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i16_nxv64f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i16_nxv64f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i16_nxv64f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv64i32_nxv64f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i32_nxv64f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i64_nxv64f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64f64 = uitofp undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv64i1_nxv64f16 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv64i1_nxv64f32 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64i1_nxv64f64 = uitofp undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret void ; - %v2i8_v2f16 = uitofp <2 x i8> undef to <2 x half> %v2i8_v2f32 = uitofp <2 x i8> undef to <2 x float> %v2i8_v2f64 = uitofp <2 x i8> undef to <2 x double> - %v2i16_v2f16 = uitofp <2 x i16> undef to <2 x half> %v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float> %v2i16_v2f64 = uitofp <2 x i16> undef to <2 x double> - %v2i32_v2f16 = uitofp <2 x i32> undef to <2 x half> %v2i32_v2f32 = uitofp <2 x i32> undef to <2 x float> %v2i32_v2f64 = uitofp <2 x i32> undef to <2 x double> - %v2i64_v2f16 = uitofp <2 x i64> undef to <2 x half> %v2i64_v2f32 = uitofp <2 x i64> undef to <2 x float> %v2i64_v2f64 = uitofp <2 x i16> undef to <2 x double> - %v2i1_v2f16 = uitofp <2 x i1> undef to <2 x half> %v2i1_v2f32 = uitofp <2 x i1> undef to <2 x float> %v2i1_v2f64 = uitofp <2 x i1> undef to <2 x double> - %v4i8_v4f16 = uitofp <4 x i8> undef to <4 x half> %v4i8_v4f32 = uitofp <4 x i8> undef to <4 x float> %v4i8_v4f64 = uitofp <4 x i8> undef to <4 x double> - %v4i16_v4f16 = uitofp <4 x i16> undef to <4 x half> %v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float> %v4i16_v4f64 = uitofp <4 x i16> undef to <4 x double> - %v4i32_v4f16 = uitofp <4 x i32> undef to <4 x half> %v4i32_v4f32 = uitofp <4 x i32> undef to <4 x float> %v4i32_v4f64 = uitofp <4 x i32> undef to <4 x double> - %v4i64_v4f16 = uitofp <4 x i64> undef to <4 x half> %v4i64_v4f32 = uitofp <4 x i64> undef to <4 x float> %v4i64_v4f64 = uitofp <4 x i16> undef to <4 x double> - %v4i1_v4f16 = uitofp <4 x i1> undef to <4 x half> %v4i1_v4f32 = uitofp <4 x i1> undef to <4 x float> %v4i1_v4f64 = uitofp <4 x i1> undef to <4 x double> - %v8i8_v8f16 = uitofp <8 x i8> undef to <8 x half> %v8i8_v8f32 = uitofp <8 x i8> undef to <8 x float> %v8i8_v8f64 = uitofp <8 x i8> undef to <8 x double> - %v8i16_v8f16 = uitofp <8 x i16> undef to <8 x half> %v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float> %v8i16_v8f64 = uitofp <8 x i16> undef to <8 x double> - %v8i32_v8f16 = uitofp <8 x i32> undef to <8 x half> %v8i32_v8f32 = uitofp <8 x i32> undef to <8 x float> %v8i32_v8f64 = uitofp <8 x i32> undef to <8 x double> - %v8i64_v8f16 = uitofp <8 x i64> undef to <8 x half> %v8i64_v8f32 = uitofp <8 x i64> undef to <8 x float> %v8i64_v8f64 = uitofp <8 x i16> undef to <8 x double> - %v8i1_v8f16 = uitofp <8 x i1> undef to <8 x half> %v8i1_v8f32 = uitofp <8 x i1> undef to <8 x float> %v8i1_v8f64 = uitofp <8 x i1> undef to <8 x double> - %v16i8_v16f16 = uitofp <16 x i8> undef to <16 x half> %v16i8_v16f32 = uitofp <16 x i8> undef to <16 x float> %v16i8_v16f64 = uitofp <16 x i8> undef to <16 x double> - %v16i16_v16f16 = uitofp <16 x i16> undef to <16 x half> %v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float> %v16i16_v16f64 = uitofp <16 x i16> undef to <16 x double> - %v16i32_v16f16 = uitofp <16 x i32> undef to <16 x half> %v16i32_v16f32 = uitofp <16 x i32> undef to <16 x float> %v16i32_v16f64 = uitofp <16 x i32> undef to <16 x double> - %v16i64_v16f16 = uitofp <16 x i64> undef to <16 x half> %v16i64_v16f32 = uitofp <16 x i64> undef to <16 x float> %v16i64_v16f64 = uitofp <16 x i16> undef to <16 x double> - %v16i1_v16f16 = uitofp <16 x i1> undef to <16 x half> %v16i1_v16f32 = uitofp <16 x i1> undef to <16 x float> %v16i1_v16f64 = uitofp <16 x i1> undef to <16 x double> - %v32i8_v32f16 = uitofp <32 x i8> undef to <32 x half> %v32i8_v32f32 = uitofp <32 x i8> undef to <32 x float> %v32i8_v32f64 = uitofp <32 x i8> undef to <32 x double> - %v32i16_v32f16 = uitofp <32 x i16> undef to <32 x half> %v32i16_v32f32 = uitofp <32 x i16> undef to <32 x float> %v32i16_v32f64 = uitofp <32 x 
i16> undef to <32 x double> - %v32i32_v32f16 = uitofp <32 x i32> undef to <32 x half> %v32i32_v32f32 = uitofp <32 x i32> undef to <32 x float> %v32i32_v32f64 = uitofp <32 x i32> undef to <32 x double> - %v32i64_v32f16 = uitofp <32 x i64> undef to <32 x half> %v32i64_v32f32 = uitofp <32 x i64> undef to <32 x float> %v32i64_v32f64 = uitofp <32 x i16> undef to <32 x double> - %v32i1_v32f16 = uitofp <32 x i1> undef to <32 x half> %v32i1_v32f32 = uitofp <32 x i1> undef to <32 x float> %v32i1_v32f64 = uitofp <32 x i1> undef to <32 x double> - %v64i8_v64f16 = uitofp <64 x i8> undef to <64 x half> %v64i8_v64f32 = uitofp <64 x i8> undef to <64 x float> %v64i8_v64f64 = uitofp <64 x i8> undef to <64 x double> - %v64i16_v64f16 = uitofp <64 x i16> undef to <64 x half> %v64i16_v64f32 = uitofp <64 x i16> undef to <64 x float> %v64i16_v64f64 = uitofp <64 x i16> undef to <64 x double> - %v64i32_v64f16 = uitofp <64 x i32> undef to <64 x half> %v64i32_v64f32 = uitofp <64 x i32> undef to <64 x float> %v64i32_v64f64 = uitofp <64 x i32> undef to <64 x double> - %v64i64_v64f16 = uitofp <64 x i64> undef to <64 x half> %v64i64_v64f32 = uitofp <64 x i64> undef to <64 x float> %v64i64_v64f64 = uitofp <64 x i16> undef to <64 x double> - %v64i1_v64f16 = uitofp <64 x i1> undef to <64 x half> %v64i1_v64f32 = uitofp <64 x i1> undef to <64 x float> %v64i1_v64f64 = uitofp <64 x i1> undef to <64 x double> - %v128i8_v128f16 = uitofp <128 x i8> undef to <128 x half> %v128i8_v128f32 = uitofp <128 x i8> undef to <128 x float> %v128i8_v128f64 = uitofp <128 x i8> undef to <128 x double> - %v128i16_v128f16 = uitofp <128 x i16> undef to <128 x half> %v128i16_v128f32 = uitofp <128 x i16> undef to <128 x float> %v128i16_v128f64 = uitofp <128 x i16> undef to <128 x double> - %v128i32_v128f16 = uitofp <128 x i32> undef to <128 x half> %v128i32_v128f32 = uitofp <128 x i32> undef to <128 x float> %v128i32_v128f64 = uitofp <128 x i32> undef to <128 x double> - %v128i64_v128f16 = uitofp <128 x i64> undef to <128 x half> %v128i64_v128f32 = uitofp <128 x i64> undef to <128 x float> %v128i64_v128f64 = uitofp <128 x i16> undef to <128 x double> - %v128i1_v128f16 = uitofp <128 x i1> undef to <128 x half> %v128i1_v128f32 = uitofp <128 x i1> undef to <128 x float> %v128i1_v128f64 = uitofp <128 x i1> undef to <128 x double> - %nxv1i8_nxv1f16 = uitofp undef to %nxv1i8_nxv1f32 = uitofp undef to %nxv1i8_nxv1f64 = uitofp undef to - %nxv1i16_nxv1f16 = uitofp undef to %nxv1i16_nxv1f32 = uitofp undef to %nxv1i16_nxv1f64 = uitofp undef to - %nxv1i32_nxv1f16 = uitofp undef to %nxv1i32_nxv1f32 = uitofp undef to %nxv1i32_nxv1f64 = uitofp undef to - %nxv1i64_nxv1f16 = uitofp undef to %nxv1i64_nxv1f32 = uitofp undef to %nxv1i64_nxv1f64 = uitofp undef to - %nxv1i1_nxv1f16 = uitofp undef to %nxv1i1_nxv1f32 = uitofp undef to %nxv1i1_nxv1f64 = uitofp undef to - %nxv2i8_nxv2f16 = uitofp undef to %nxv2i8_nxv2f32 = uitofp undef to %nxv2i8_nxv2f64 = uitofp undef to - %nxv2i16_nxv2f16 = uitofp undef to %nxv2i16_nxv2f32 = uitofp undef to %nxv2i16_nxv2f64 = uitofp undef to - %nxv2i32_nxv2f16 = uitofp undef to %nxv2i32_nxv2f32 = uitofp undef to %nxv2i32_nxv2f64 = uitofp undef to - %nxv2i64_nxv2f16 = uitofp undef to %nxv2i64_nxv2f32 = uitofp undef to %nxv2i64_nxv2f64 = uitofp undef to - %nxv2i1_nxv2f16 = uitofp undef to %nxv2i1_nxv2f32 = uitofp undef to %nxv2i1_nxv2f64 = uitofp undef to - %nxv4i8_nxv4f16 = uitofp undef to %nxv4i8_nxv4f32 = uitofp undef to %nxv4i8_nxv4f64 = uitofp undef to - %nxv4i16_nxv4f16 = uitofp undef to %nxv4i16_nxv4f32 = uitofp undef to 
%nxv4i16_nxv4f64 = uitofp undef to - %nxv4i32_nxv4f16 = uitofp undef to %nxv4i32_nxv4f32 = uitofp undef to %nxv4i32_nxv4f64 = uitofp undef to - %nxv4i64_nxv4f16 = uitofp undef to %nxv4i64_nxv4f32 = uitofp undef to %nxv4i64_nxv4f64 = uitofp undef to - %nxv4i1_nxv4f16 = uitofp undef to %nxv4i1_nxv4f32 = uitofp undef to %nxv4i1_nxv4f64 = uitofp undef to - %nxv8i8_nxv8f16 = uitofp undef to %nxv8i8_nxv8f32 = uitofp undef to %nxv8i8_nxv8f64 = uitofp undef to - %nxv8i16_nxv8f16 = uitofp undef to %nxv8i16_nxv8f32 = uitofp undef to %nxv8i16_nxv8f64 = uitofp undef to - %nxv8i32_nxv8f16 = uitofp undef to %nxv8i32_nxv8f32 = uitofp undef to %nxv8i32_nxv8f64 = uitofp undef to - %nxv8i64_nxv8f16 = uitofp undef to %nxv8i64_nxv8f32 = uitofp undef to %nxv8i64_nxv8f64 = uitofp undef to - %nxv8i1_nxv8f16 = uitofp undef to %nxv8i1_nxv8f32 = uitofp undef to %nxv8i1_nxv8f64 = uitofp undef to - %nxv16i8_nxv16f16 = uitofp undef to %nxv16i8_nxv16f32 = uitofp undef to %nxv16i8_nxv16f64 = uitofp undef to - %nxv16i16_nxv16f16 = uitofp undef to %nxv16i16_nxv16f32 = uitofp undef to %nxv16i16_nxv16f64 = uitofp undef to - %nxv16i32_nxv16f16 = uitofp undef to %nxv16i32_nxv16f32 = uitofp undef to %nxv16i32_nxv16f64 = uitofp undef to - %nxv16i64_nxv16f16 = uitofp undef to %nxv16i64_nxv16f32 = uitofp undef to %nxv16i64_nxv16f64 = uitofp undef to - %nxv16i1_nxv16f16 = uitofp undef to %nxv16i1_nxv16f32 = uitofp undef to %nxv16i1_nxv16f64 = uitofp undef to - %nxv32i8_nxv32f16 = uitofp undef to %nxv32i8_nxv32f32 = uitofp undef to %nxv32i8_nxv32f64 = uitofp undef to - %nxv32i16_nxv32f16 = uitofp undef to %nxv32i16_nxv32f32 = uitofp undef to %nxv32i16_nxv32f64 = uitofp undef to - %nxv32i32_nxv32f16 = uitofp undef to %nxv32i32_nxv32f32 = uitofp undef to %nxv32i32_nxv32f64 = uitofp undef to - %nxv32i64_nxv32f16 = uitofp undef to %nxv32i64_nxv32f32 = uitofp undef to %nxv32i64_nxv32f64 = uitofp undef to - %nxv32i1_nxv32f16 = uitofp undef to %nxv32i1_nxv32f32 = uitofp undef to %nxv32i1_nxv32f64 = uitofp undef to - %nxv64i8_nxv64f16 = uitofp undef to %nxv64i8_nxv64f32 = uitofp undef to %nxv64i8_nxv64f64 = uitofp undef to - %nxv64i16_nxv64f16 = uitofp undef to %nxv64i16_nxv64f32 = uitofp undef to %nxv64i16_nxv64f64 = uitofp undef to - %nxv64i32_nxv64f16 = uitofp undef to %nxv64i32_nxv64f32 = uitofp undef to %nxv64i32_nxv64f64 = uitofp undef to - %nxv64i64_nxv64f16 = uitofp undef to %nxv64i64_nxv64f32 = uitofp undef to %nxv64i64_nxv64f64 = uitofp undef to - %nxv64i1_nxv64f16 = uitofp undef to %nxv64i1_nxv64f32 = uitofp undef to %nxv64i1_nxv64f64 = uitofp undef to diff --git a/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll b/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll index 2dc6737a3d8a07..6c1bfb72c85967 100644 --- a/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/icmp-codesize.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse3 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 
-disable-output -cost-kind=code-size -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41 +; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s --check-prefixes=AVX1 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F @@ -11,8 +11,8 @@ ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2 ; -; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE42 -; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=slm | FileCheck %s --check-prefixes=SSE4,SSE42 +; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=goldmont | FileCheck %s --check-prefixes=SSE4,SSE42 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 define i32 @cmp_int_eq(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) { @@ -39,6 +39,29 @@ define i32 @cmp_int_eq(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp eq <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_eq' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = 
icmp eq <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp eq <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp eq <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp eq <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp eq <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_eq' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> %argv16i8, %argv16i8 @@ -251,6 +274,29 @@ define i32 @cmp_int_ne(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ne <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_ne' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %I32 = icmp ne i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ne <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ne <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ne <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ne <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_ne' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> %argv16i8, %argv16i8 @@ -463,6 +509,29 @@ define i32 @cmp_int_sge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sge <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_sge' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated 
cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sge <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sge <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sge <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sge <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_sge' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> %argv16i8, %argv16i8 @@ -652,6 +721,52 @@ define i32 @cmp_int_sge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 } define i32 @cmp_int_uge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) { +; SSE2-LABEL: 'cmp_int_uge' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 %arg8, %arg8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> %argv32i8, %argv32i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> %argv64i8, %argv64i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> %argv128i8, %argv128i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 %arg16, %arg16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> %argv8i16, %argv8i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> %argv16i16, %argv16i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> %argv32i16, %argv32i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> %argv64i16, %argv64i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 %arg32, %arg32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp uge <4 x i32> %argv4i32, %argv4i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp uge <8 x i32> %argv8i32, %argv8i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp uge <16 x i32> %argv16i32, %argv16i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp uge <32 x i32> %argv32i32, %argv32i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 %arg64, %arg64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for 
instruction: %V2I64 = icmp uge <2 x i64> %argv2i64, %argv2i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> %argv4i64, %argv4i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> %argv8i64, %argv8i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> %argv16i64, %argv16i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE41-LABEL: 'cmp_int_uge' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_uge' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8 @@ -864,6 +979,29 @@ define i32 @cmp_int_sgt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sgt <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_sgt' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sgt <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sgt <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sgt <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sgt <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_sgt' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> %argv16i8, %argv16i8 @@ -1076,6 +1214,29 @@ define i32 @cmp_int_ugt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ugt <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_ugt' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> %argv32i8, %argv32i8 +; 
SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ugt <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ugt <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ugt <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ugt <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_ugt' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> %argv16i8, %argv16i8 @@ -1288,6 +1449,29 @@ define i32 @cmp_int_sle(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sle <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_sle' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x 
i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sle <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sle <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sle <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sle <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_sle'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> %argv16i8, %argv16i8
@@ -1477,6 +1661,52 @@ define i32 @cmp_int_sle(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
}

define i32 @cmp_int_ule(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'cmp_int_ule'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 %arg8, %arg8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> %argv32i8, %argv32i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> %argv64i8, %argv64i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> %argv128i8, %argv128i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 %arg16, %arg16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> %argv8i16, %argv8i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> %argv16i16, %argv16i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> %argv32i16, %argv32i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> %argv64i16, %argv64i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 %arg32, %arg32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp ule <4 x i32> %argv4i32, %argv4i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp ule <8 x i32> %argv8i32, %argv8i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ule <16 x i32> %argv16i32, %argv16i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ule <32 x i32> %argv32i32, %argv32i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 %arg64, %arg64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> %argv2i64, %argv2i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> %argv4i64, %argv4i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> %argv8i64, %argv8i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> %argv16i64, %argv16i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'cmp_int_ule'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_ule'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8
@@ -1689,6 +1919,29 @@ define i32 @cmp_int_slt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp slt <16 x i64> %argv16i64, %argv16i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE41-LABEL: 'cmp_int_slt'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp slt <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp slt <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp slt <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp slt <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_slt'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> %argv16i8, %argv16i8
@@ -1901,6 +2154,29 @@ define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ult <16 x i64> %argv16i64, %argv16i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE41-LABEL: 'cmp_int_ult'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ult <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ult <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ult <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ult <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_ult'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> %argv16i8, %argv16i8
@@ -2090,6 +2366,52 @@ define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
}

define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'scmp_int'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'scmp_int'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; AVX1-LABEL: 'scmp_int'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
@@ -2256,6 +2578,52 @@ define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i
}

define i32 @ucmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'ucmp_int'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'ucmp_int'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; AVX1-LABEL: 'ucmp_int'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
@@ -2421,3 +2789,5 @@ define i32 @ucmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i
ret i32 undef
}

+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE4: {{.*}}
diff --git a/llvm/test/Analysis/CostModel/X86/icmp-latency.ll b/llvm/test/Analysis/CostModel/X86/icmp-latency.ll
index 726a6dd782a4f7..efa903ea2819c8 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp-latency.ll
@@ -2,8 +2,8 @@
; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE2
; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE2
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42
; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=AVX1
; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
@@ -12,7 +12,7 @@
; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2
;
; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=slm | FileCheck %s --check-prefixes=SLM
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE4,SSE42
; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1

define i32 @cmp_int_eq(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
@@ -39,6 +39,29 @@ define i32 @cmp_int_eq(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp eq <16 x i64> %argv16i64, %argv16i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE41-LABEL: 'cmp_int_eq'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp eq <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp eq <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp eq <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp eq <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_eq'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> %argv16i8, %argv16i8
@@ -274,6 +297,29 @@ define i32 @cmp_int_ne(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp ne <16 x i64> %argv16i64, %argv16i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE41-LABEL: 'cmp_int_ne'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp ne <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp ne <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp ne <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp ne <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_ne'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> %argv16i8, %argv16i8
@@ -509,6 +555,29 @@ define i32 @cmp_int_sge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sge <16 x i64> %argv16i64, %argv16i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE41-LABEL: 'cmp_int_sge'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sge <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sge <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sge <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sge <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_sge'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> %argv16i8, %argv16i8
@@ -721,6 +790,52 @@ define i32 @cmp_int_sge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
}

define i32 @cmp_int_uge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'cmp_int_uge'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 %arg8, %arg8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> %argv32i8, %argv32i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> %argv64i8, %argv64i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> %argv128i8, %argv128i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 %arg16, %arg16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> %argv8i16, %argv8i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> %argv16i16, %argv16i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> %argv32i16, %argv32i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> %argv64i16, %argv64i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 %arg32, %arg32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp uge <4 x i32> %argv4i32, %argv4i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp uge <8 x i32> %argv8i32, %argv8i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp uge <16 x i32> %argv16i32, %argv16i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp uge <32 x i32> %argv32i32, %argv32i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 %arg64, %arg64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp uge <2 x i64> %argv2i64, %argv2i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp uge <4 x i64> %argv4i64, %argv4i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp uge <8 x i64> %argv8i64, %argv8i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp uge <16 x i64> %argv16i64, %argv16i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'cmp_int_uge'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp uge <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp uge <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp uge <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp uge <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_uge'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8
@@ -956,6 +1071,29 @@ define i32 @cmp_int_sgt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp sgt <16 x i64> %argv16i64, %argv16i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE41-LABEL: 'cmp_int_sgt'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp sgt <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp sgt <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp sgt <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp sgt <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_sgt'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> %argv16i8, %argv16i8
@@ -1191,6 +1329,29 @@ define i32 @cmp_int_ugt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ugt <16 x i64> %argv16i64, %argv16i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE41-LABEL: 'cmp_int_ugt'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ugt <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ugt <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ugt <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ugt <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_ugt'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> %argv16i8, %argv16i8
@@ -1426,6 +1587,29 @@ define i32 @cmp_int_sle(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sle <16 x i64> %argv16i64, %argv16i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE41-LABEL: 'cmp_int_sle'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp sle <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sle <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sle <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sle <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_sle'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> %argv16i8, %argv16i8
@@ -1638,6 +1822,52 @@ define i32 @cmp_int_sle(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
}

define i32 @cmp_int_ule(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'cmp_int_ule'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 %arg8, %arg8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> %argv32i8, %argv32i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> %argv64i8, %argv64i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> %argv128i8, %argv128i8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 %arg16, %arg16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> %argv8i16, %argv8i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> %argv16i16, %argv16i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> %argv32i16, %argv32i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> %argv64i16, %argv64i16
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 %arg32, %arg32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp ule <4 x i32> %argv4i32, %argv4i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp ule <8 x i32> %argv8i32, %argv8i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ule <16 x i32> %argv16i32, %argv16i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ule <32 x i32> %argv32i32, %argv32i32
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 %arg64, %arg64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ule <2 x i64> %argv2i64, %argv2i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ule <4 x i64> %argv4i64, %argv4i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ule <8 x i64> %argv8i64, %argv8i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ule <16 x i64> %argv16i64, %argv16i64
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'cmp_int_ule'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ule <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ule <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ule <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ule <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_ule'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8
@@ -1873,6 +2103,29 @@ define i32 @cmp_int_slt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp slt <16 x i64> %argv16i64, %argv16i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE41-LABEL: 'cmp_int_slt'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = icmp slt <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = icmp slt <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = icmp slt <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I64 = icmp slt <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_slt'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> %argv16i8, %argv16i8
@@ -2108,6 +2361,29 @@ define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ult <16 x i64> %argv16i64, %argv16i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
;
+; SSE41-LABEL: 'cmp_int_ult'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 %arg8, %arg8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> %argv16i8, %argv16i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> %argv32i8, %argv32i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> %argv64i8, %argv64i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> %argv128i8, %argv128i8
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 %arg16, %arg16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> %argv8i16, %argv8i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> %argv16i16, %argv16i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> %argv32i16, %argv32i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> %argv64i16, %argv64i16
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 %arg32, %arg32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> %argv4i32, %argv4i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ult <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ult <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ult <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ult <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
; SSE42-LABEL: 'cmp_int_ult'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 %arg8, %arg8
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> %argv16i8, %argv16i8
@@ -2320,28 +2596,51 @@ define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
}

define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
-; SSE42-LABEL: 'scmp_int'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 =
call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE2-LABEL: 'scmp_int' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32) +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE4-LABEL: 'scmp_int' +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8) +; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8) +; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8) +; SSE4-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8) +; SSE4-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8) +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16) +; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16) +; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16) +; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16) +; SSE4-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16) +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32) +; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32) +; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32) +; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32) +; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32) +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64) +; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x 
i64> %argv2i64) +; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64) +; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64) +; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64) +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'scmp_int' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8) @@ -2532,28 +2831,51 @@ define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i } define i32 @ucmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) { -; SSE42-LABEL: 'ucmp_int' -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8) -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8) -; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16) -; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16) -; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32) -; SSE42-NEXT: Cost Model: Found 
an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64) -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64) -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64) -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64) -; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64) -; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; SSE2-LABEL: 'ucmp_int' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64) 
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE4-LABEL: 'ucmp_int' +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8) +; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8) +; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8) +; SSE4-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8) +; SSE4-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8) +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16) +; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16) +; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16) +; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16) +; SSE4-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16) +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32) +; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32) +; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32) +; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32) +; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32) +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64) +; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64) +; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> 
@llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64) +; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64) +; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64) +; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; AVX1-LABEL: 'ucmp_int' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8) diff --git a/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll index 0deaad5991fb2f..4fc7c68be26f78 100644 --- a/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/icmp-sizelatency.ll @@ -2,8 +2,8 @@ ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse3 | FileCheck %s --check-prefixes=SSE2 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE2 -; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41 +; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefixes=AVX1 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F @@ -11,8 +11,8 @@ ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOPAVX1 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOPAVX2 ; -; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE42 -; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=slm | FileCheck %s --check-prefixes=SSE4,SSE42 +; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=goldmont | FileCheck %s --check-prefixes=SSE4,SSE42 ; RUN: opt < %s -mtriple=x86_64-- -passes="print" 2>&1 -disable-output -cost-kind=size-latency -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1 define i32 @cmp_int_eq(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, 
<64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) { @@ -39,6 +39,29 @@ define i32 @cmp_int_eq(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp eq <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_eq' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp eq <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp eq <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp eq <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp eq i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp eq <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp eq <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp eq <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp eq i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp eq <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp eq <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp eq <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp eq i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp eq <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp eq <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp eq <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp eq <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_eq' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp eq i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> %argv16i8, %argv16i8 @@ -251,6 +274,29 @@ define i32 @cmp_int_ne(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x ; SSE2-NEXT: Cost Model: Found an 
estimated cost of 48 for instruction: %V16I64 = icmp ne <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_ne' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ne <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ne <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ne <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ne i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ne <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ne <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ne <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ne <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ne i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ne <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ne <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ne <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ne <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ne i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp ne <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp ne <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp ne <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp ne <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_ne' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ne i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ne <16 x i8> %argv16i8, %argv16i8 @@ -463,6 +509,29 @@ define i32 @cmp_int_sge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sge <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_sge' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %V32I8 = icmp sge <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sge <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sge <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sge i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sge <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sge <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sge <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sge <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sge i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sge <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sge <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sge <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sge <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sge i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sge <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sge <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sge <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sge <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_sge' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sge i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sge <16 x i8> %argv16i8, %argv16i8 @@ -652,6 +721,52 @@ define i32 @cmp_int_sge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 } define i32 @cmp_int_uge(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) { +; SSE2-LABEL: 'cmp_int_uge' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 %arg8, %arg8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> %argv32i8, %argv32i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> %argv64i8, %argv64i8 +; SSE2-NEXT: Cost Model: Found an estimated cost 
of 16 for instruction: %V128I8 = icmp uge <128 x i8> %argv128i8, %argv128i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 %arg16, %arg16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> %argv8i16, %argv8i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> %argv16i16, %argv16i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> %argv32i16, %argv32i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> %argv64i16, %argv64i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 %arg32, %arg32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp uge <4 x i32> %argv4i32, %argv4i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp uge <8 x i32> %argv8i32, %argv8i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp uge <16 x i32> %argv16i32, %argv16i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp uge <32 x i32> %argv32i32, %argv32i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 %arg64, %arg64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> %argv2i64, %argv2i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> %argv4i64, %argv4i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> %argv8i64, %argv8i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> %argv16i64, %argv16i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE41-LABEL: 'cmp_int_uge' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp uge <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp uge <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp uge <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp uge i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp uge <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp uge <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp uge <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp uge <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp uge i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp uge <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp uge <8 x i32> %argv8i32, 
%argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp uge <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp uge <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp uge i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp uge <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp uge <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp uge <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp uge <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_uge' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp uge i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp uge <16 x i8> %argv16i8, %argv16i8 @@ -864,6 +979,29 @@ define i32 @cmp_int_sgt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sgt <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_sgt' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp sgt <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp sgt <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp sgt <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sgt i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp sgt <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp sgt <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp sgt <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp sgt <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sgt i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp sgt <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp sgt <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp sgt <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp sgt <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sgt i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp 
sgt <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp sgt <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp sgt <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp sgt <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_sgt' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sgt i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp sgt <16 x i8> %argv16i8, %argv16i8 @@ -1076,6 +1214,29 @@ define i32 @cmp_int_ugt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ugt <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_ugt' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ugt <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ugt <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ugt <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ugt i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ugt <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ugt <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ugt <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ugt <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ugt i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ugt <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ugt <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ugt <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ugt <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ugt i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ugt <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ugt <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ugt <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ugt <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_ugt' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ugt i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ugt <16 x i8> %argv16i8, %argv16i8 @@ -1288,6 +1449,29 @@ define i32 @cmp_int_sle(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sle <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_sle' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp sle <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp sle <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp sle <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp sle i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp sle <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp sle <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp sle <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp sle <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp sle i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp sle <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp sle <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp sle <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp sle <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp sle i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = icmp sle <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = icmp sle <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = icmp sle <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = icmp sle <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_sle' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp sle i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp sle <16 x i8> %argv16i8, %argv16i8 @@ -1477,6 +1661,52 @@ define i32 @cmp_int_sle(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 } define i32 @cmp_int_ule(i8 %arg8, <16 x 
i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) { +; SSE2-LABEL: 'cmp_int_ule' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 %arg8, %arg8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> %argv32i8, %argv32i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> %argv64i8, %argv64i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> %argv128i8, %argv128i8 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ule i16 %arg16, %arg16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> %argv8i16, %argv8i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> %argv16i16, %argv16i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> %argv32i16, %argv32i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> %argv64i16, %argv64i16 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 %arg32, %arg32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = icmp ule <4 x i32> %argv4i32, %argv4i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = icmp ule <8 x i32> %argv8i32, %argv8i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = icmp ule <16 x i32> %argv16i32, %argv16i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32I32 = icmp ule <32 x i32> %argv32i32, %argv32i32 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 %arg64, %arg64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> %argv2i64, %argv2i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> %argv4i64, %argv4i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> %argv8i64, %argv8i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> %argv16i64, %argv16i64 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SSE41-LABEL: 'cmp_int_ule' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp ule <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp ule <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp ule <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %I16 = icmp ule i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = icmp ule <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp ule <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp ule <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp ule <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ule i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = icmp ule <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp ule <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp ule <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp ule <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ule i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = icmp ule <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = icmp ule <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = icmp ule <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = icmp ule <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_ule' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ule i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = icmp ule <16 x i8> %argv16i8, %argv16i8 @@ -1689,6 +1919,29 @@ define i32 @cmp_int_slt(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp slt <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_slt' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = icmp slt <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = icmp slt <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128I8 = icmp slt <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp slt i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp slt <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = icmp slt <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I16 = icmp slt <32 x i16> %argv32i16, %argv32i16 +; 
SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I16 = icmp slt <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp slt i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp slt <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = icmp slt <8 x i32> %argv8i32, %argv8i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = icmp slt <16 x i32> %argv16i32, %argv16i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I32 = icmp slt <32 x i32> %argv32i32, %argv32i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp slt i64 %arg64, %arg64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = icmp slt <2 x i64> %argv2i64, %argv2i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = icmp slt <4 x i64> %argv4i64, %argv4i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = icmp slt <8 x i64> %argv8i64, %argv8i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I64 = icmp slt <16 x i64> %argv16i64, %argv16i64 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; ; SSE42-LABEL: 'cmp_int_slt' ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp slt i8 %arg8, %arg8 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp slt <16 x i8> %argv16i8, %argv16i8 @@ -1901,6 +2154,29 @@ define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ult <16 x i64> %argv16i64, %argv16i64 ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; +; SSE41-LABEL: 'cmp_int_ult' +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 %arg8, %arg8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> %argv16i8, %argv16i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = icmp ult <32 x i8> %argv32i8, %argv32i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = icmp ult <64 x i8> %argv64i8, %argv64i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V128I8 = icmp ult <128 x i8> %argv128i8, %argv128i8 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = icmp ult i16 %arg16, %arg16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = icmp ult <8 x i16> %argv8i16, %argv8i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = icmp ult <16 x i16> %argv16i16, %argv16i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = icmp ult <32 x i16> %argv32i16, %argv32i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I16 = icmp ult <64 x i16> %argv64i16, %argv64i16 +; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = icmp ult i32 %arg32, %arg32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = icmp ult <4 x i32> %argv4i32, %argv4i32 +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x 
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = icmp ult <8 x i32> %argv8i32, %argv8i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I32 = icmp ult <16 x i32> %argv16i32, %argv16i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32I32 = icmp ult <32 x i32> %argv32i32, %argv32i32
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = icmp ult i64 %arg64, %arg64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = icmp ult <2 x i64> %argv2i64, %argv2i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = icmp ult <4 x i64> %argv4i64, %argv4i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = icmp ult <8 x i64> %argv8i64, %argv8i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I64 = icmp ult <16 x i64> %argv16i64, %argv16i64
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
 ; SSE42-LABEL: 'cmp_int_ult'
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = icmp ult i8 %arg8, %arg8
 ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = icmp ult <16 x i8> %argv16i8, %argv16i8
@@ -2090,6 +2366,52 @@ define i32 @cmp_int_ult(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64
 }
 
 define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'scmp_int'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'scmp_int'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
 ; AVX1-LABEL: 'scmp_int'
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
@@ -2256,6 +2578,52 @@ define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i
 }
 
 define i32 @ucmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
+; SSE2-LABEL: 'ucmp_int'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 624 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SSE41-LABEL: 'ucmp_int'
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
 ; AVX1-LABEL: 'ucmp_int'
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
@@ -2421,3 +2789,5 @@ define i32 @ucmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i
   ret i32 undef
 }
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; SSE4: {{.*}}
diff --git a/llvm/test/Analysis/CostModel/X86/icmp.ll b/llvm/test/Analysis/CostModel/X86/icmp.ll
index 599895c2b5705a..d8959a67145d63 100644
--- a/llvm/test/Analysis/CostModel/X86/icmp.ll
+++ b/llvm/test/Analysis/CostModel/X86/icmp.ll
@@ -2,8 +2,8 @@
 ; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2
 ; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3
 ; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE4,SSE41
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE4,SSE42
 ; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX1
 ; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
 ; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512F
@@ -12,7 +12,7 @@
 ; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mattr=+xop,+avx2 | FileCheck %s -check-prefixes=XOPAVX2
 ;
 ; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mcpu=slm | FileCheck %s --check-prefixes=SLM
-; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE42
+; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mcpu=goldmont | FileCheck %s --check-prefixes=SSE4,SSE42
 ; RUN: opt < %s -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
 
 define i32 @cmp_int_eq(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i8> %argv64i8, <128 x i8> %argv128i8, i16 %arg16, <8 x i16> %argv8i16, <16 x i16> %argv16i16, <32 x i16> %argv32i16, <64 x i16> %argv64i16, i32 %arg32, <4 x i32> %argv4i32, <8 x i32> %argv8i32, <16 x i32> %argv16i32, <32 x i32> %argv32i32, i64 %arg64, <2 x i64> %argv2i64, <4 x i64> %argv4i64, <8 x i64> %argv8i64, <16 x i64> %argv16i64) {
@@ -3125,51 +3125,28 @@ define i32 @scmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; SSE41-LABEL: 'scmp_int'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'scmp_int'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE4-LABEL: 'scmp_int'
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.scmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.scmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.scmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.scmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.scmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.scmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.scmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.scmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.scmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.scmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.scmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.scmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.scmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.scmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.scmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.scmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.scmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.scmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.scmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'scmp_int'
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.scmp.i8.i8(i8 %arg8, i8 %arg8)
@@ -3429,51 +3406,28 @@ define i32 @ucmp_int(i8 %arg8, <16 x i8> %argv16i8, <32 x i8> %argv32i8, <64 x i
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
-; SSE41-LABEL: 'ucmp_int'
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'ucmp_int'
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE4-LABEL: 'ucmp_int'
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.ucmp.v32i8.v32i8(<32 x i8> %argv32i8, <32 x i8> %argv32i8)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.ucmp.v64i8.v64i8(<64 x i8> %argv64i8, <64 x i8> %argv64i8)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %V128I8 = call <128 x i8> @llvm.ucmp.v128i8.v128i8(<128 x i8> %argv128i8, <128 x i8> %argv128i8)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.ucmp.i16.i16(i16 %arg16, i16 %arg16)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.ucmp.v8i16.v8i16(<8 x i16> %argv8i16, <8 x i16> %argv8i16)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.ucmp.v16i16.v16i16(<16 x i16> %argv16i16, <16 x i16> %argv16i16)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ucmp.v32i16.v32i16(<32 x i16> %argv32i16, <32 x i16> %argv32i16)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I16 = call <64 x i16> @llvm.ucmp.v64i16.v64i16(<64 x i16> %argv64i16, <64 x i16> %argv64i16)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.ucmp.i32.i32(i32 %arg32, i32 %arg32)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.ucmp.v4i32.v4i32(<4 x i32> %argv4i32, <4 x i32> %argv4i32)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.ucmp.v8i32.v8i32(<8 x i32> %argv8i32, <8 x i32> %argv8i32)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.ucmp.v16i32.v16i32(<16 x i32> %argv16i32, <16 x i32> %argv16i32)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I32 = call <32 x i32> @llvm.ucmp.v32i32.v32i32(<32 x i32> %argv32i32, <32 x i32> %argv32i32)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.ucmp.i64.i64(i64 %arg64, i64 %arg64)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.ucmp.v2i64.v2i64(<2 x i64> %argv2i64, <2 x i64> %argv2i64)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.ucmp.v4i64.v4i64(<4 x i64> %argv4i64, <4 x i64> %argv4i64)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.ucmp.v8i64.v8i64(<8 x i64> %argv8i64, <8 x i64> %argv8i64)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I64 = call <16 x i64> @llvm.ucmp.v16i64.v16i64(<16 x i64> %argv16i64, <16 x i64> %argv16i64)
+; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'ucmp_int'
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.ucmp.i8.i8(i8 %arg8, i8 %arg8)
 ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.ucmp.v16i8.v16i8(<16 x i8> %argv16i8, <16 x i8> %argv16i8)
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index 835622276ef279..4402289ac170d9 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -511,12 +511,6 @@ define void @f92() sanitize_realtime
   ret void;
 }
 
-; CHECK: define void @f93() #54
-define void @f93() nosanitize_realtime
-{
-  ret void;
-}
-
 ; CHECK: define void @f87() [[FNRETTHUNKEXTERN:#[0-9]+]]
 define void @f87() fn_ret_thunk_extern { ret void }
 
@@ -612,7 +606,6 @@ define void @initializes(ptr initializes((-4, 0), (4, 8)) %a) {
 ; CHECK: attributes #51 = { uwtable(sync) }
 ; CHECK: attributes #52 = { nosanitize_bounds }
 ; CHECK: attributes #53 = { sanitize_realtime }
-; CHECK: attributes #54 = { nosanitize_realtime }
 ; CHECK: attributes [[FNRETTHUNKEXTERN]] = { fn_ret_thunk_extern }
 ; CHECK: attributes [[SKIPPROFILE]] = { skipprofile }
 ; CHECK: attributes [[OPTDEBUG]] = { optdebug }
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index c401cde8e146e7..fd60c49a4be39b 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1562,7 +1562,7 @@ exit:
   ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
 
   call void @f.nobuiltin() builtin
-  ; CHECK: call void @f.nobuiltin() #54
+  ; CHECK: call void @f.nobuiltin() #53
 
   call fastcc noalias ptr @f.noalias() noinline
   ; CHECK: call fastcc noalias ptr @f.noalias() #12
@@ -1992,9 +1992,6 @@ declare void @f.sanitize_numerical_stability() sanitize_numerical_stability
 declare void @f.sanitize_realtime() sanitize_realtime
 ; CHECK: declare void @f.sanitize_realtime() #52
 
-declare void @f.nosanitize_realtime() nosanitize_realtime
-; CHECK: declare void @f.nosanitize_realtime() #53
-
 ; CHECK: declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan))
 declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan))
 
@@ -2118,8 +2115,7 @@ define float @nofpclass_callsites(float %arg) {
 ; CHECK: attributes #50 = { allockind("alloc,uninitialized") }
 ; CHECK: attributes #51 = { sanitize_numerical_stability }
 ; CHECK: attributes #52 = { sanitize_realtime }
-; CHECK: attributes #53 = { nosanitize_realtime }
-; CHECK: attributes #54 = { builtin }
+; CHECK: attributes #53 = { builtin }
 
 ;; Metadata
 
diff --git a/llvm/test/CodeGen/AVR/jmp.ll b/llvm/test/CodeGen/AVR/jmp.ll
new file mode 100644
index 00000000000000..95dfff4836b4e8
--- /dev/null
+++ b/llvm/test/CodeGen/AVR/jmp.ll
@@ -0,0 +1,25 @@
+; RUN: llc -filetype=obj -mtriple=avr < %s | llvm-objdump -dr --no-show-raw-insn - | FileCheck %s
+
+define i8 @foo(i8 %a) {
+bb0:
+  %0 = tail call i8 @bar(i8 %a)
+  %1 = icmp eq i8 %0, 123
+  br i1 %1, label %bb1, label %bb2
+
+bb1:
+  ret i8 100
+
+bb2:
+  ret i8 200
+}
+
+declare i8 @bar(i8);
+
+; CHECK: rcall .-2
+; CHECK-NEXT: 00000000: R_AVR_13_PCREL bar
+; CHECK-NEXT: cpi r24, 0x7b
+; CHECK-NEXT: brne .+4
+; CHECK-NEXT: ldi r24, 0x64
+; CHECK-NEXT: ret
+; CHECK-NEXT: ldi r24, 0xc8
+; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/BPF/atomics.ll b/llvm/test/CodeGen/BPF/atomics.ll
index 0c16c49f2a873b..c17b94af5f7bd9 100644
--- a/llvm/test/CodeGen/BPF/atomics.ll
+++ b/llvm/test/CodeGen/BPF/atomics.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck --check-prefixes=CHECK,CHECK-V2 %s
-; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefixes=CHECK,CHECK-V3 %s
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefix=CHECK-V3 %s
 
 ; CHECK-LABEL: test_load_add_32
-; CHECK-V2: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
+; CHECK: lock *(u32 *)(r1 + 0) += r2
+; CHECK: encoding: [0xc3,0x21
 ; CHECK-V3: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
-; CHECK: encoding: [0xc3,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
+; CHECK-V3: encoding: [0xc3,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
 define void @test_load_add_32(ptr %p, i32 zeroext %v) {
 entry:
   atomicrmw add ptr %p, i32 %v seq_cst
@@ -12,8 +13,10 @@ entry:
 }
 
 ; CHECK-LABEL: test_load_add_64
-; CHECK: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
-; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
+; CHECK: lock *(u64 *)(r1 + 0) += r2
+; CHECK: encoding: [0xdb,0x21
+; CHECK-V3: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK-V3: encoding: [0xdb,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
 define void @test_load_add_64(ptr %p, i64 zeroext %v) {
 entry:
   atomicrmw add ptr %p, i64 %v seq_cst
diff --git a/llvm/test/CodeGen/BPF/atomics_2.ll b/llvm/test/CodeGen/BPF/atomics_2.ll
index c670ddb05b6a77..6371e3b875638e 100644
--- a/llvm/test/CodeGen/BPF/atomics_2.ll
+++ b/llvm/test/CodeGen/BPF/atomics_2.ll
@@ -224,7 +224,7 @@ entry:
 }
 
 ; CHECK-LABEL: test_atomic_xor_64
-; CHECK: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
+; CHECK: atomic_fetch_xor((u64 *)(r1 + 0), r2)
 ; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
 ; CHECK: w0 = 0
 define dso_local i32 @test_atomic_xor_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
diff --git a/llvm/test/CodeGen/BPF/objdump_atomics.ll b/llvm/test/CodeGen/BPF/objdump_atomics.ll
index c4cb16b2c36418..fcc889ba300e39 100644
--- a/llvm/test/CodeGen/BPF/objdump_atomics.ll
+++ b/llvm/test/CodeGen/BPF/objdump_atomics.ll
@@ -2,7 +2,7 @@
 
 ; CHECK-LABEL: test_load_add_32
 ; CHECK: c3 21
-; CHECK: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
+; CHECK: lock *(u32 *)(r1 + 0) += w2
 define void @test_load_add_32(ptr %p, i32 zeroext %v) {
 entry:
   atomicrmw add ptr %p, i32 %v seq_cst
@@ -11,7 +11,7 @@ entry:
 
 ; CHECK-LABEL: test_load_add_64
 ; CHECK: db 21
-; CHECK: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
+; CHECK: lock *(u64 *)(r1 + 0) += r2
 define void @test_load_add_64(ptr %p, i64 zeroext %v) {
 entry:
   atomicrmw add ptr %p, i64 %v seq_cst
diff --git a/llvm/test/CodeGen/BPF/xadd.ll b/llvm/test/CodeGen/BPF/xadd.ll
new file mode 100644
index 00000000000000..5aeeb9baf7b892
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/xadd.ll
@@ -0,0 +1,59 @@
+; RUN: not llc -march=bpfel < %s 2>&1 | FileCheck %s
+; RUN: not llc -march=bpfeb < %s 2>&1 | FileCheck %s
+
+; This file is generated with the source command and source
+; $ clang -target bpf -O2 -g -S -emit-llvm t.c
+; $ cat t.c
+; int test(int *ptr) {
+;    int r;
+;    __sync_fetch_and_add(ptr, 4);
+;    r = __sync_fetch_and_add(ptr, 6);
+;    return r;
+; }
+
+; ModuleID = 't.c'
+source_filename = "t.c"
+target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128"
+target triple = "bpf"
+
+; Function Attrs: nounwind
+define dso_local i32 @test(ptr nocapture %ptr) local_unnamed_addr #0 !dbg !7 {
+entry:
+  call void @llvm.dbg.value(metadata ptr %ptr, metadata !13, metadata !DIExpression()), !dbg !15
+  %0 = atomicrmw add ptr %ptr, i32 4 seq_cst, !dbg !16
+  %1 = atomicrmw add ptr %ptr, i32 6 seq_cst, !dbg !17
+; CHECK: in function test i32 (ptr): Invalid usage of the XADD return value
+  call void @llvm.dbg.value(metadata i32 %1, metadata !14, metadata !DIExpression()), !dbg !18
+  ret i32 %1, !dbg !19
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone speculatable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 8.0.0 (trunk 342605) (llvm/trunk 342612)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None)
+!1 = !DIFile(filename: "t.c", directory: "/home/yhs/work/tests/llvm/sync/test1")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{!"clang version 8.0.0 (trunk 342605) (llvm/trunk 342612)"}
+!7 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !12)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !11}
+!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64)
+!12 = !{!13, !14}
+!13 = !DILocalVariable(name: "ptr", arg: 1, scope: !7, file: !1, line: 1, type: !11)
+!14 = !DILocalVariable(name: "r", scope: !7, file: !1, line: 2, type: !10)
+!15 = !DILocation(line: 1, column: 15, scope: !7)
+!16 = !DILocation(line: 3, column: 4, scope: !7)
+!17 = !DILocation(line: 4, column: 8, scope: !7)
+!18 = !DILocation(line: 2, column: 8, scope: !7)
+!19 = !DILocation(line: 5, column: 4, scope: !7)
diff --git a/llvm/test/CodeGen/BPF/xadd_legal.ll b/llvm/test/CodeGen/BPF/xadd_legal.ll
index 88f04d85a779f8..9b07afade3fee9 100644
--- a/llvm/test/CodeGen/BPF/xadd_legal.ll
+++ b/llvm/test/CodeGen/BPF/xadd_legal.ll
@@ -19,7 +19,7 @@ define dso_local i32 @test(ptr nocapture %ptr, i64 %a) {
 entry:
   %conv = trunc i64 %a to i32
   %0 = atomicrmw add ptr %ptr, i32 %conv seq_cst
-; CHECK-64: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
+; CHECK-64: lock *(u32 *)(r1 + 0) += r2
 ; CHECK-32: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
   %1 = load i32, ptr %ptr, align 4
   ret i32 %1
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
index f17cec231f3236..3efdd08bbea4c4 100644
--- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -338,14 +338,12 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; LA64-NEXT: srli.d $a1, $a0, 1
 ; LA64-NEXT: lu12i.w $a2, 349525
 ; LA64-NEXT: ori $a2, $a2, 1365
-; LA64-NEXT: lu32i.d $a2, 349525
-; LA64-NEXT: lu52i.d $a2, $a2, 1365
+; LA64-NEXT: bstrins.d $a2, $a2, 62, 32
 ; LA64-NEXT: and $a1, $a1, $a2
 ; LA64-NEXT: sub.d $a0, $a0, $a1
 ; LA64-NEXT: lu12i.w $a1, 209715
 ; LA64-NEXT: ori $a1, $a1, 819
-; LA64-NEXT: lu32i.d $a1, 209715
-; LA64-NEXT: lu52i.d $a1, $a1, 819
+; LA64-NEXT: bstrins.d $a1, $a1, 61, 32
 ; LA64-NEXT: and $a2, $a0, $a1
 ; LA64-NEXT: srli.d $a0, $a0, 2
 ; LA64-NEXT: and $a0, $a0, $a1
@@ -354,13 +352,11 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; LA64-NEXT: add.d $a0, $a0, $a1
 ; LA64-NEXT: lu12i.w $a1, 61680
 ; LA64-NEXT: ori $a1, $a1, 3855
-; LA64-NEXT: lu32i.d $a1, -61681
-; LA64-NEXT: lu52i.d $a1, $a1, 240
+; LA64-NEXT: bstrins.d $a1, $a1, 59, 32
 ; LA64-NEXT: and $a0, $a0, $a1
 ; LA64-NEXT: lu12i.w $a1, 4112
 ; LA64-NEXT: ori $a1, $a1, 257
-; LA64-NEXT: lu32i.d $a1, 65793
-; LA64-NEXT: lu52i.d $a1, $a1, 16
+; LA64-NEXT: bstrins.d $a1, $a1, 56, 32
 ; LA64-NEXT: mul.d $a0, $a0, $a1
 ; LA64-NEXT: srli.d $a0, $a0, 56
 ; LA64-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/imm.ll b/llvm/test/CodeGen/LoongArch/imm.ll
index f84fddaec66b9f..aca508e99fb960 100644
--- a/llvm/test/CodeGen/LoongArch/imm.ll
+++ b/llvm/test/CodeGen/LoongArch/imm.ll
@@ -47,8 +47,7 @@ define i64 @imm0008000000000fff() {
 ; CHECK-LABEL: imm0008000000000fff:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: ori $a0, $zero, 4095
-; CHECK-NEXT: lu32i.d $a0, -524288
-; CHECK-NEXT: lu52i.d $a0, $a0, 0
+; CHECK-NEXT: bstrins.d $a0, $a0, 51, 51
 ; CHECK-NEXT: ret
   ret i64 2251799813689343
 }
@@ -164,3 +163,59 @@ define i64 @imm0008000080000800() {
 ; CHECK-NEXT: ret
   ret i64 2251801961170944
 }
+
+define i64 @imm14000000a() {
+; CHECK-LABEL: imm14000000a:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $a0, $zero, 10
+; CHECK-NEXT: bstrins.d $a0, $a0, 32, 29
+; CHECK-NEXT: ret
+  ret i64 5368709130
+}
+
+define i64 @imm0fff000000000fff() {
+; CHECK-LABEL: imm0fff000000000fff:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $a0, $zero, 4095
+; CHECK-NEXT: bstrins.d $a0, $a0, 59, 48
+; CHECK-NEXT: ret
+  ret i64 1152640029630140415
+}
+
+define i64 @immffecffffffffffec() {
+; CHECK-LABEL: immffecffffffffffec:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.w $a0, $zero, -20
+; CHECK-NEXT: bstrins.d $a0, $a0, 52, 48
+; CHECK-NEXT: ret
+  ret i64 -5348024557502484
+}
+
+define i64 @imm1c000000700000() {
+; CHECK-LABEL: imm1c000000700000:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a0, 1792
+; CHECK-NEXT: bstrins.d $a0, $a0, 52, 30
+; CHECK-NEXT: ret
+  ret i64 7881299355238400
+}
+
+define i64 @immf0f0f0f0f0f0f0f0() {
+; CHECK-LABEL: immf0f0f0f0f0f0f0f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lu12i.w $a0, -61681
+; CHECK-NEXT: ori $a0, $a0, 240
+; CHECK-NEXT: bstrins.d $a0, $a0, 59, 32
+; CHECK-NEXT: ret
+  ret i64 -1085102592571150096
+}
+
+define i64 @imm110000014000000a() {
+; CHECK-LABEL: imm110000014000000a:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ori $a0, $zero, 10
+; CHECK-NEXT: lu52i.d $a0, $a0, 272
+; CHECK-NEXT: bstrins.d $a0, $a0, 32, 29
+; CHECK-NEXT: ret
+  ret i64 1224979104013484042
+}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
index 772ae8d81a88bf..9654542f877459 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store.ll
@@ -973,9 +973,8 @@ define i64 @ld_sd_constant(i64 %a) nounwind {
 ; LA64NOPIC-LABEL: ld_sd_constant:
 ; LA64NOPIC: # %bb.0:
 ; LA64NOPIC-NEXT: lu12i.w $a1, -136485
-; LA64NOPIC-NEXT: ori $a1, $a1, 3823
-; LA64NOPIC-NEXT: lu32i.d $a1, -147729
-; LA64NOPIC-NEXT: lu52i.d $a2, $a1, -534
+; LA64NOPIC-NEXT: ori $a2, $a1, 3823
+; LA64NOPIC-NEXT: bstrins.d $a2, $a2, 61, 32
 ; LA64NOPIC-NEXT: ld.d $a1, $a2, 0
 ; LA64NOPIC-NEXT: st.d $a0, $a2, 0
 ; LA64NOPIC-NEXT: move $a0, $a1
@@ -984,9 +983,8 @@ define i64 @ld_sd_constant(i64 %a) nounwind {
 ; LA64PIC-LABEL: ld_sd_constant:
 ; LA64PIC: # %bb.0:
 ; LA64PIC-NEXT: lu12i.w $a1, -136485
-; LA64PIC-NEXT: ori $a1, $a1, 3823
-; LA64PIC-NEXT: lu32i.d $a1, -147729
-; LA64PIC-NEXT: lu52i.d $a2, $a1, -534
+; LA64PIC-NEXT: ori $a2, $a1, 3823
+; LA64PIC-NEXT: bstrins.d $a2, $a2, 61, 32
 ; LA64PIC-NEXT: ld.d $a1, $a2, 0
 ; LA64PIC-NEXT: st.d $a0, $a2, 0
 ; LA64PIC-NEXT: move $a0, $a1
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
index 1e7a79beb62c61..323858c7613a67 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset.ll
@@ -1128,8 +1128,7 @@ define dso_local ptr @load_addr_offset_614750729487779976() nounwind {
 ; LA64-NEXT: addi.d $a0, $a0, %pc_lo12(g_a64)
 ; LA64-NEXT: lu12i.w $a1, 279556
 ; LA64-NEXT: ori $a1, $a1, 1088
-; LA64-NEXT: lu32i.d $a1, 17472
-; LA64-NEXT: lu52i.d $a1, $a1, 1092
+; LA64-NEXT: bstrins.d $a1, $a1, 62, 32
 ; LA64-NEXT: add.d $a0, $a0, $a1
 ; LA64-NEXT: ret
 ;
@@ -1142,8 +1141,7 @@ define dso_local ptr @load_addr_offset_614750729487779976() nounwind {
 ; LA64-LARGE-NEXT: add.d $a0, $a1, $a0
 ; LA64-LARGE-NEXT: lu12i.w $a1, 279556
 ; LA64-LARGE-NEXT: ori $a1, $a1, 1088
-; LA64-LARGE-NEXT: lu32i.d $a1, 17472
-; LA64-LARGE-NEXT: lu52i.d $a1, $a1, 1092
+; LA64-LARGE-NEXT: bstrins.d $a1, $a1, 62, 32
 ; LA64-LARGE-NEXT: add.d $a0, $a0, $a1
 ; LA64-LARGE-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/sextw-removal.ll b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
index 2bb39395c1d1b6..7500b5ae09359a 100644
--- a/llvm/test/CodeGen/LoongArch/sextw-removal.ll
+++ b/llvm/test/CodeGen/LoongArch/sextw-removal.ll
@@ -323,21 +323,17 @@ define void @test7(i32 signext %arg, i32 signext %arg1) nounwind {
 ; CHECK-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill
 ; CHECK-NEXT: sra.w $a0, $a0, $a1
 ; CHECK-NEXT: lu12i.w $a1, 349525
-; CHECK-NEXT: ori $a1, $a1, 1365
-; CHECK-NEXT: lu32i.d $a1, 349525
-; CHECK-NEXT: lu52i.d $fp, $a1, 1365
+; CHECK-NEXT: ori $fp, $a1, 1365
+; CHECK-NEXT: bstrins.d $fp, $fp, 62, 32
 ; CHECK-NEXT: lu12i.w $a1, 209715
-; CHECK-NEXT: ori $a1, $a1, 819
-; CHECK-NEXT: lu32i.d $a1, 209715
-; CHECK-NEXT: lu52i.d $s0, $a1, 819
+; CHECK-NEXT: ori $s0, $a1, 819
+; CHECK-NEXT: bstrins.d $s0, $s0, 61, 32
 ; CHECK-NEXT: lu12i.w $a1, 61680
-; CHECK-NEXT: ori $a1, $a1, 3855
-; CHECK-NEXT: lu32i.d $a1, -61681
-; CHECK-NEXT: lu52i.d $s1, $a1, 240
+; CHECK-NEXT: ori $s1, $a1, 3855
+; CHECK-NEXT: bstrins.d $s1, $s1, 59, 32
 ; CHECK-NEXT: lu12i.w $a1, 4112
-; CHECK-NEXT: ori $a1, $a1, 257
-; CHECK-NEXT: lu32i.d $a1, 65793
-; CHECK-NEXT: lu52i.d $s2, $a1, 16
+; CHECK-NEXT: ori $s2, $a1, 257
+; CHECK-NEXT: bstrins.d $s2, $s2, 56, 32
 ; CHECK-NEXT: .p2align 4, , 16
 ; CHECK-NEXT: .LBB6_1: # %bb2
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -374,21 +370,17 @@ define void @test7(i32 signext %arg, i32 signext %arg1) nounwind {
 ; NORMV-NEXT: st.d $s2, $sp, 8 # 8-byte Folded Spill
 ; NORMV-NEXT: sra.w $a0, $a0, $a1
 ; NORMV-NEXT: lu12i.w $a1, 349525
-; NORMV-NEXT: ori $a1, $a1, 1365
-; NORMV-NEXT: lu32i.d $a1, 349525
-; NORMV-NEXT: lu52i.d $fp, $a1, 1365
+; NORMV-NEXT: ori $fp, $a1, 1365
+; NORMV-NEXT: bstrins.d $fp, $fp, 62, 32
 ; NORMV-NEXT: lu12i.w $a1, 209715
-; NORMV-NEXT: ori $a1, $a1, 819
-; NORMV-NEXT: lu32i.d $a1, 209715
-; NORMV-NEXT: lu52i.d $s0, $a1, 819
+; NORMV-NEXT: ori $s0, $a1, 819
+; NORMV-NEXT: bstrins.d $s0, $s0, 61, 32
 ; NORMV-NEXT: lu12i.w $a1, 61680
-; NORMV-NEXT: ori $a1, $a1, 3855
-; NORMV-NEXT: lu32i.d $a1, -61681
-; NORMV-NEXT: lu52i.d $s1, $a1, 240
+; NORMV-NEXT: ori $s1, $a1, 3855
+; NORMV-NEXT: bstrins.d $s1, $s1, 59, 32
 ; NORMV-NEXT: lu12i.w $a1, 4112
-; NORMV-NEXT: ori $a1, $a1, 257
-; NORMV-NEXT: lu32i.d $a1, 65793
-; NORMV-NEXT: lu52i.d $s2, $a1, 16
+; NORMV-NEXT: ori $s2, $a1, 257
+; NORMV-NEXT: bstrins.d $s2, $s2, 56, 32
 ; NORMV-NEXT: .p2align 4, , 16
 ; NORMV-NEXT: .LBB6_1: # %bb2
 ; NORMV-NEXT: # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
index c88b2bf596ca23..cccb69d2e6986a 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
@@ -161,10 +161,8 @@ define i32 @caller_half_in_regs() nounwind {
 ; RV64IF: # %bb.0:
 ; RV64IF-NEXT: addi sp, sp, -16
 ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64IF-NEXT: lui a0, 1048564
-; RV64IF-NEXT: fmv.w.x fa5, a0
-; RV64IF-NEXT: fmv.x.w a1, fa5
 ; RV64IF-NEXT: li a0, 1
+; RV64IF-NEXT: lui a1, 1048564
 ; RV64IF-NEXT: call callee_half_in_regs
 ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT: addi sp, sp, 16
@@ -511,9 +509,8 @@ define half @callee_half_ret() nounwind {
 ;
 ; RV64IF-LABEL: callee_half_ret:
 ; RV64IF: # %bb.0:
-; RV64IF-NEXT: lui a0, %hi(.LCPI4_0)
-; RV64IF-NEXT: flw fa5, %lo(.LCPI4_0)(a0)
-; RV64IF-NEXT: fmv.x.w a0, fa5
+; RV64IF-NEXT: lui a0, 1048564
+; RV64IF-NEXT: addiw a0, a0, -1024
 ; RV64IF-NEXT: ret
 ;
 ; RV32-ILP32F-LABEL: callee_half_ret:
diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll
index c38416d994ba57..69a506cd850f2c 100644
--- a/llvm/test/CodeGen/RISCV/float-imm.ll
+++ b/llvm/test/CodeGen/RISCV/float-imm.ll
@@ -24,8 +24,8 @@ define float @float_imm() nounwind {
 ;
 ; RV64ZFINX-LABEL: float_imm:
 ; RV64ZFINX: # %bb.0:
-; RV64ZFINX-NEXT: lui a0, %hi(.LCPI0_0)
%lo(.LCPI0_0)(a0) +; RV64ZFINX-NEXT: lui a0, 263313 +; RV64ZFINX-NEXT: addiw a0, a0, -37 ; RV64ZFINX-NEXT: ret ret float 3.14159274101257324218750 } diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll index 9c11010540e15d..4c39885176f01a 100644 --- a/llvm/test/CodeGen/RISCV/half-imm.ll +++ b/llvm/test/CodeGen/RISCV/half-imm.ll @@ -15,10 +15,10 @@ ; RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s ; RUN: llc -mtriple=riscv32 -mattr=+zhinxmin -verify-machineinstrs \ ; RUN: -target-abi ilp32 < %s \ -; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN %s +; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN,RV32IZHINXMIN %s ; RUN: llc -mtriple=riscv64 -mattr=+zhinxmin -verify-machineinstrs \ ; RUN: -target-abi lp64 < %s \ -; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN %s +; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN,RV64IZHINXMIN %s ; TODO: constant pool shouldn't be necessary for RV32IZfh and RV64IZfh define half @half_imm() nounwind { @@ -30,14 +30,14 @@ define half @half_imm() nounwind { ; ; RV32IZHINX-LABEL: half_imm: ; RV32IZHINX: # %bb.0: -; RV32IZHINX-NEXT: lui a0, %hi(.LCPI0_0) -; RV32IZHINX-NEXT: lh a0, %lo(.LCPI0_0)(a0) +; RV32IZHINX-NEXT: lui a0, 4 +; RV32IZHINX-NEXT: addi a0, a0, 512 ; RV32IZHINX-NEXT: ret ; ; RV64IZHINX-LABEL: half_imm: ; RV64IZHINX: # %bb.0: -; RV64IZHINX-NEXT: lui a0, %hi(.LCPI0_0) -; RV64IZHINX-NEXT: lh a0, %lo(.LCPI0_0)(a0) +; RV64IZHINX-NEXT: lui a0, 4 +; RV64IZHINX-NEXT: addiw a0, a0, 512 ; RV64IZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: half_imm: @@ -46,11 +46,17 @@ define half @half_imm() nounwind { ; CHECKIZFHMIN-NEXT: flh fa0, %lo(.LCPI0_0)(a0) ; CHECKIZFHMIN-NEXT: ret ; -; CHECKIZHINXMIN-LABEL: half_imm: -; CHECKIZHINXMIN: # %bb.0: -; CHECKIZHINXMIN-NEXT: lui a0, %hi(.LCPI0_0) -; CHECKIZHINXMIN-NEXT: lh a0, %lo(.LCPI0_0)(a0) -; CHECKIZHINXMIN-NEXT: ret +; RV32IZHINXMIN-LABEL: half_imm: +; RV32IZHINXMIN: # %bb.0: +; RV32IZHINXMIN-NEXT: lui a0, 4 +; RV32IZHINXMIN-NEXT: addi a0, a0, 512 +; RV32IZHINXMIN-NEXT: ret +; +; RV64IZHINXMIN-LABEL: half_imm: +; RV64IZHINXMIN: # %bb.0: +; RV64IZHINXMIN-NEXT: lui a0, 4 +; RV64IZHINXMIN-NEXT: addiw a0, a0, 512 +; RV64IZHINXMIN-NEXT: ret ret half 3.0 } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 5d5807cbadbad5..4be680e272e5b9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -524,8 +524,7 @@ define float @vreduce_fadd_v7f32_neutralstart_fast(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vfredusum.vs v8, v8, v10 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll index ae3dce497c6d07..90a856605c70d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -19,12 +19,10 @@ define <2 x half> @vfabs_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vfabs_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v9, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, 
ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -39,12 +37,10 @@ define <2 x half> @vfabs_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vfabs_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v9, v9 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v @@ -61,12 +57,10 @@ define <4 x half> @vfabs_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vfabs_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfabs.v v9, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -81,12 +75,10 @@ define <4 x half> @vfabs_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vfabs_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfabs.v v9, v9 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v @@ -103,12 +95,10 @@ define <8 x half> @vfabs_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vfabs_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v10, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -123,12 +113,10 @@ define <8 x half> @vfabs_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vfabs_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v10, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; 
ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v @@ -145,12 +133,10 @@ define <16 x half> @vfabs_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ; ZVFHMIN-LABEL: vfabs_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfabs.v v12, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -165,12 +151,10 @@ define <16 x half> @vfabs_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vfabs_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfabs.v v12, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll index fbc4c56a911340..019923ffdfdedf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -19,12 +19,9 @@ define <2 x half> @vfneg_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vfneg_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfneg.v v9, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret <2 x half> %v @@ -39,12 +36,9 @@ define <2 x half> @vfneg_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vfneg_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfneg.v v9, v9 -; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v @@ -61,12 +55,9 @@ define <4 x half> @vfneg_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vfneg_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v 
v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfneg.v v9, v9, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -81,12 +72,9 @@ define <4 x half> @vfneg_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vfneg_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfneg.v v9, v9 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v @@ -103,12 +91,9 @@ define <8 x half> @vfneg_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vfneg_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfneg.v v10, v10, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -123,12 +108,9 @@ define <8 x half> @vfneg_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ; ZVFHMIN-LABEL: vfneg_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfneg.v v10, v10 -; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v @@ -145,12 +127,9 @@ define <16 x half> @vfneg_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ; ZVFHMIN-LABEL: vfneg_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfneg.v v12, v12, v0.t -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -165,12 +144,9 @@ define <16 x half> @vfneg_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ; ZVFHMIN-LABEL: vfneg_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfneg.v v12, v12 -; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: lui 
a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/reduce-vl-peephole.ll b/llvm/test/CodeGen/RISCV/rvv/reduce-vl-peephole.ll new file mode 100644 index 00000000000000..7f70b0ed224ec0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/reduce-vl-peephole.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s + +define void @avl_not_dominated(<vscale x 2 x i32> %v, ptr %p) { +; CHECK-LABEL: avl_not_dominated: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: slli a1, a1, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, 1 +; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: ret + %w = add <vscale x 2 x i32> %v, splat (i32 1) + %evl = extractelement <vscale x 2 x i32> %v, i32 0 + call void @llvm.vp.store(<vscale x 2 x i32> %w, ptr %p, <vscale x 2 x i1> splat(i1 true), i32 %evl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/reduce-vl-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/reduce-vl-peephole.mir new file mode 100644 index 00000000000000..5a223580821b75 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/reduce-vl-peephole.mir @@ -0,0 +1,15 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-vector-peephole \ +# RUN: -verify-machineinstrs | FileCheck %s +--- +name: avl_not_dominated +body: | + bb.0: + ; CHECK-LABEL: name: avl_not_dominated + ; CHECK: %evl:gprnox0 = ADDI $x0, 1 + ; CHECK-NEXT: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, %evl, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: PseudoVSE32_V_M1 %x, $noreg, %evl, 5 /* e32 */ + %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 5 /* e32 */, 0 /* tu, mu */ + %evl:gprnox0 = ADDI $x0, 1 + PseudoVSE32_V_M1 %x:vr, $noreg, %evl, 5 /* e32 */ +...
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll index 6e34d59a2d9894..e8a7d790758596 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll @@ -19,13 +19,12 @@ define <vscale x 1 x half> @vfsgnj_vv_nxv1f16(<vscale x 1 x half> %va, %v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x half> %v @@ -40,13 +39,12 @@ define <vscale x 1 x half> @vfsgnj_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, ; ; ZVFHMIN-LABEL: vfsgnj_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x half> %v @@ -63,13 +61,12 @@ define <vscale x 2 x half> @vfsgnj_vv_nxv2f16(<vscale x 2 x half> %va, %v = call <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x half> %v @@ -84,13 +81,12 @@ define <vscale x 2 x half> @vfsgnj_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, ; ; ZVFHMIN-LABEL: vfsgnj_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfsgnj.vv v9, v9, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x half> %v @@ -107,13 +103,12 @@ define <vscale x 4 x half> @vfsgnj_vv_nxv4f16(<vscale x 4 x half> %va, %v = call <vscale x 4 x half> @llvm.vp.copysign.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x half> %v @@ -128,13 +123,12 @@ define <vscale x 4 x half> @vfsgnj_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, ; ; ZVFHMIN-LABEL: vfsgnj_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfsgnj.vv v10, v12, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.copysign.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x half> %v @@ -151,13 +145,12 @@ define <vscale x 8 x half> @vfsgnj_vv_nxv8f16(<vscale x 8 x half> %va, %v = call <vscale x 8 x half> @llvm.vp.copysign.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x half> %v @@ -172,13 +165,12 @@ define <vscale x 8 x half> @vfsgnj_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, ; ; ZVFHMIN-LABEL: vfsgnj_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfsgnj.vv v12, v16, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v10, v10, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; 
ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v10 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.copysign.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x half> %v @@ -195,13 +187,12 @@ define <vscale x 16 x half> @vfsgnj_vv_nxv16f16(<vscale x 16 x half> %va, %v = call <vscale x 16 x half> @llvm.vp.copysign.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v @@ -216,13 +207,12 @@ define <vscale x 16 x half> @vfsgnj_vv_nxv16f16_unmasked(<vscale x 16 x half> %v ; ; ZVFHMIN-LABEL: vfsgnj_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v12, v12, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v12 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.copysign.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v @@ -239,48 +229,12 @@ define <vscale x 32 x half> @vfsgnj_vv_nxv32f16(<vscale x 32 x half> %va, %v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl) ret <vscale x 32 x half> %v @@ -295,48 +249,12 @@ define <vscale x 32 x half> @vfsgnj_vv_nxv32f16_unmasked(<vscale x 32 x half> %v ; ; ZVFHMIN-LABEL: vfsgnj_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a1, a2, 1 -; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: sltu a4, a0, a3 -; ZVFHMIN-NEXT: addi a4, a4, -1 -; ZVFHMIN-NEXT: and a3, a4, a3 -; ZVFHMIN-NEXT: srli a2, a2, 2 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v24 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfsgnj.vv v16, v16, v24, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 -; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v16, v16, a1 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v16 ; ZVFHMIN-NEXT: ret %v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl) ret <vscale x 32 x half> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll index 0f7e3f1e0ea5a2..b9be6eb1fa3737 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll @@ -19,12 +19,10 @@ define <vscale x 1 x half> @vfabs_vv_nxv1f16(<vscale x 1 x half> %va, %v = call <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x half> %v @@ -39,12 +37,10 @@ define <vscale x 1 x half> @vfabs_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i ; ; ZVFHMIN-LABEL: vfabs_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v9, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x half> %v @@ -61,12 +57,10 @@ define <vscale x 2 x half> @vfabs_vv_nxv2f16(<vscale x 2 x half> %va, %v = call <vscale x 2 x half> @llvm.vp.fabs.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x half> %v @@ -81,12 +75,10 @@ define <vscale x 2 x half> @vfabs_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i ; ; ZVFHMIN-LABEL: vfabs_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfabs.v v9, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.fabs.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x half> %v @@ -103,12 +95,10 @@ define <vscale x 4 x half> @vfabs_vv_nxv4f16(<vscale x 4 x half> %va, %v = call <vscale x 4 x half> @llvm.vp.fabs.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x half> %v @@ -123,12 +113,10 @@ define <vscale x 4 x half> @vfabs_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i ; ; ZVFHMIN-LABEL: vfabs_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfabs.v v10, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.fabs.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x half> %v @@ -145,12 +133,10 @@ define <vscale x 8 x half> @vfabs_vv_nxv8f16(<vscale x 8 x half> %va, %v = call <vscale x 8 x half> @llvm.vp.fabs.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x half> %v @@ -165,12 +151,10 @@ define <vscale x 8 x half> @vfabs_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i ; ; ZVFHMIN-LABEL: vfabs_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfabs.v v12, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.fabs.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x half> %v @@ -187,12 +171,10 @@ define <vscale x 16 x half> @vfabs_vv_nxv16f16(<vscale x 16 x half> %va, %v = call <vscale x 16 x half> @llvm.vp.fabs.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v @@ -207,12 +189,10 @@ define <vscale x 16 x half> @vfabs_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ; ZVFHMIN-LABEL: vfabs_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v16, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, 
a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.fabs.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v @@ -229,32 +209,10 @@ define <vscale x 32 x half> @vfabs_vv_nxv32f16(<vscale x 32 x half> %va, %v = call <vscale x 32 x half> @llvm.vp.fabs.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl) ret <vscale x 32 x half> %v @@ -269,32 +227,10 @@ define <vscale x 32 x half> @vfabs_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ; ZVFHMIN-LABEL: vfabs_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a1, a2, 1 -; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: sltu a4, a0, a3 -; ZVFHMIN-NEXT: addi a4, a4, -1 -; ZVFHMIN-NEXT: and a3, a4, a3 -; ZVFHMIN-NEXT: srli a2, a2, 2 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v16, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 -; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v16, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 32 x half> @llvm.vp.fabs.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) ret <vscale x 32 x half> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll index 69ea7ce33cf6b6..af2668a9b0c545 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll @@ -19,12 +19,9 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va, %v = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x half> %v @@ -39,12 +36,9 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i ; ; ZVFHMIN-LABEL: vfneg_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfneg.v v9, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x half> %v @@ -61,12 +55,9 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va, %v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x half> %v @@ -81,12 +72,9 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i ; ; ZVFHMIN-LABEL: vfneg_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfneg.v v9, v9 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x half> %v @@ -103,12 +91,9 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va, %v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x half> %v @@ -123,12 +108,9 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i ; ; ZVFHMIN-LABEL: vfneg_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v 
v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfneg.v v10, v10 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x half> %v @@ -145,12 +127,9 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va, %v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x half> %v @@ -165,12 +144,9 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i ; ; ZVFHMIN-LABEL: vfneg_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfneg.v v12, v12 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x half> %v @@ -187,12 +163,9 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va, %v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v @@ -207,12 +180,9 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ; ZVFHMIN-LABEL: vfneg_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfneg.v v16, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v @@ -229,32 +199,9 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va, %v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl) ret <vscale x 32 x half> %v @@ -269,32 +216,9 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ; ZVFHMIN-LABEL: vfneg_vv_nxv32f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a1, a2, 1 -; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: sltu a4, a0, a3 -; ZVFHMIN-NEXT: addi a4, a4, -1 -; ZVFHMIN-NEXT: and a3, a4, a3 -; ZVFHMIN-NEXT: srli a2, a2, 2 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma -; ZVFHMIN-NEXT: vmset.m v16 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfneg.v v16, v16, v0.t -; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 -; ZVFHMIN-NEXT: # %bb.1: -; ZVFHMIN-NEXT: mv a0, a1 -; ZVFHMIN-NEXT: .LBB11_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfneg.v v16, v16 -; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 +; ZVFHMIN-NEXT: lui a1, 8 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 ; ZVFHMIN-NEXT: ret %v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl) ret <vscale x 32 x half> %v diff --git a/llvm/test/CodeGen/WebAssembly/half-precision.ll b/llvm/test/CodeGen/WebAssembly/half-precision.ll index adba502335f86c..c0b14d2064d5eb 100644 --- a/llvm/test/CodeGen/WebAssembly/half-precision.ll +++ b/llvm/test/CodeGen/WebAssembly/half-precision.ll @@ -290,3 +290,21 @@ define <8 x i16> @trunc_sat_u_v8i16(<8 x
half> %x) { %a = fptoui <8 x half> %x to <8 x i16> ret <8 x i16> %a } + +define <8 x i16> @trunc_sat_s_v8i16_sat(<8 x half> %x) { +; CHECK-LABEL: trunc_sat_s_v8i16_sat: +; CHECK: .functype trunc_sat_s_v8i16_sat (v128) -> (v128) +; CHECK-NEXT: i16x8.trunc_sat_f16x8_s $push[[R:[0-9]+]]=, $0 +; CHECK-NEXT: return $pop[[R]]{{$}} + %a = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %x) + ret <8 x i16> %a +} + +define <8 x i16> @trunc_sat_u_v8i16_sat(<8 x half> %x) { +; CHECK-LABEL: trunc_sat_u_v8i16_sat: +; CHECK: .functype trunc_sat_u_v8i16_sat (v128) -> (v128) +; CHECK-NEXT: i16x8.trunc_sat_f16x8_u $push[[R:[0-9]+]]=, $0 +; CHECK-NEXT: return $pop[[R]]{{$}} + %a = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %x) + ret <8 x i16> %a +} diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll index 032168e28421b9..23b1043c700165 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll @@ -33,7 +33,7 @@ declare void @use32(ptr) ;. define void @test_alloca() sanitize_hwaddress !dbg !15 { ; DYNAMIC-SHADOW-LABEL: define void @test_alloca -; DYNAMIC-SHADOW-SAME: () #[[ATTR1:[0-9]+]] personality ptr @__hwasan_personality_thunk !dbg [[DBG8:![0-9]+]] { +; DYNAMIC-SHADOW-SAME: () #[[ATTR0:[0-9]+]] personality ptr @__hwasan_personality_thunk !dbg [[DBG7:![0-9]+]] { ; DYNAMIC-SHADOW-NEXT: entry: ; DYNAMIC-SHADOW-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) ; DYNAMIC-SHADOW-NEXT: [[TMP0:%.*]] = call ptr @llvm.frameaddress.p0(i32 0) @@ -42,33 +42,33 @@ define void @test_alloca() sanitize_hwaddress !dbg !15 { ; DYNAMIC-SHADOW-NEXT: [[HWASAN_STACK_BASE_TAG:%.*]] = xor i64 [[TMP1]], [[TMP2]] ; DYNAMIC-SHADOW-NEXT: [[HWASAN_UAR_TAG:%.*]] = lshr i64 [[TMP1]], 56 ; DYNAMIC-SHADOW-NEXT: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16 -; DYNAMIC-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META11:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META13:![0-9]+]]) -; DYNAMIC-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG14:![0-9]+]] -; DYNAMIC-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP11]], !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg
[[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = trunc i64 [[HWASAN_UAR_TAG]] to i8, !dbg [[DBG15:![0-9]+]] -; DYNAMIC-SHADOW-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG15]] -; DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 72057594037927935, !dbg [[DBG15]] -; DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP17]], 4, !dbg [[DBG15]] -; DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP18]], !dbg [[DBG15]] -; DYNAMIC-SHADOW-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP19]], i8 [[TMP15]], i64 1, i1 false), !dbg [[DBG15]] -; DYNAMIC-SHADOW-NEXT: ret void, !dbg [[DBG15]] +; DYNAMIC-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META10:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META12:![0-9]+]]) +; DYNAMIC-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG13:![0-9]+]] +; DYNAMIC-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP11]], !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = trunc i64 [[HWASAN_UAR_TAG]] to i8, !dbg [[DBG14:![0-9]+]] +; DYNAMIC-SHADOW-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] +; DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 72057594037927935, !dbg [[DBG14]] +; DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP17]], 4, !dbg [[DBG14]] +; DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP18]], !dbg [[DBG14]] +; DYNAMIC-SHADOW-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP19]], i8 [[TMP15]], i64 1, i1 false), !dbg [[DBG14]] +; DYNAMIC-SHADOW-NEXT: ret void, !dbg [[DBG14]] ; ; ZERO-BASED-SHADOW-LABEL: define void @test_alloca -; ZERO-BASED-SHADOW-SAME: () #[[ATTR1:[0-9]+]] personality ptr @__hwasan_personality_thunk !dbg [[DBG8:![0-9]+]] { +; ZERO-BASED-SHADOW-SAME: () #[[ATTR0:[0-9]+]] personality ptr @__hwasan_personality_thunk !dbg [[DBG7:![0-9]+]] { ; ZERO-BASED-SHADOW-NEXT: entry: ; ZERO-BASED-SHADOW-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr null) ; ZERO-BASED-SHADOW-NEXT: [[TMP0:%.*]] = call ptr @llvm.frameaddress.p0(i32 0) @@ -77,30 +77,30 @@ define void @test_alloca() sanitize_hwaddress !dbg !15 { ; ZERO-BASED-SHADOW-NEXT: [[HWASAN_STACK_BASE_TAG:%.*]] = xor i64 [[TMP1]], 
[[TMP2]] ; ZERO-BASED-SHADOW-NEXT: [[HWASAN_UAR_TAG:%.*]] = lshr i64 [[TMP1]], 56 ; ZERO-BASED-SHADOW-NEXT: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16 -; ZERO-BASED-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META11:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META13:![0-9]+]]) -; ZERO-BASED-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG14:![0-9]+]] -; ZERO-BASED-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = trunc i64 [[HWASAN_UAR_TAG]] to i8, !dbg [[DBG15:![0-9]+]] -; ZERO-BASED-SHADOW-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG15]] -; ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 72057594037927935, !dbg [[DBG15]] -; ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP17]], 4, !dbg [[DBG15]] -; ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr, !dbg [[DBG15]] -; ZERO-BASED-SHADOW-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP19]], i8 [[TMP15]], i64 1, i1 false), !dbg [[DBG15]] -; ZERO-BASED-SHADOW-NEXT: ret void, !dbg [[DBG15]] +; ZERO-BASED-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META10:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META12:![0-9]+]]) +; ZERO-BASED-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG13:![0-9]+]] +; ZERO-BASED-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg 
[[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = trunc i64 [[HWASAN_UAR_TAG]] to i8, !dbg [[DBG14:![0-9]+]] +; ZERO-BASED-SHADOW-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] +; ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 72057594037927935, !dbg [[DBG14]] +; ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP17]], 4, !dbg [[DBG14]] +; ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr, !dbg [[DBG14]] +; ZERO-BASED-SHADOW-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP19]], i8 [[TMP15]], i64 1, i1 false), !dbg [[DBG14]] +; ZERO-BASED-SHADOW-NEXT: ret void, !dbg [[DBG14]] ; entry: %x = alloca i32, align 4 @@ -131,17 +131,15 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !23 = !DILocation(line: 7, column: 5, scope: !15) !24 = !DILocation(line: 8, column: 1, scope: !15) ;. -; DYNAMIC-SHADOW: attributes #[[ATTR0:[0-9]+]] = { nobuiltin } -; DYNAMIC-SHADOW: attributes #[[ATTR1]] = { nobuiltin sanitize_hwaddress } -; DYNAMIC-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nounwind } -; DYNAMIC-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } -; DYNAMIC-SHADOW: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; DYNAMIC-SHADOW: attributes #[[ATTR0]] = { sanitize_hwaddress } +; DYNAMIC-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nounwind } +; DYNAMIC-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; DYNAMIC-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. -; ZERO-BASED-SHADOW: attributes #[[ATTR0:[0-9]+]] = { nobuiltin } -; ZERO-BASED-SHADOW: attributes #[[ATTR1]] = { nobuiltin sanitize_hwaddress } -; ZERO-BASED-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nounwind } -; ZERO-BASED-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } -; ZERO-BASED-SHADOW: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; ZERO-BASED-SHADOW: attributes #[[ATTR0]] = { sanitize_hwaddress } +; ZERO-BASED-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nounwind } +; ZERO-BASED-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; ZERO-BASED-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. 
; DYNAMIC-SHADOW: [[META0]] = !{ptr @hwasan.note} ; DYNAMIC-SHADOW: [[META1:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META2:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META3:![0-9]+]], splitDebugInlining: false, nameTableKind: None) @@ -149,16 +147,15 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; DYNAMIC-SHADOW: [[META3]] = !{} ; DYNAMIC-SHADOW: [[META4:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 4} ; DYNAMIC-SHADOW: [[META5:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} -; DYNAMIC-SHADOW: [[META6:![0-9]+]] = !{i32 4, !"nosanitize_hwaddress", i32 1} -; DYNAMIC-SHADOW: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; DYNAMIC-SHADOW: [[DBG8]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: [[META2]], file: [[META2]], line: 4, type: [[META9:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META1]], retainedNodes: [[META3]]) -; DYNAMIC-SHADOW: [[META9]] = !DISubroutineType(types: [[META10:![0-9]+]]) -; DYNAMIC-SHADOW: [[META10]] = !{null} -; DYNAMIC-SHADOW: [[META11]] = !DILocalVariable(name: "x", scope: [[DBG8]], file: [[META2]], line: 5, type: [[META12:![0-9]+]]) -; DYNAMIC-SHADOW: [[META12]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -; DYNAMIC-SHADOW: [[META13]] = !DILocation(line: 0, scope: [[DBG8]]) -; DYNAMIC-SHADOW: [[DBG14]] = !DILocation(line: 7, column: 5, scope: [[DBG8]]) -; DYNAMIC-SHADOW: [[DBG15]] = !DILocation(line: 8, column: 1, scope: [[DBG8]]) +; DYNAMIC-SHADOW: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; DYNAMIC-SHADOW: [[DBG7]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: [[META2]], file: [[META2]], line: 4, type: [[META8:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META1]], retainedNodes: [[META3]]) +; DYNAMIC-SHADOW: [[META8]] = !DISubroutineType(types: [[META9:![0-9]+]]) +; DYNAMIC-SHADOW: [[META9]] = !{null} +; DYNAMIC-SHADOW: [[META10]] = !DILocalVariable(name: "x", scope: [[DBG7]], file: [[META2]], line: 5, type: [[META11:![0-9]+]]) +; DYNAMIC-SHADOW: [[META11]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; DYNAMIC-SHADOW: [[META12]] = !DILocation(line: 0, scope: [[DBG7]]) +; DYNAMIC-SHADOW: [[DBG13]] = !DILocation(line: 7, column: 5, scope: [[DBG7]]) +; DYNAMIC-SHADOW: [[DBG14]] = !DILocation(line: 8, column: 1, scope: [[DBG7]]) ;. 
; ZERO-BASED-SHADOW: [[META0]] = !{ptr @hwasan.note} ; ZERO-BASED-SHADOW: [[META1:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META2:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META3:![0-9]+]], splitDebugInlining: false, nameTableKind: None) @@ -166,14 +163,13 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; ZERO-BASED-SHADOW: [[META3]] = !{} ; ZERO-BASED-SHADOW: [[META4:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 4} ; ZERO-BASED-SHADOW: [[META5:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} -; ZERO-BASED-SHADOW: [[META6:![0-9]+]] = !{i32 4, !"nosanitize_hwaddress", i32 1} -; ZERO-BASED-SHADOW: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; ZERO-BASED-SHADOW: [[DBG8]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: [[META2]], file: [[META2]], line: 4, type: [[META9:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META1]], retainedNodes: [[META3]]) -; ZERO-BASED-SHADOW: [[META9]] = !DISubroutineType(types: [[META10:![0-9]+]]) -; ZERO-BASED-SHADOW: [[META10]] = !{null} -; ZERO-BASED-SHADOW: [[META11]] = !DILocalVariable(name: "x", scope: [[DBG8]], file: [[META2]], line: 5, type: [[META12:![0-9]+]]) -; ZERO-BASED-SHADOW: [[META12]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -; ZERO-BASED-SHADOW: [[META13]] = !DILocation(line: 0, scope: [[DBG8]]) -; ZERO-BASED-SHADOW: [[DBG14]] = !DILocation(line: 7, column: 5, scope: [[DBG8]]) -; ZERO-BASED-SHADOW: [[DBG15]] = !DILocation(line: 8, column: 1, scope: [[DBG8]]) +; ZERO-BASED-SHADOW: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; ZERO-BASED-SHADOW: [[DBG7]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: [[META2]], file: [[META2]], line: 4, type: [[META8:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META1]], retainedNodes: [[META3]]) +; ZERO-BASED-SHADOW: [[META8]] = !DISubroutineType(types: [[META9:![0-9]+]]) +; ZERO-BASED-SHADOW: [[META9]] = !{null} +; ZERO-BASED-SHADOW: [[META10]] = !DILocalVariable(name: "x", scope: [[DBG7]], file: [[META2]], line: 5, type: [[META11:![0-9]+]]) +; ZERO-BASED-SHADOW: [[META11]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; ZERO-BASED-SHADOW: [[META12]] = !DILocation(line: 0, scope: [[DBG7]]) +; ZERO-BASED-SHADOW: [[DBG13]] = !DILocation(line: 7, column: 5, scope: [[DBG7]]) +; ZERO-BASED-SHADOW: [[DBG14]] = !DILocation(line: 8, column: 1, scope: [[DBG7]]) ;. 
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/basic.ll b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/basic.ll index dc2d11cb4b3538..9cebe2e845f772 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/basic.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/basic.ll @@ -9,6 +9,8 @@ ; RUN: opt < %s -passes=hwasan -hwasan-recover=0 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=ABORT-ZERO-BASED-SHADOW ; RUN: opt < %s -passes=hwasan -hwasan-recover=1 -hwasan-mapping-offset=0 -S | FileCheck %s --check-prefixes=RECOVER-ZERO-BASED-SHADOW +; CHECK: @llvm.used = appending global [1 x ptr] [ptr @hwasan.module_ctor] +; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @hwasan.module_ctor, ptr @hwasan.module_ctor }] target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "riscv64-unknown-linux" @@ -30,7 +32,7 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: 12: ; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 0) ; CHECK-NEXT: br label [[TMP13]] @@ -66,7 +68,7 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; FASTPATH-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; FASTPATH-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2:![0-9]+]] +; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1:![0-9]+]] ; FASTPATH: 12: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 0) ; FASTPATH-NEXT: br label [[TMP13]] @@ -86,7 +88,7 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2:![0-9]+]] +; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1:![0-9]+]] ; ABORT-DYNAMIC-SHADOW: 8: ; ABORT-DYNAMIC-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) ; ABORT-DYNAMIC-SHADOW-NEXT: br label [[TMP9]] @@ -106,10 +108,10 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2:![0-9]+]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1:![0-9]+]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 
[[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 96", "{x10}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -118,13 +120,13 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 0 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -143,7 +145,7 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2:![0-9]+]] +; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1:![0-9]+]] ; ABORT-ZERO-BASED-SHADOW: 8: ; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) ; ABORT-ZERO-BASED-SHADOW-NEXT: br label [[TMP9]] @@ -163,10 +165,10 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2:![0-9]+]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1:![0-9]+]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 96", "{x10}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -175,13 +177,13 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] 
= add i8 [[TMP13]], 0 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -210,7 +212,7 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; CHECK: 12: ; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 1) ; CHECK-NEXT: br label [[TMP13]] @@ -246,7 +248,7 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; FASTPATH-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; FASTPATH-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; FASTPATH: 12: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 1) ; FASTPATH-NEXT: br label [[TMP13]] @@ -266,7 +268,7 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-DYNAMIC-SHADOW: 8: ; ABORT-DYNAMIC-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 1) ; ABORT-DYNAMIC-SHADOW-NEXT: br label [[TMP9]] @@ -286,10 +288,10 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 
[[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 97", "{x10}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -298,13 +300,13 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -323,7 +325,7 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-ZERO-BASED-SHADOW: 8: ; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 1) ; ABORT-ZERO-BASED-SHADOW-NEXT: br label [[TMP9]] @@ -343,10 +345,10 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 97", "{x10}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -355,13 +357,13 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 1 ; 
RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -390,7 +392,7 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; CHECK: 12: ; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 2) ; CHECK-NEXT: br label [[TMP13]] @@ -426,7 +428,7 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; FASTPATH-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; FASTPATH-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; FASTPATH: 12: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 2) ; FASTPATH-NEXT: br label [[TMP13]] @@ -446,7 +448,7 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-DYNAMIC-SHADOW: 8: ; ABORT-DYNAMIC-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 2) ; ABORT-DYNAMIC-SHADOW-NEXT: br label [[TMP9]] @@ -466,10 +468,10 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; 
RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 98", "{x10}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -478,13 +480,13 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 3 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -503,7 +505,7 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-ZERO-BASED-SHADOW: 8: ; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 2) ; ABORT-ZERO-BASED-SHADOW-NEXT: br label [[TMP9]] @@ -523,10 +525,10 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 98", "{x10}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -535,13 +537,13 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 3 ; 
RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -570,7 +572,7 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; CHECK: 12: ; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 3) ; CHECK-NEXT: br label [[TMP13]] @@ -606,7 +608,7 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; FASTPATH-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; FASTPATH-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; FASTPATH: 12: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 3) ; FASTPATH-NEXT: br label [[TMP13]] @@ -626,7 +628,7 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-DYNAMIC-SHADOW: 8: ; ABORT-DYNAMIC-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 3) ; ABORT-DYNAMIC-SHADOW-NEXT: br label [[TMP9]] @@ -646,10 +648,10 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; 
RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 99", "{x10}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -658,13 +660,13 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 7 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -683,7 +685,7 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-ZERO-BASED-SHADOW: 8: ; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 3) ; ABORT-ZERO-BASED-SHADOW-NEXT: br label [[TMP9]] @@ -703,10 +705,10 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 99", "{x10}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -715,13 +717,13 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 7 ; 
RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -750,7 +752,7 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; CHECK: 12: ; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 4) ; CHECK-NEXT: br label [[TMP13]] @@ -786,7 +788,7 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; FASTPATH-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; FASTPATH-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; FASTPATH: 12: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 4) ; FASTPATH-NEXT: br label [[TMP13]] @@ -806,7 +808,7 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-DYNAMIC-SHADOW: 8: ; ABORT-DYNAMIC-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 4) ; ABORT-DYNAMIC-SHADOW-NEXT: br label [[TMP9]] @@ -826,10 +828,10 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; 
RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 100", "{x10}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -838,13 +840,13 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -863,7 +865,7 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-ZERO-BASED-SHADOW: 8: ; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 4) ; ABORT-ZERO-BASED-SHADOW-NEXT: br label [[TMP9]] @@ -883,10 +885,10 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 100", "{x10}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -895,13 +897,13 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 15 ; 
RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -1011,7 +1013,7 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; CHECK: 12: ; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 16) ; CHECK-NEXT: br label [[TMP13]] @@ -1047,7 +1049,7 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; FASTPATH-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; FASTPATH-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; FASTPATH: 12: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 16) ; FASTPATH-NEXT: br label [[TMP13]] @@ -1067,7 +1069,7 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-DYNAMIC-SHADOW: 8: ; ABORT-DYNAMIC-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 16) ; ABORT-DYNAMIC-SHADOW-NEXT: br label [[TMP9]] @@ -1087,10 +1089,10 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: 
[[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 112", "{x10}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1099,13 +1101,13 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 0 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -1124,7 +1126,7 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-ZERO-BASED-SHADOW: 8: ; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 16) ; ABORT-ZERO-BASED-SHADOW-NEXT: br label [[TMP9]] @@ -1144,10 +1146,10 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 112", "{x10}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1156,13 +1158,13 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; 
RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 0 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -1191,7 +1193,7 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; CHECK: 12: ; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 17) ; CHECK-NEXT: br label [[TMP13]] @@ -1227,7 +1229,7 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; FASTPATH-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; FASTPATH-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; FASTPATH: 12: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 17) ; FASTPATH-NEXT: br label [[TMP13]] @@ -1247,7 +1249,7 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-DYNAMIC-SHADOW: 8: ; ABORT-DYNAMIC-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 17) ; ABORT-DYNAMIC-SHADOW-NEXT: br label [[TMP9]] @@ -1267,10 +1269,10 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof 
[[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 113", "{x10}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1279,13 +1281,13 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -1304,7 +1306,7 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-ZERO-BASED-SHADOW: 8: ; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 17) ; ABORT-ZERO-BASED-SHADOW-NEXT: br label [[TMP9]] @@ -1324,10 +1326,10 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 113", "{x10}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1336,13 +1338,13 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { 
; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -1371,7 +1373,7 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; CHECK: 12: ; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 18) ; CHECK-NEXT: br label [[TMP13]] @@ -1407,7 +1409,7 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; FASTPATH-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; FASTPATH-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; FASTPATH: 12: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 18) ; FASTPATH-NEXT: br label [[TMP13]] @@ -1427,7 +1429,7 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-DYNAMIC-SHADOW: 8: ; ABORT-DYNAMIC-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 18) ; ABORT-DYNAMIC-SHADOW-NEXT: br label [[TMP9]] @@ -1447,10 +1449,10 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; 
RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 114", "{x10}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1459,13 +1461,13 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 3 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -1484,7 +1486,7 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-ZERO-BASED-SHADOW: 8: ; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 18) ; ABORT-ZERO-BASED-SHADOW-NEXT: br label [[TMP9]] @@ -1504,10 +1506,10 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 114", "{x10}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label 
[[TMP21:%.*]] @@ -1516,13 +1518,13 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 3 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -1551,7 +1553,7 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; CHECK: 12: ; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 19) ; CHECK-NEXT: br label [[TMP13]] @@ -1587,7 +1589,7 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; FASTPATH-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; FASTPATH-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; FASTPATH: 12: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 19) ; FASTPATH-NEXT: br label [[TMP13]] @@ -1607,7 +1609,7 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-DYNAMIC-SHADOW: 8: ; ABORT-DYNAMIC-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 19) ; ABORT-DYNAMIC-SHADOW-NEXT: br label [[TMP9]] @@ -1627,10 +1629,10 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; 
RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 115", "{x10}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1639,13 +1641,13 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 7 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -1664,7 +1666,7 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-ZERO-BASED-SHADOW: 8: ; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 19) ; ABORT-ZERO-BASED-SHADOW-NEXT: br label [[TMP9]] @@ -1684,10 +1686,10 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect 
"ebreak\0Aaddiw x0, x11, 115", "{x10}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1696,13 +1698,13 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 7 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -1731,7 +1733,7 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; CHECK-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; CHECK: 12: ; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 20) ; CHECK-NEXT: br label [[TMP13]] @@ -1767,7 +1769,7 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[TMP8]] ; FASTPATH-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 ; FASTPATH-NEXT: [[TMP11:%.*]] = icmp ne i8 [[TMP6]], [[TMP10]] -; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] ; FASTPATH: 12: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP3]], ptr [[A]], i32 20) ; FASTPATH-NEXT: br label [[TMP13]] @@ -1787,7 +1789,7 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-DYNAMIC-SHADOW: 8: ; ABORT-DYNAMIC-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 20) ; ABORT-DYNAMIC-SHADOW-NEXT: br label [[TMP9]] @@ -1807,10 +1809,10 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], 
align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 116", "{x10}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1819,13 +1821,13 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -1844,7 +1846,7 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; ABORT-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; ABORT-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; ABORT-ZERO-BASED-SHADOW: 8: ; ABORT-ZERO-BASED-SHADOW-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 20) ; ABORT-ZERO-BASED-SHADOW-NEXT: br label [[TMP9]] @@ -1864,10 +1866,10 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof 
[[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "ebreak\0Aaddiw x0, x11, 116", "{x10}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1876,13 +1878,13 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -2058,43 +2060,43 @@ entry: define i8 @test_load_noattr(ptr %a) { ; CHECK-LABEL: define i8 @test_load_noattr -; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: (ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; CHECK-NEXT: ret i8 [[B]] ; ; NOFASTPATH-LABEL: define i8 @test_load_noattr -; NOFASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; NOFASTPATH-SAME: (ptr [[A:%.*]]) { ; NOFASTPATH-NEXT: entry: ; NOFASTPATH-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; NOFASTPATH-NEXT: ret i8 [[B]] ; ; FASTPATH-LABEL: define i8 @test_load_noattr -; FASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; FASTPATH-SAME: (ptr [[A:%.*]]) { ; FASTPATH-NEXT: entry: ; FASTPATH-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; FASTPATH-NEXT: ret i8 [[B]] ; ; ABORT-DYNAMIC-SHADOW-LABEL: define i8 @test_load_noattr -; ABORT-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; ABORT-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) { ; ABORT-DYNAMIC-SHADOW-NEXT: entry: ; ABORT-DYNAMIC-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; ABORT-DYNAMIC-SHADOW-NEXT: ret i8 [[B]] ; ; RECOVER-DYNAMIC-SHADOW-LABEL: define i8 @test_load_noattr -; RECOVER-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; RECOVER-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) { ; RECOVER-DYNAMIC-SHADOW-NEXT: entry: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; RECOVER-DYNAMIC-SHADOW-NEXT: ret i8 [[B]] ; ; ABORT-ZERO-BASED-SHADOW-LABEL: define i8 @test_load_noattr -; ABORT-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; ABORT-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) { ; ABORT-ZERO-BASED-SHADOW-NEXT: entry: ; ABORT-ZERO-BASED-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; ABORT-ZERO-BASED-SHADOW-NEXT: ret i8 [[B]] ; ; RECOVER-ZERO-BASED-SHADOW-LABEL: define i8 @test_load_noattr -; RECOVER-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; RECOVER-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) { ; RECOVER-ZERO-BASED-SHADOW-NEXT: entry: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; RECOVER-ZERO-BASED-SHADOW-NEXT: ret 
i8 [[B]] @@ -2106,43 +2108,43 @@ entry: define i8 @test_load_notmyattr(ptr %a) sanitize_address { ; CHECK-LABEL: define i8 @test_load_notmyattr -; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; CHECK-NEXT: ret i8 [[B]] ; ; NOFASTPATH-LABEL: define i8 @test_load_notmyattr -; NOFASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; NOFASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; NOFASTPATH-NEXT: entry: ; NOFASTPATH-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; NOFASTPATH-NEXT: ret i8 [[B]] ; ; FASTPATH-LABEL: define i8 @test_load_notmyattr -; FASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; FASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; FASTPATH-NEXT: entry: ; FASTPATH-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; FASTPATH-NEXT: ret i8 [[B]] ; ; ABORT-DYNAMIC-SHADOW-LABEL: define i8 @test_load_notmyattr -; ABORT-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; ABORT-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; ABORT-DYNAMIC-SHADOW-NEXT: entry: ; ABORT-DYNAMIC-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; ABORT-DYNAMIC-SHADOW-NEXT: ret i8 [[B]] ; ; RECOVER-DYNAMIC-SHADOW-LABEL: define i8 @test_load_notmyattr -; RECOVER-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; RECOVER-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; RECOVER-DYNAMIC-SHADOW-NEXT: entry: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; RECOVER-DYNAMIC-SHADOW-NEXT: ret i8 [[B]] ; ; ABORT-ZERO-BASED-SHADOW-LABEL: define i8 @test_load_notmyattr -; ABORT-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; ABORT-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; ABORT-ZERO-BASED-SHADOW-NEXT: entry: ; ABORT-ZERO-BASED-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; ABORT-ZERO-BASED-SHADOW-NEXT: ret i8 [[B]] ; ; RECOVER-ZERO-BASED-SHADOW-LABEL: define i8 @test_load_notmyattr -; RECOVER-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; RECOVER-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; RECOVER-ZERO-BASED-SHADOW-NEXT: entry: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; RECOVER-ZERO-BASED-SHADOW-NEXT: ret i8 [[B]] diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll index 0f74736dc232ea..4bd23ea76c159b 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll @@ -34,7 +34,7 @@ declare void @use32(ptr) ;. 
define void @test_alloca() sanitize_hwaddress !dbg !15 { ; DYNAMIC-SHADOW-LABEL: define void @test_alloca( -; DYNAMIC-SHADOW-SAME: ) #[[ATTR1:[0-9]+]] personality ptr @__hwasan_personality_thunk !dbg [[DBG8:![0-9]+]] { +; DYNAMIC-SHADOW-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__hwasan_personality_thunk !dbg [[DBG7:![0-9]+]] { ; DYNAMIC-SHADOW-NEXT: entry: ; DYNAMIC-SHADOW-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) ; DYNAMIC-SHADOW-NEXT: [[TMP0:%.*]] = call ptr @llvm.frameaddress.p0(i32 0) @@ -43,33 +43,33 @@ define void @test_alloca() sanitize_hwaddress !dbg !15 { ; DYNAMIC-SHADOW-NEXT: [[HWASAN_STACK_BASE_TAG:%.*]] = xor i64 [[TMP1]], [[TMP2]] ; DYNAMIC-SHADOW-NEXT: [[HWASAN_UAR_TAG:%.*]] = lshr i64 [[TMP1]], 56 ; DYNAMIC-SHADOW-NEXT: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16 -; DYNAMIC-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META11:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META13:![0-9]+]]) -; DYNAMIC-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG14:![0-9]+]] -; DYNAMIC-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP11]], !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG14]] -; DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = trunc i64 [[HWASAN_UAR_TAG]] to i8, !dbg [[DBG15:![0-9]+]] -; DYNAMIC-SHADOW-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG15]] -; DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 72057594037927935, !dbg [[DBG15]] -; DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP17]], 4, !dbg [[DBG15]] -; DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP18]], !dbg [[DBG15]] -; DYNAMIC-SHADOW-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP19]], i8 [[TMP15]], i64 1, i1 false), !dbg [[DBG15]] -; DYNAMIC-SHADOW-NEXT: ret void, !dbg [[DBG15]] +; DYNAMIC-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META10:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META12:![0-9]+]]) +; DYNAMIC-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG13:![0-9]+]] +; DYNAMIC-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: 
[[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP11]], !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = trunc i64 [[HWASAN_UAR_TAG]] to i8, !dbg [[DBG14:![0-9]+]] +; DYNAMIC-SHADOW-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] +; DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 72057594037927935, !dbg [[DBG14]] +; DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP17]], 4, !dbg [[DBG14]] +; DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP18]], !dbg [[DBG14]] +; DYNAMIC-SHADOW-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP19]], i8 [[TMP15]], i64 1, i1 false), !dbg [[DBG14]] +; DYNAMIC-SHADOW-NEXT: ret void, !dbg [[DBG14]] ; ; ZERO-BASED-SHADOW-LABEL: define void @test_alloca( -; ZERO-BASED-SHADOW-SAME: ) #[[ATTR1:[0-9]+]] personality ptr @__hwasan_personality_thunk !dbg [[DBG8:![0-9]+]] { +; ZERO-BASED-SHADOW-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__hwasan_personality_thunk !dbg [[DBG7:![0-9]+]] { ; ZERO-BASED-SHADOW-NEXT: entry: ; ZERO-BASED-SHADOW-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr null) ; ZERO-BASED-SHADOW-NEXT: [[TMP0:%.*]] = call ptr @llvm.frameaddress.p0(i32 0) @@ -78,30 +78,30 @@ define void @test_alloca() sanitize_hwaddress !dbg !15 { ; ZERO-BASED-SHADOW-NEXT: [[HWASAN_STACK_BASE_TAG:%.*]] = xor i64 [[TMP1]], [[TMP2]] ; ZERO-BASED-SHADOW-NEXT: [[HWASAN_UAR_TAG:%.*]] = lshr i64 [[TMP1]], 56 ; ZERO-BASED-SHADOW-NEXT: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16 -; ZERO-BASED-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META11:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META13:![0-9]+]]) -; ZERO-BASED-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG14:![0-9]+]] -; ZERO-BASED-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg 
[[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG14]] -; ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = trunc i64 [[HWASAN_UAR_TAG]] to i8, !dbg [[DBG15:![0-9]+]] -; ZERO-BASED-SHADOW-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG15]] -; ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 72057594037927935, !dbg [[DBG15]] -; ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP17]], 4, !dbg [[DBG15]] -; ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr, !dbg [[DBG15]] -; ZERO-BASED-SHADOW-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP19]], i8 [[TMP15]], i64 1, i1 false), !dbg [[DBG15]] -; ZERO-BASED-SHADOW-NEXT: ret void, !dbg [[DBG15]] +; ZERO-BASED-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META10:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META12:![0-9]+]]) +; ZERO-BASED-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG13:![0-9]+]] +; ZERO-BASED-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = trunc i64 [[HWASAN_UAR_TAG]] to i8, !dbg [[DBG14:![0-9]+]] +; ZERO-BASED-SHADOW-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] +; ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 72057594037927935, !dbg [[DBG14]] +; ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP17]], 4, !dbg [[DBG14]] +; ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr, !dbg [[DBG14]] +; ZERO-BASED-SHADOW-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[TMP19]], i8 [[TMP15]], i64 1, 
i1 false), !dbg [[DBG14]] +; ZERO-BASED-SHADOW-NEXT: ret void, !dbg [[DBG14]] ; entry: %x = alloca i32, align 4 @@ -112,13 +112,13 @@ entry: define void @test_vscale_alloca() sanitize_hwaddress { ; DYNAMIC-SHADOW-LABEL: define void @test_vscale_alloca( -; DYNAMIC-SHADOW-SAME: ) #[[ATTR1]] { +; DYNAMIC-SHADOW-SAME: ) #[[ATTR0]] { ; DYNAMIC-SHADOW-NEXT: [[X:%.*]] = alloca , align 32 ; DYNAMIC-SHADOW-NEXT: call void @use32(ptr nonnull [[X]]) ; DYNAMIC-SHADOW-NEXT: ret void ; ; ZERO-BASED-SHADOW-LABEL: define void @test_vscale_alloca( -; ZERO-BASED-SHADOW-SAME: ) #[[ATTR1]] { +; ZERO-BASED-SHADOW-SAME: ) #[[ATTR0]] { ; ZERO-BASED-SHADOW-NEXT: [[X:%.*]] = alloca , align 32 ; ZERO-BASED-SHADOW-NEXT: call void @use32(ptr nonnull [[X]]) ; ZERO-BASED-SHADOW-NEXT: ret void @@ -150,17 +150,15 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !23 = !DILocation(line: 7, column: 5, scope: !15) !24 = !DILocation(line: 8, column: 1, scope: !15) ;. -; DYNAMIC-SHADOW: attributes #[[ATTR0:[0-9]+]] = { nobuiltin } -; DYNAMIC-SHADOW: attributes #[[ATTR1]] = { nobuiltin sanitize_hwaddress } -; DYNAMIC-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nounwind } -; DYNAMIC-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } -; DYNAMIC-SHADOW: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; DYNAMIC-SHADOW: attributes #[[ATTR0]] = { sanitize_hwaddress } +; DYNAMIC-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nounwind } +; DYNAMIC-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; DYNAMIC-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. -; ZERO-BASED-SHADOW: attributes #[[ATTR0:[0-9]+]] = { nobuiltin } -; ZERO-BASED-SHADOW: attributes #[[ATTR1]] = { nobuiltin sanitize_hwaddress } -; ZERO-BASED-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nounwind } -; ZERO-BASED-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } -; ZERO-BASED-SHADOW: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; ZERO-BASED-SHADOW: attributes #[[ATTR0]] = { sanitize_hwaddress } +; ZERO-BASED-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nounwind } +; ZERO-BASED-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; ZERO-BASED-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. 
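The attribute hunks above carry the substance of this change: the pass apparently no longer marks instrumented functions nobuiltin, so the old groups { nobuiltin } and { nobuiltin sanitize_hwaddress } collapse into a lone { sanitize_hwaddress }, functions without the sanitizer attribute lose their attribute group entirely (the test_load_noattr hunks earlier), and every #[[ATTR1]] reference in the generated checks renumbers to #[[ATTR0]]. A minimal sketch of that renumbering, assuming a plain aarch64 module; the function @f and its body are placeholders, not taken from the patch:

target triple = "aarch64--linux-android10000"

define void @f(ptr %p) sanitize_hwaddress {
  ; `opt -passes=hwasan -S` used to tag this function with
  ;   attributes #1 = { nobuiltin sanitize_hwaddress }
  ; with the patched pass the group is expected to stay
  ;   attributes #0 = { sanitize_hwaddress }
  ; which is exactly the ATTR1 -> ATTR0 shift in the checks above.
  store i8 0, ptr %p, align 1
  ret void
}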
; DYNAMIC-SHADOW: [[META0]] = !{ptr @hwasan.note} ; DYNAMIC-SHADOW: [[META1:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META2:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META3:![0-9]+]], splitDebugInlining: false, nameTableKind: None) @@ -168,16 +166,15 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; DYNAMIC-SHADOW: [[META3]] = !{} ; DYNAMIC-SHADOW: [[META4:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 4} ; DYNAMIC-SHADOW: [[META5:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} -; DYNAMIC-SHADOW: [[META6:![0-9]+]] = !{i32 4, !"nosanitize_hwaddress", i32 1} -; DYNAMIC-SHADOW: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; DYNAMIC-SHADOW: [[DBG8]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: [[META2]], file: [[META2]], line: 4, type: [[META9:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META1]], retainedNodes: [[META3]]) -; DYNAMIC-SHADOW: [[META9]] = !DISubroutineType(types: [[META10:![0-9]+]]) -; DYNAMIC-SHADOW: [[META10]] = !{null} -; DYNAMIC-SHADOW: [[META11]] = !DILocalVariable(name: "x", scope: [[DBG8]], file: [[META2]], line: 5, type: [[META12:![0-9]+]]) -; DYNAMIC-SHADOW: [[META12]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -; DYNAMIC-SHADOW: [[META13]] = !DILocation(line: 0, scope: [[DBG8]]) -; DYNAMIC-SHADOW: [[DBG14]] = !DILocation(line: 7, column: 5, scope: [[DBG8]]) -; DYNAMIC-SHADOW: [[DBG15]] = !DILocation(line: 8, column: 1, scope: [[DBG8]]) +; DYNAMIC-SHADOW: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; DYNAMIC-SHADOW: [[DBG7]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: [[META2]], file: [[META2]], line: 4, type: [[META8:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META1]], retainedNodes: [[META3]]) +; DYNAMIC-SHADOW: [[META8]] = !DISubroutineType(types: [[META9:![0-9]+]]) +; DYNAMIC-SHADOW: [[META9]] = !{null} +; DYNAMIC-SHADOW: [[META10]] = !DILocalVariable(name: "x", scope: [[DBG7]], file: [[META2]], line: 5, type: [[META11:![0-9]+]]) +; DYNAMIC-SHADOW: [[META11]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; DYNAMIC-SHADOW: [[META12]] = !DILocation(line: 0, scope: [[DBG7]]) +; DYNAMIC-SHADOW: [[DBG13]] = !DILocation(line: 7, column: 5, scope: [[DBG7]]) +; DYNAMIC-SHADOW: [[DBG14]] = !DILocation(line: 8, column: 1, scope: [[DBG7]]) ;. 
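Apart from the one deleted line, this metadata block is pure renumbering. The check that disappears corresponds to a module flag the pass evidently no longer emits; a sketch of the relevant nodes, with the neighboring flags taken from the checks above and the grouping into !llvm.module.flags assumed:

!llvm.module.flags = !{!4, !5, !6}
!4 = !{i32 7, !"Dwarf Version", i32 4}
!5 = !{i32 2, !"Debug Info Version", i32 3}
!6 = !{i32 4, !"nosanitize_hwaddress", i32 1}

Once !6 is no longer in the output, every later node shifts down one slot, which accounts for all of the [[META11]] -> [[META10]] and [[DBG8]] -> [[DBG7]] churn in this file, and plausibly for the matching [[PROF2]] -> [[PROF1]] shift throughout basic.ll; the debug info content itself is unchanged.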
; ZERO-BASED-SHADOW: [[META0]] = !{ptr @hwasan.note} ; ZERO-BASED-SHADOW: [[META1:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META2:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META3:![0-9]+]], splitDebugInlining: false, nameTableKind: None) @@ -185,14 +182,13 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; ZERO-BASED-SHADOW: [[META3]] = !{} ; ZERO-BASED-SHADOW: [[META4:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 4} ; ZERO-BASED-SHADOW: [[META5:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} -; ZERO-BASED-SHADOW: [[META6:![0-9]+]] = !{i32 4, !"nosanitize_hwaddress", i32 1} -; ZERO-BASED-SHADOW: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} -; ZERO-BASED-SHADOW: [[DBG8]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: [[META2]], file: [[META2]], line: 4, type: [[META9:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META1]], retainedNodes: [[META3]]) -; ZERO-BASED-SHADOW: [[META9]] = !DISubroutineType(types: [[META10:![0-9]+]]) -; ZERO-BASED-SHADOW: [[META10]] = !{null} -; ZERO-BASED-SHADOW: [[META11]] = !DILocalVariable(name: "x", scope: [[DBG8]], file: [[META2]], line: 5, type: [[META12:![0-9]+]]) -; ZERO-BASED-SHADOW: [[META12]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -; ZERO-BASED-SHADOW: [[META13]] = !DILocation(line: 0, scope: [[DBG8]]) -; ZERO-BASED-SHADOW: [[DBG14]] = !DILocation(line: 7, column: 5, scope: [[DBG8]]) -; ZERO-BASED-SHADOW: [[DBG15]] = !DILocation(line: 8, column: 1, scope: [[DBG8]]) +; ZERO-BASED-SHADOW: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; ZERO-BASED-SHADOW: [[DBG7]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: [[META2]], file: [[META2]], line: 4, type: [[META8:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META1]], retainedNodes: [[META3]]) +; ZERO-BASED-SHADOW: [[META8]] = !DISubroutineType(types: [[META9:![0-9]+]]) +; ZERO-BASED-SHADOW: [[META9]] = !{null} +; ZERO-BASED-SHADOW: [[META10]] = !DILocalVariable(name: "x", scope: [[DBG7]], file: [[META2]], line: 5, type: [[META11:![0-9]+]]) +; ZERO-BASED-SHADOW: [[META11]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; ZERO-BASED-SHADOW: [[META12]] = !DILocation(line: 0, scope: [[DBG7]]) +; ZERO-BASED-SHADOW: [[DBG13]] = !DILocation(line: 7, column: 5, scope: [[DBG7]]) +; ZERO-BASED-SHADOW: [[DBG14]] = !DILocation(line: 8, column: 1, scope: [[DBG7]]) ;. diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/attrinfer.ll b/llvm/test/Instrumentation/HWAddressSanitizer/attrinfer.ll deleted file mode 100644 index eeb51aeda1000b..00000000000000 --- a/llvm/test/Instrumentation/HWAddressSanitizer/attrinfer.ll +++ /dev/null @@ -1,14 +0,0 @@ -; Standard library functions get inferred attributes, some of which are not -; correct when building for HWASan. 
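That comment summarizes why the test existed: libcall declarations such as frexpf arrive with inferred attributes, and the old pass defended against the ones that are too strong under HWASan by stripping them and adding nobuiltin, which is what the CHECK lines of this deleted file pin down. With nobuiltin no longer added (see the attribute hunks above), the test has nothing left to verify, hence the deletion. An illustrative sketch of the hazard the workaround addressed, with a hypothetical caller that is not part of the deleted file:

declare float @frexpf(float noundef, ptr nocapture noundef) #0

define float @caller(ptr %exp) sanitize_hwaddress {
  ; memory(argmem: write) promises this call writes only through %exp.
  ; An interposed sanitizer version of frexpf also inspects shadow
  ; memory and may call into the runtime, so optimizations justified by
  ; the attribute (e.g. moving or deleting stores around the call)
  ; could become unsound; the old pass therefore dropped it.
  %r = call float @frexpf(float 1.0, ptr %exp)
  ret float %r
}

attributes #0 = { nounwind memory(argmem: write) }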
- -; RUN: opt < %s -passes=hwasan -S | FileCheck %s --check-prefixes=CHECK - -target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" -target triple = "aarch64--linux-android10000" - -declare float @frexpf(float noundef, ptr nocapture noundef) local_unnamed_addr #0 - -attributes #0 = { mustprogress nofree nounwind willreturn memory(argmem: write) "frame-pointer"="non-leaf" "hwasan-abi"="interceptor" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fix-cortex-a53-835769,+fp-armv8,+neon,+outline-atomics,+tagged-globals,+v8a" } - -; CHECK-NOT: memory(argmem: write) -; CHECK: nobuiltin diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/basic.ll b/llvm/test/Instrumentation/HWAddressSanitizer/basic.ll index 1e74f2891a2e3c..4212293f42545e 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/basic.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/basic.ll @@ -42,7 +42,7 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; FASTPATH-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; FASTPATH-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2:![0-9]+]] +; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1:![0-9]+]] ; FASTPATH: 8: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 0) ; FASTPATH-NEXT: br label [[TMP9]] @@ -70,10 +70,10 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2:![0-9]+]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1:![0-9]+]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "brk #2336", "{x0}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -82,13 +82,13 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 0 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof 
[[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -115,10 +115,10 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2:![0-9]+]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1:![0-9]+]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "brk #2336", "{x0}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -127,13 +127,13 @@ define i8 @test_load8(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 0 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -174,7 +174,7 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; FASTPATH-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; FASTPATH-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; FASTPATH: 8: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 1) ; FASTPATH-NEXT: br label [[TMP9]] @@ -202,10 +202,10 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 
[[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "brk #2337", "{x0}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -214,13 +214,13 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -247,10 +247,10 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "brk #2337", "{x0}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -259,13 +259,13 @@ define i16 @test_load16(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] 
= icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -306,7 +306,7 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; FASTPATH-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; FASTPATH-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; FASTPATH: 8: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 2) ; FASTPATH-NEXT: br label [[TMP9]] @@ -334,10 +334,10 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "brk #2338", "{x0}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -346,13 +346,13 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 3 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -379,10 +379,10 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label 
[[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "brk #2338", "{x0}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -391,13 +391,13 @@ define i32 @test_load32(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 3 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -438,7 +438,7 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; FASTPATH-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; FASTPATH-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; FASTPATH: 8: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 3) ; FASTPATH-NEXT: br label [[TMP9]] @@ -466,10 +466,10 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "brk #2339", "{x0}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -478,13 +478,13 @@ define i64 @test_load64(ptr %a) 
sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 7 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -511,10 +511,10 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "brk #2339", "{x0}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -523,13 +523,13 @@ define i64 @test_load64(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 7 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -570,7 +570,7 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; FASTPATH-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; FASTPATH-NEXT: 
[[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; FASTPATH: 8: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 4) ; FASTPATH-NEXT: br label [[TMP9]] @@ -598,10 +598,10 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "brk #2340", "{x0}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -610,13 +610,13 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -643,10 +643,10 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "brk #2340", "{x0}"(i64 [[TMP0]]) ; 
RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -655,13 +655,13 @@ define i128 @test_load128(ptr %a) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -771,7 +771,7 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; FASTPATH-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; FASTPATH-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; FASTPATH: 8: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 16) ; FASTPATH-NEXT: br label [[TMP9]] @@ -799,10 +799,10 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "brk #2352", "{x0}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -811,13 +811,13 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 0 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: 
[[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -844,10 +844,10 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "brk #2352", "{x0}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -856,13 +856,13 @@ define void @test_store8(ptr %a, i8 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 0 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -903,7 +903,7 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; FASTPATH-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; FASTPATH-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; FASTPATH: 8: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 17) ; FASTPATH-NEXT: br label [[TMP9]] @@ -931,10 +931,10 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr 
[[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "brk #2353", "{x0}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -943,13 +943,13 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -976,10 +976,10 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "brk #2353", "{x0}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -988,13 +988,13 @@ define void @test_store16(ptr %a, i16 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label 
[[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -1035,7 +1035,7 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; FASTPATH-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; FASTPATH-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; FASTPATH: 8: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 18) ; FASTPATH-NEXT: br label [[TMP9]] @@ -1063,10 +1063,10 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "brk #2354", "{x0}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1075,13 +1075,13 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 3 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ 
-1108,10 +1108,10 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "brk #2354", "{x0}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1120,13 +1120,13 @@ define void @test_store32(ptr %a, i32 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 3 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -1167,7 +1167,7 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; FASTPATH-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; FASTPATH-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; FASTPATH: 8: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 19) ; FASTPATH-NEXT: br label [[TMP9]] @@ -1195,10 +1195,10 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; 
RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "brk #2355", "{x0}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1207,13 +1207,13 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 7 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -1240,10 +1240,10 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "brk #2355", "{x0}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1252,13 +1252,13 @@ define void @test_store64(ptr %a, i64 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 7 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof 
[[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -1299,7 +1299,7 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; FASTPATH-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; FASTPATH-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; FASTPATH-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF2]] +; FASTPATH-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; FASTPATH: 8: ; FASTPATH-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[A]], i32 20) ; FASTPATH-NEXT: br label [[TMP9]] @@ -1327,10 +1327,10 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP4]] ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 8: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 10: ; RECOVER-DYNAMIC-SHADOW-NEXT: call void asm sideeffect "brk #2356", "{x0}"(i64 [[TMP0]]) ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1339,13 +1339,13 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 16: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-DYNAMIC-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-DYNAMIC-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-DYNAMIC-SHADOW: 21: ; RECOVER-DYNAMIC-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-DYNAMIC-SHADOW: 22: @@ -1372,10 +1372,10 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = icmp ne i8 [[TMP2]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 
[[TMP7]], label [[TMP8:%.*]], label [[TMP22:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 8: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = icmp ugt i8 [[TMP6]], 15 -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 10: ; RECOVER-ZERO-BASED-SHADOW-NEXT: call void asm sideeffect "brk #2356", "{x0}"(i64 [[TMP0]]) ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP21:%.*]] @@ -1384,13 +1384,13 @@ define void @test_store128(ptr %a, i128 %b) sanitize_hwaddress { ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP12]] to i8 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = add i8 [[TMP13]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = icmp uge i8 [[TMP14]], [[TMP6]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP15]], label [[TMP10]], label [[TMP16:%.*]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 16: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = or i64 [[TMP3]], 15 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP2]], [[TMP19]] -; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF2]] +; RECOVER-ZERO-BASED-SHADOW-NEXT: br i1 [[TMP20]], label [[TMP10]], label [[TMP21]], !prof [[PROF1]] ; RECOVER-ZERO-BASED-SHADOW: 21: ; RECOVER-ZERO-BASED-SHADOW-NEXT: br label [[TMP22]] ; RECOVER-ZERO-BASED-SHADOW: 22: @@ -1542,43 +1542,43 @@ entry: define i8 @test_load_noattr(ptr %a) { ; CHECK-LABEL: define i8 @test_load_noattr -; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: (ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; CHECK-NEXT: ret i8 [[B]] ; ; NOFASTPATH-LABEL: define i8 @test_load_noattr -; NOFASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; NOFASTPATH-SAME: (ptr [[A:%.*]]) { ; NOFASTPATH-NEXT: entry: ; NOFASTPATH-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; NOFASTPATH-NEXT: ret i8 [[B]] ; ; FASTPATH-LABEL: define i8 @test_load_noattr -; FASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; FASTPATH-SAME: (ptr [[A:%.*]]) { ; FASTPATH-NEXT: entry: ; FASTPATH-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; FASTPATH-NEXT: ret i8 [[B]] ; ; ABORT-DYNAMIC-SHADOW-LABEL: define i8 @test_load_noattr -; ABORT-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; ABORT-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) { ; ABORT-DYNAMIC-SHADOW-NEXT: entry: ; ABORT-DYNAMIC-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; ABORT-DYNAMIC-SHADOW-NEXT: ret i8 [[B]] ; ; RECOVER-DYNAMIC-SHADOW-LABEL: define i8 @test_load_noattr -; RECOVER-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; RECOVER-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) { ; RECOVER-DYNAMIC-SHADOW-NEXT: entry: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; RECOVER-DYNAMIC-SHADOW-NEXT: ret i8 [[B]] ; ; ABORT-ZERO-BASED-SHADOW-LABEL: define i8 @test_load_noattr -; ABORT-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; ABORT-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) { ; ABORT-ZERO-BASED-SHADOW-NEXT: entry: ; ABORT-ZERO-BASED-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; 
ABORT-ZERO-BASED-SHADOW-NEXT: ret i8 [[B]] ; ; RECOVER-ZERO-BASED-SHADOW-LABEL: define i8 @test_load_noattr -; RECOVER-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; RECOVER-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) { ; RECOVER-ZERO-BASED-SHADOW-NEXT: entry: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; RECOVER-ZERO-BASED-SHADOW-NEXT: ret i8 [[B]] @@ -1590,43 +1590,43 @@ entry: define i8 @test_load_notmyattr(ptr %a) sanitize_address { ; CHECK-LABEL: define i8 @test_load_notmyattr -; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; CHECK-NEXT: ret i8 [[B]] ; ; NOFASTPATH-LABEL: define i8 @test_load_notmyattr -; NOFASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; NOFASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; NOFASTPATH-NEXT: entry: ; NOFASTPATH-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; NOFASTPATH-NEXT: ret i8 [[B]] ; ; FASTPATH-LABEL: define i8 @test_load_notmyattr -; FASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; FASTPATH-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; FASTPATH-NEXT: entry: ; FASTPATH-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; FASTPATH-NEXT: ret i8 [[B]] ; ; ABORT-DYNAMIC-SHADOW-LABEL: define i8 @test_load_notmyattr -; ABORT-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; ABORT-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; ABORT-DYNAMIC-SHADOW-NEXT: entry: ; ABORT-DYNAMIC-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; ABORT-DYNAMIC-SHADOW-NEXT: ret i8 [[B]] ; ; RECOVER-DYNAMIC-SHADOW-LABEL: define i8 @test_load_notmyattr -; RECOVER-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; RECOVER-DYNAMIC-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; RECOVER-DYNAMIC-SHADOW-NEXT: entry: ; RECOVER-DYNAMIC-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; RECOVER-DYNAMIC-SHADOW-NEXT: ret i8 [[B]] ; ; ABORT-ZERO-BASED-SHADOW-LABEL: define i8 @test_load_notmyattr -; ABORT-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; ABORT-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; ABORT-ZERO-BASED-SHADOW-NEXT: entry: ; ABORT-ZERO-BASED-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; ABORT-ZERO-BASED-SHADOW-NEXT: ret i8 [[B]] ; ; RECOVER-ZERO-BASED-SHADOW-LABEL: define i8 @test_load_notmyattr -; RECOVER-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; RECOVER-ZERO-BASED-SHADOW-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; RECOVER-ZERO-BASED-SHADOW-NEXT: entry: ; RECOVER-ZERO-BASED-SHADOW-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; RECOVER-ZERO-BASED-SHADOW-NEXT: ret i8 [[B]] diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/fixed-shadow.ll b/llvm/test/Instrumentation/HWAddressSanitizer/fixed-shadow.ll index f72fc0a9720e4a..980189c5607f31 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/fixed-shadow.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/fixed-shadow.ll @@ -194,7 +194,7 @@ entry: define i8 @test_load_noattr(ptr %a) { ; CHECK-LABEL: define i8 @test_load_noattr -; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: (ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4 ; CHECK-NEXT: ret i8 [[B]] @@ -206,7 +206,7 @@ entry: define i8 @test_load_notmyattr(ptr %a) sanitize_address { ; CHECK-LABEL: define i8 @test_load_notmyattr -; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { 
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[A]], align 4
; CHECK-NEXT: ret i8 [[B]]
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/hwasan-pass-second-run.ll b/llvm/test/Instrumentation/HWAddressSanitizer/hwasan-pass-second-run.ll
index 2635dfb75ed98f..00614b603fe799 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/hwasan-pass-second-run.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/hwasan-pass-second-run.ll
@@ -18,7 +18,7 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK: @__hwasan_shadow = external global [0 x i8]
;.
define i8 @test_load8(ptr %a) sanitize_hwaddress {
-; CHECK: Function Attrs: nobuiltin sanitize_hwaddress
+; CHECK: Function Attrs: sanitize_hwaddress
; CHECK-LABEL: define i8 @test_load8
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
@@ -33,7 +33,7 @@ entry:
ret i8 %b
}
;.
-; CHECK: attributes #[[ATTR0]] = { nobuiltin sanitize_hwaddress }
+; CHECK: attributes #[[ATTR0]] = { sanitize_hwaddress }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind }
;.
; CHECK: [[META0]] = !{ptr @hwasan.note}
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/mem-attr.ll b/llvm/test/Instrumentation/HWAddressSanitizer/mem-attr.ll
index 919eacb2951f5e..c0e370f20213aa 100644
--- a/llvm/test/Instrumentation/HWAddressSanitizer/mem-attr.ll
+++ b/llvm/test/Instrumentation/HWAddressSanitizer/mem-attr.ll
@@ -11,5 +11,5 @@ entry:
ret void
}

-; CHECK: attributes #0 = { nobuiltin sanitize_hwaddress uwtable }
+; CHECK: attributes #0 = { sanitize_hwaddress uwtable }
attributes #0 = { sanitize_hwaddress memory(argmem: write) uwtable }
diff --git a/llvm/test/MC/AArch64/SVE/directive-arch-negative.s b/llvm/test/MC/AArch64/SVE/directive-arch-negative.s
new file mode 100644
index 00000000000000..e3029c16ffc8a6
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE/directive-arch-negative.s
@@ -0,0 +1,8 @@
+// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
+
+// Check that setting +nosve implies +nosve2
+.arch armv9-a+nosve
+
+adclb z0.s, z1.s, z31.s
+// CHECK: error: instruction requires: sve2
+// CHECK-NEXT: adclb z0.s, z1.s, z31.s
diff --git a/llvm/test/MC/AArch64/SVE/directive-arch_extension-negative.s b/llvm/test/MC/AArch64/SVE/directive-arch_extension-negative.s
index 661f13974d0bc8..31118f7490d00d 100644
--- a/llvm/test/MC/AArch64/SVE/directive-arch_extension-negative.s
+++ b/llvm/test/MC/AArch64/SVE/directive-arch_extension-negative.s
@@ -1,7 +1,12 @@
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s

-.arch_extension nosve
+.arch_extension sve2+nosve

ptrue p0.b, pow2
// CHECK: error: instruction requires: sve or sme
// CHECK-NEXT: ptrue p0.b, pow2
+
+// Check that setting +nosve implies +nosve2
+adclb z0.s, z1.s, z31.s
+// CHECK: error: instruction requires: sve2
+// CHECK-NEXT: adclb z0.s, z1.s, z31.s
diff --git a/llvm/test/MC/AArch64/SVE/directive-cpu-negative.s b/llvm/test/MC/AArch64/SVE/directive-cpu-negative.s
index 82acc1b0b0be9b..6ba537ca70609e 100644
--- a/llvm/test/MC/AArch64/SVE/directive-cpu-negative.s
+++ b/llvm/test/MC/AArch64/SVE/directive-cpu-negative.s
@@ -1,6 +1,11 @@
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s

-.cpu generic+sve+nosve
+.cpu generic+sve2+nosve
ptrue p0.b, pow2
// CHECK: error: instruction requires: sve or sme
// CHECK-NEXT: ptrue p0.b, pow2
+
+// Check that setting +nosve implies +nosve2
+adclb z0.s, z1.s, z31.s
+// CHECK: error: instruction requires: sve2
+// CHECK-NEXT: adclb z0.s, z1.s, z31.s
diff --git a/llvm/test/MC/AArch64/directive-arch-negative.s b/llvm/test/MC/AArch64/directive-arch-negative.s
index f60759899aa6c9..406507d5fc8f4d 100644
--- a/llvm/test/MC/AArch64/directive-arch-negative.s
+++ b/llvm/test/MC/AArch64/directive-arch-negative.s
@@ -12,10 +12,13 @@
# CHECK-NEXT: aese v0.8h, v1.8h
# CHECK-NEXT: ^

-// We silently ignore invalid features.
.arch armv8+foo
aese v0.8h, v1.8h

+# CHECK: error: unsupported architectural extension: foo
+# CHECK-NEXT: .arch armv8+foo
+# CHECK-NEXT: ^
+
# CHECK: error: invalid operand for instruction
# CHECK-NEXT: aese v0.8h, v1.8h
# CHECK-NEXT: ^
diff --git a/llvm/test/MC/AArch64/directive-arch_extension-negative.s b/llvm/test/MC/AArch64/directive-arch_extension-negative.s
index 1c1cfc9d33e3ed..1843af56555461 100644
--- a/llvm/test/MC/AArch64/directive-arch_extension-negative.s
+++ b/llvm/test/MC/AArch64/directive-arch_extension-negative.s
@@ -4,7 +4,7 @@
// RUN: -filetype asm -o - %s 2>&1 | FileCheck %s

.arch_extension axp64
-// CHECK: error: unknown architectural extension: axp64
+// CHECK: error: unsupported architectural extension: axp64
// CHECK-NEXT: .arch_extension axp64

crc32cx w0, w1, x3
@@ -49,6 +49,8 @@ fminnm d0, d0, d1
// CHECK: [[@LINE-1]]:1: error: instruction requires: fp
// CHECK-NEXT: fminnm d0, d0, d1

+// nofp implied nosimd, so reinstate it
+.arch_extension simd
addp v0.4s, v0.4s, v0.4s
// CHECK-NOT: [[@LINE-1]]:1: error: instruction requires: neon
.arch_extension nosimd
@@ -70,6 +72,8 @@ casa w5, w7, [x20]
// CHECK: [[@LINE-1]]:1: error: instruction requires: lse
// CHECK-NEXT: casa w5, w7, [x20]

+// nolse implied nolse128, so reinstate it
+.arch_extension lse128
swpp x0, x2, [x3]
// CHECK-NOT: [[@LINE-1]]:1: error: instruction requires: lse128
.arch_extension nolse128
@@ -84,6 +88,8 @@ cfp rctx, x0
// CHECK: [[@LINE-1]]:5: error: CFPRCTX requires: predres
// CHECK-NEXT: cfp rctx, x0

+// nopredres implied nopredres2, so reinstate it
+.arch_extension predres2
cosp rctx, x0
// CHECK-NOT: [[@LINE-1]]:6: error: COSP requires: predres2
.arch_extension nopredres2
@@ -133,6 +139,8 @@ ldapr x0, [x1]
// CHECK: [[@LINE-1]]:1: error: instruction requires: rcpc
// CHECK-NEXT: ldapr x0, [x1]

+// norcpc implied norcpc3, so reinstate it
+.arch_extension rcpc3
stilp w24, w0, [x16, #-8]!
// CHECK-NOT: [[@LINE-1]]:1: error: instruction requires: rcpc3
.arch_extension norcpc3
@@ -169,6 +177,8 @@ cpyfp [x0]!, [x1]!, x2!
// CHECK: [[@LINE-1]]:1: error: instruction requires: mops
// CHECK-NEXT: cpyfp [x0]!, [x1]!, x2!

+// nolse128 implied nod128, so reinstate it
+.arch_extension d128
// This needs to come before `.arch_extension nothe` as it uses an instruction
// that requires both `the` and `d128`
sysp #0, c2, c0, #0, x0, x1
@@ -204,6 +214,8 @@ umax x0, x1, x2
// CHECK: [[@LINE-1]]:1: error: instruction requires: cssc
// CHECK-NEXT: umax x0, x1, x2

+// noras implied norasv2, so reinstate it
+.arch_extension rasv2
mrs x0, ERXGSR_EL1
// CHECK-NOT: [[@LINE-1]]:9: error: expected readable system register
.arch_extension norasv2
diff --git a/llvm/test/MC/AVR/inst-brbc.s b/llvm/test/MC/AVR/inst-brbc.s
index 4d7d684da4468a..3ef3664cf07bfc 100644
--- a/llvm/test/MC/AVR/inst-brbc.s
+++ b/llvm/test/MC/AVR/inst-brbc.s
@@ -3,7 +3,6 @@
; RUN: | llvm-objdump -d - | FileCheck --check-prefix=INST %s

foo:
- brbc 3, .+8
brbc 0, .-16

.short 0xf759
@@ -11,14 +10,16 @@ foo:
.short 0xf74c
.short 0xf4c7

-; CHECK: brvc .Ltmp0+8 ; encoding: [0bAAAAA011,0b111101AA]
-; CHECK: ; fixup A - offset: 0, value: .Ltmp0+8, kind: fixup_7_pcrel
-; CHECK: brcc .Ltmp1-16 ; encoding: [0bAAAAA000,0b111101AA]
-; CHECK: ; fixup A - offset: 0, value: .Ltmp1-16, kind: fixup_7_pcrel
+; CHECK: brvc (.Ltmp0+8)+2 ; encoding: [0bAAAAA011,0b111101AA]
+; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+8)+2, kind: fixup_7_pcrel
+;
+; CHECK: brcc (.Ltmp1-16)+2 ; encoding: [0bAAAAA000,0b111101AA]
+; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-16)+2, kind: fixup_7_pcrel

-; INST: 23 f4 brvc .+8
-; INST: c0 f7 brsh .-16
-; INST: 59 f7 brne .-42
-; INST: 52 f7 brpl .-44
-; INST: 4c f7 brge .-46
-; INST: c7 f4 brid .+48
+; INST-LABEL: <foo>:
+; INST-NEXT: 23 f4 brvc .+8
+; INST-NEXT: c0 f7 brsh .-16
+; INST-NEXT: 59 f7 brne .-42
+; INST-NEXT: 52 f7 brpl .-44
+; INST-NEXT: 4c f7 brge .-46
+; INST-NEXT: c7 f4 brid .+48
diff --git a/llvm/test/MC/AVR/inst-brbs.s b/llvm/test/MC/AVR/inst-brbs.s
index 7987feeec654a1..f15a779a53654f 100644
--- a/llvm/test/MC/AVR/inst-brbs.s
+++ b/llvm/test/MC/AVR/inst-brbs.s
@@ -3,7 +3,6 @@
; RUN: | llvm-objdump -d - | FileCheck --check-prefix=INST %s

foo:
- brbs 3, .+8
brbs 0, .-12

.short 0xf359
@@ -11,14 +10,15 @@ foo:
.short 0xf34c
.short 0xf077

-; CHECK: brvs .Ltmp0+8 ; encoding: [0bAAAAA011,0b111100AA]
-; CHECK: ; fixup A - offset: 0, value: .Ltmp0+8, kind: fixup_7_pcrel
-; CHECK: brcs .Ltmp1-12 ; encoding: [0bAAAAA000,0b111100AA]
-; CHECK: ; fixup A - offset: 0, value: .Ltmp1-12, kind: fixup_7_pcrel
+; CHECK: brvs (.Ltmp0+8)+2 ; encoding: [0bAAAAA011,0b111100AA]
+; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+8)+2, kind: fixup_7_pcrel
+; CHECK: brcs (.Ltmp1-12)+2 ; encoding: [0bAAAAA000,0b111100AA]
+; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-12)+2, kind: fixup_7_pcrel

-; INST: 23 f0 brvs .+8
-; INST: d0 f3 brlo .-12
-; INST: 59 f3 breq .-42
-; INST: 52 f3 brmi .-44
-; INST: 4c f3 brlt .-46
-; INST: 77 f0 brie .+28
+; INST-LABEL: <foo>:
+; INST-NEXT: 23 f0 brvs .+8
+; INST-NEXT: d0 f3 brlo .-12
+; INST-NEXT: 59 f3 breq .-42
+; INST-NEXT: 52 f3 brmi .-44
+; INST-NEXT: 4c f3 brlt .-46
+; INST-NEXT: 77 f0 brie .+28
diff --git a/llvm/test/MC/AVR/inst-brcc.s b/llvm/test/MC/AVR/inst-brcc.s
new file mode 100644
index 00000000000000..d9218bc61e787f
--- /dev/null
+++ b/llvm/test/MC/AVR/inst-brcc.s
@@ -0,0 +1,28 @@
+; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s
+;
+; RUN: llvm-mc -filetype=obj -triple avr < %s \
+; RUN: | llvm-objdump -d - \
+; RUN: | FileCheck --check-prefix=INST %s
+
+foo:
+ brcc .+66
+ brcc .-22
+ brbc 0, .+66
+ brbc 0, bar
+
+bar:
+
+; CHECK: brcc (.Ltmp0+66)+2 ; encoding:
[0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+66)+2, kind: fixup_7_pcrel +; CHECK: brcc (.Ltmp1-22)+2 ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-22)+2, kind: fixup_7_pcrel +; CHECK: brcc (.Ltmp2+66)+2 ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+66)+2, kind: fixup_7_pcrel +; CHECK: brcc bar ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 08 f5 brsh .+66 +; INST-NEXT: a8 f7 brsh .-22 +; INST-NEXT: 08 f5 brsh .+66 +; INST-NEXT: 00 f4 brsh .+0 diff --git a/llvm/test/MC/AVR/inst-brcs.s b/llvm/test/MC/AVR/inst-brcs.s new file mode 100644 index 00000000000000..0012cb31f61269 --- /dev/null +++ b/llvm/test/MC/AVR/inst-brcs.s @@ -0,0 +1,28 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brcs .+8 + brcs .+4 + brbs 0, .+8 + brbs 0, bar + +bar: + +; CHECK: brcs (.Ltmp0+8)+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+8)+2, kind: fixup_7_pcrel +; CHECK: brcs (.Ltmp1+4)+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+4)+2, kind: fixup_7_pcrel +; CHECK: brcs (.Ltmp2+8)+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_7_pcrel +; CHECK: brcs bar ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 20 f0 brlo .+8 +; INST-NEXT: 10 f0 brlo .+4 +; INST-NEXT: 20 f0 brlo .+8 +; INST-NEXT: 00 f0 brlo .+0 diff --git a/llvm/test/MC/AVR/inst-breq.s b/llvm/test/MC/AVR/inst-breq.s new file mode 100644 index 00000000000000..f82010f02ba617 --- /dev/null +++ b/llvm/test/MC/AVR/inst-breq.s @@ -0,0 +1,28 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + breq .-18 + breq .-12 + brbs 1, .-18 + brbs 1, bar + +bar: + +; CHECK: breq (.Ltmp0-18)+2 ; encoding: [0bAAAAA001,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0-18)+2, kind: fixup_7_pcrel +; CHECK: breq (.Ltmp1-12)+2 ; encoding: [0bAAAAA001,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-12)+2, kind: fixup_7_pcrel +; CHECK: brbs 1, (.Ltmp2-18)+2 ; encoding: [0bAAAAA001,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2-18)+2, kind: fixup_7_pcrel +; CHECK: brbs 1, bar ; encoding: [0bAAAAA001,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: b9 f3 breq .-18 +; INST-NEXT: d1 f3 breq .-12 +; INST-NEXT: b9 f3 breq .-18 +; INST-NEXT: 01 f0 breq .+0 diff --git a/llvm/test/MC/AVR/inst-brge.s b/llvm/test/MC/AVR/inst-brge.s new file mode 100644 index 00000000000000..1121284a114689 --- /dev/null +++ b/llvm/test/MC/AVR/inst-brge.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brge .+50 + brge .+42 + brge bar + +bar: + +; CHECK: brge (.Ltmp0+50)+2 ; encoding: [0bAAAAA100,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+50)+2, kind: fixup_7_pcrel +; CHECK: 
brge (.Ltmp1+42)+2 ; encoding: [0bAAAAA100,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+42)+2, kind: fixup_7_pcrel +; CHECK: brge bar ; encoding: [0bAAAAA100,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: cc f4 brge .+50 +; INST-NEXT: ac f4 brge .+42 +; INST-NEXT: 04 f4 brge .+0 diff --git a/llvm/test/MC/AVR/inst-brhc.s b/llvm/test/MC/AVR/inst-brhc.s new file mode 100644 index 00000000000000..eb16ac2ef7a64e --- /dev/null +++ b/llvm/test/MC/AVR/inst-brhc.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brhc .+12 + brhc .+14 + brhc bar + +bar: + +; CHECK: brhc (.Ltmp0+12)+2 ; encoding: [0bAAAAA101,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+12)+2, kind: fixup_7_pcrel +; CHECK: brhc (.Ltmp1+14)+2 ; encoding: [0bAAAAA101,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+14)+2, kind: fixup_7_pcrel +; CHECK: brhc bar ; encoding: [0bAAAAA101,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 35 f4 brhc .+12 +; INST-NEXT: 3d f4 brhc .+14 +; INST-NEXT: 05 f4 brhc .+0 diff --git a/llvm/test/MC/AVR/inst-brhs.s b/llvm/test/MC/AVR/inst-brhs.s new file mode 100644 index 00000000000000..77c49596b3b0b8 --- /dev/null +++ b/llvm/test/MC/AVR/inst-brhs.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brhs .-66 + brhs .+14 + brhs bar + +bar: + +; CHECK: brhs (.Ltmp0-66)+2 ; encoding: [0bAAAAA101,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0-66)+2, kind: fixup_7_pcrel +; CHECK: brhs (.Ltmp1+14)+2 ; encoding: [0bAAAAA101,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+14)+2, kind: fixup_7_pcrel +; CHECK: brhs bar ; encoding: [0bAAAAA101,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: fd f2 brhs .-66 +; INST-NEXT: 3d f0 brhs .+14 +; INST-NEXT: 05 f0 brhs .+0 diff --git a/llvm/test/MC/AVR/inst-brid.s b/llvm/test/MC/AVR/inst-brid.s new file mode 100644 index 00000000000000..70d0ea83c49b2a --- /dev/null +++ b/llvm/test/MC/AVR/inst-brid.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brid .+42 + brid .+62 + brid bar + +bar: + +; CHECK: brid (.Ltmp0+42)+2 ; encoding: [0bAAAAA111,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+42)+2, kind: fixup_7_pcrel +; CHECK: brid (.Ltmp1+62)+2 ; encoding: [0bAAAAA111,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+62)+2, kind: fixup_7_pcrel +; CHECK: brid bar ; encoding: [0bAAAAA111,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: af f4 brid .+42 +; INST-NEXT: ff f4 brid .+62 +; INST-NEXT: 07 f4 brid .+0 diff --git a/llvm/test/MC/AVR/inst-brie.s b/llvm/test/MC/AVR/inst-brie.s new file mode 100644 index 00000000000000..717c686e2ed44e --- /dev/null +++ b/llvm/test/MC/AVR/inst-brie.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple 
avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brie .+20 + brie .+40 + brie bar + +bar: + +; CHECK: brie (.Ltmp0+20)+2 ; encoding: [0bAAAAA111,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+20)+2, kind: fixup_7_pcrel +; CHECK: brie (.Ltmp1+40)+2 ; encoding: [0bAAAAA111,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+40)+2, kind: fixup_7_pcrel +; CHECK: brie bar ; encoding: [0bAAAAA111,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 57 f0 brie .+20 +; INST-NEXT: a7 f0 brie .+40 +; INST-NEXT: 07 f0 brie .+0 diff --git a/llvm/test/MC/AVR/inst-brlo.s b/llvm/test/MC/AVR/inst-brlo.s new file mode 100644 index 00000000000000..4b56d66ffdfe00 --- /dev/null +++ b/llvm/test/MC/AVR/inst-brlo.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brlo .+12 + brlo .+28 + brlo bar + +bar: + +; CHECK: brlo (.Ltmp0+12)+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+12)+2, kind: fixup_7_pcrel +; CHECK: brlo (.Ltmp1+28)+2 ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+28)+2, kind: fixup_7_pcrel +; CHECK: brlo bar ; encoding: [0bAAAAA000,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 30 f0 brlo .+12 +; INST-NEXT: 70 f0 brlo .+28 +; INST-NEXT: 00 f0 brlo .+0 diff --git a/llvm/test/MC/AVR/inst-brlt.s b/llvm/test/MC/AVR/inst-brlt.s new file mode 100644 index 00000000000000..8a7c543f9444b1 --- /dev/null +++ b/llvm/test/MC/AVR/inst-brlt.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brlt .+16 + brlt .+2 + brlt bar + +bar: + +; CHECK: brlt (.Ltmp0+16)+2 ; encoding: [0bAAAAA100,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+16)+2, kind: fixup_7_pcrel +; CHECK: brlt (.Ltmp1+2)+2 ; encoding: [0bAAAAA100,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+2)+2, kind: fixup_7_pcrel +; CHECK: brlt bar ; encoding: [0bAAAAA100,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 44 f0 brlt .+16 +; INST-NEXT: 0c f0 brlt .+2 +; INST-NEXT: 04 f0 brlt .+0 diff --git a/llvm/test/MC/AVR/inst-brmi.s b/llvm/test/MC/AVR/inst-brmi.s new file mode 100644 index 00000000000000..878612d294dd95 --- /dev/null +++ b/llvm/test/MC/AVR/inst-brmi.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brmi .+66 + brmi .+58 + brmi bar + +bar: + +; CHECK: brmi (.Ltmp0+66)+2 ; encoding: [0bAAAAA010,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+66)+2, kind: fixup_7_pcrel +; CHECK: brmi (.Ltmp1+58)+2 ; encoding: [0bAAAAA010,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+58)+2, kind: fixup_7_pcrel +; CHECK: brmi bar ; encoding: [0bAAAAA010,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 0a f1 brmi .+66 +; INST-NEXT: ea f0 brmi .+58 +; INST-NEXT: 02 f0 brmi .+0 diff --git 
a/llvm/test/MC/AVR/inst-brne.s b/llvm/test/MC/AVR/inst-brne.s new file mode 100644 index 00000000000000..9d6bee4b754d95 --- /dev/null +++ b/llvm/test/MC/AVR/inst-brne.s @@ -0,0 +1,28 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brne .+10 + brne .+2 + brbc 1, .+10 + brbc 1, bar + +bar: + +; CHECK: brne (.Ltmp0+10)+2 ; encoding: [0bAAAAA001,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+10)+2, kind: fixup_7_pcrel +; CHECK: brne (.Ltmp1+2)+2 ; encoding: [0bAAAAA001,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+2)+2, kind: fixup_7_pcrel +; CHECK: brbc 1, (.Ltmp2+10)+2 ; encoding: [0bAAAAA001,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+10)+2, kind: fixup_7_pcrel +; CHECK: brbc 1, bar ; encoding: [0bAAAAA001,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 29 f4 brne .+10 +; INST-NEXT: 09 f4 brne .+2 +; INST-NEXT: 29 f4 brne .+10 +; INST-NEXT: 01 f4 brne .+0 diff --git a/llvm/test/MC/AVR/inst-brpl.s b/llvm/test/MC/AVR/inst-brpl.s new file mode 100644 index 00000000000000..393365ee35339e --- /dev/null +++ b/llvm/test/MC/AVR/inst-brpl.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brpl .-12 + brpl .+18 + brpl bar + +bar: + +; CHECK: brpl (.Ltmp0-12)+2 ; encoding: [0bAAAAA010,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0-12)+2, kind: fixup_7_pcrel +; CHECK: brpl (.Ltmp1+18)+2 ; encoding: [0bAAAAA010,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+18)+2, kind: fixup_7_pcrel +; CHECK: brpl bar ; encoding: [0bAAAAA010,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: d2 f7 brpl .-12 +; INST-NEXT: 4a f4 brpl .+18 +; INST-NEXT: 02 f4 brpl .+0 diff --git a/llvm/test/MC/AVR/inst-brsh.s b/llvm/test/MC/AVR/inst-brsh.s new file mode 100644 index 00000000000000..0bacd64d3d8d05 --- /dev/null +++ b/llvm/test/MC/AVR/inst-brsh.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brsh .+32 + brsh .+70 + brsh bar + +bar: + +; CHECK: brsh (.Ltmp0+32)+2 ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+32)+2, kind: fixup_7_pcrel +; CHECK: brsh (.Ltmp1+70)+2 ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+70)+2, kind: fixup_7_pcrel +; CHECK: brsh bar ; encoding: [0bAAAAA000,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 80 f4 brsh .+32 +; INST-NEXT: 18 f5 brsh .+70 +; INST-NEXT: 00 f4 brsh .+0 diff --git a/llvm/test/MC/AVR/inst-brtc.s b/llvm/test/MC/AVR/inst-brtc.s new file mode 100644 index 00000000000000..eb4ee211628721 --- /dev/null +++ b/llvm/test/MC/AVR/inst-brtc.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brtc .+52 + brtc .+50 + brtc bar + +bar: + +; CHECK: brtc (.Ltmp0+52)+2 ; encoding: 
[0bAAAAA110,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+52)+2, kind: fixup_7_pcrel +; CHECK: brtc (.Ltmp1+50)+2 ; encoding: [0bAAAAA110,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+50)+2, kind: fixup_7_pcrel +; CHECK: brtc bar ; encoding: [0bAAAAA110,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: d6 f4 brtc .+52 +; INST-NEXT: ce f4 brtc .+50 +; INST-NEXT: 06 f4 brtc .+0 diff --git a/llvm/test/MC/AVR/inst-brts.s b/llvm/test/MC/AVR/inst-brts.s new file mode 100644 index 00000000000000..ccd794a9225894 --- /dev/null +++ b/llvm/test/MC/AVR/inst-brts.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brts .+18 + brts .+22 + brts bar + +bar: + +; CHECK: brts (.Ltmp0+18)+2 ; encoding: [0bAAAAA110,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+18)+2, kind: fixup_7_pcrel +; CHECK: brts (.Ltmp1+22)+2 ; encoding: [0bAAAAA110,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+22)+2, kind: fixup_7_pcrel +; CHECK: brts bar ; encoding: [0bAAAAA110,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 4e f0 brts .+18 +; INST-NEXT: 5e f0 brts .+22 +; INST-NEXT: 06 f0 brts .+0 diff --git a/llvm/test/MC/AVR/inst-brvc.s b/llvm/test/MC/AVR/inst-brvc.s new file mode 100644 index 00000000000000..573f779c0dcd6a --- /dev/null +++ b/llvm/test/MC/AVR/inst-brvc.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brvc .-28 + brvc .-62 + brvc bar + +bar: + +; CHECK: brvc (.Ltmp0-28)+2 ; encoding: [0bAAAAA011,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0-28)+2, kind: fixup_7_pcrel +; CHECK: brvc (.Ltmp1-62)+2 ; encoding: [0bAAAAA011,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-62)+2, kind: fixup_7_pcrel +; CHECK: brvc bar ; encoding: [0bAAAAA011,0b111101AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 93 f7 brvc .-28 +; INST-NEXT: 0b f7 brvc .-62 +; INST-NEXT: 03 f4 brvc .+0 diff --git a/llvm/test/MC/AVR/inst-brvs.s b/llvm/test/MC/AVR/inst-brvs.s new file mode 100644 index 00000000000000..d50a1a9ec5b62f --- /dev/null +++ b/llvm/test/MC/AVR/inst-brvs.s @@ -0,0 +1,24 @@ +; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; +; RUN: llvm-mc -filetype=obj -triple avr < %s \ +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s + +foo: + brvs .+18 + brvs .+32 + brvs bar + +bar: + +; CHECK: brvs (.Ltmp0+18)+2 ; encoding: [0bAAAAA011,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+18)+2, kind: fixup_7_pcrel +; CHECK: brvs (.Ltmp1+32)+2 ; encoding: [0bAAAAA011,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1+32)+2, kind: fixup_7_pcrel +; CHECK: brvs bar ; encoding: [0bAAAAA011,0b111100AA] +; CHECK-NEXT: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel + +; INST-LABEL: : +; INST-NEXT: 4b f0 brvs .+18 +; INST-NEXT: 83 f0 brvs .+32 +; INST-NEXT: 03 f0 brvs .+0 diff --git a/llvm/test/MC/AVR/inst-family-cond-branch.s b/llvm/test/MC/AVR/inst-family-cond-branch.s deleted file mode 100644 index dc36425a884f3b..00000000000000 --- 
a/llvm/test/MC/AVR/inst-family-cond-branch.s +++ /dev/null @@ -1,321 +0,0 @@ -; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s -; RUN: llvm-mc -filetype=obj -triple avr < %s \ -; RUN: | llvm-objdump -d - | FileCheck --check-prefix=INST %s - - -foo: - ; BREQ - breq .-18 - breq .-12 - brbs 1, .-18 - brbs 1, baz - -; CHECK: breq .Ltmp0-18 ; encoding: [0bAAAAA001,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp0-18, kind: fixup_7_pcrel -; CHECK: breq .Ltmp1-12 ; encoding: [0bAAAAA001,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp1-12, kind: fixup_7_pcrel -; CHECK: brbs 1, .Ltmp2-18 ; encoding: [0bAAAAA001,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp2-18, kind: fixup_7_pcrel -; CHECK: brbs 1, baz ; encoding: [0bAAAAA001,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: baz, kind: fixup_7_pcrel - -; INST-LABEL: : -; INST: breq .-18 -; INST: breq .-12 -; INST: breq .-18 -; INST: breq .+0 - - ; BRNE - brne .+10 - brne .+2 - brbc 1, .+10 - brbc 1, bar - -; CHECK: brne .Ltmp3+10 ; encoding: [0bAAAAA001,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp3+10, kind: fixup_7_pcrel -; CHECK: brne .Ltmp4+2 ; encoding: [0bAAAAA001,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp4+2, kind: fixup_7_pcrel -; CHECK: brbc 1, .Ltmp5+10 ; encoding: [0bAAAAA001,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp5+10, kind: fixup_7_pcrel -; CHECK: brbc 1, bar ; encoding: [0bAAAAA001,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: bar, kind: fixup_7_pcrel - -; INST: brne .+10 -; INST: brne .+2 -; INST: brne .+10 -; INST: brne .+0 - -bar: - ; BRCS - brcs .+8 - brcs .+4 - brbs 0, .+8 - brbs 0, end - -; CHECK: brcs .Ltmp6+8 ; encoding: [0bAAAAA000,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp6+8, kind: fixup_7_pcrel -; CHECK: brcs .Ltmp7+4 ; encoding: [0bAAAAA000,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp7+4, kind: fixup_7_pcrel -; CHECK: brcs .Ltmp8+8 ; encoding: [0bAAAAA000,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp8+8, kind: fixup_7_pcrel -; CHECK: brcs end ; encoding: [0bAAAAA000,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: end, kind: fixup_7_pcrel - -; INST-LABEL: : -; INST: brlo .+8 -; INST: brlo .+4 -; INST: brlo .+8 -; INST: brlo .+0 - - ; BRCC - brcc .+66 - brcc .-22 - brbc 0, .+66 - brbc 0, baz - -; CHECK: brcc .Ltmp9+66 ; encoding: [0bAAAAA000,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp9+66, kind: fixup_7_pcrel -; CHECK: brcc .Ltmp10-22 ; encoding: [0bAAAAA000,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp10-22, kind: fixup_7_pcrel -; CHECK: brcc .Ltmp11+66 ; encoding: [0bAAAAA000,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp11+66, kind: fixup_7_pcrel -; CHECK: brcc baz ; encoding: [0bAAAAA000,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: baz, kind: fixup_7_pcrel - -; INST: brsh .+66 -; INST: brsh .-22 -; INST: brsh .+66 -; INST: brsh .+0 - -; BRSH - brsh .+32 - brsh .+70 - brsh car - -; CHECK: brsh .Ltmp12+32 ; encoding: [0bAAAAA000,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp12+32, kind: fixup_7_pcrel -; CHECK: brsh .Ltmp13+70 ; encoding: [0bAAAAA000,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp13+70, kind: fixup_7_pcrel -; CHECK: brsh car ; encoding: [0bAAAAA000,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: car, kind: fixup_7_pcrel - -; INST: brsh .+32 -; INST: brsh .+70 -; INST: brsh .+0 - -baz: - - ; BRLO - brlo .+12 - brlo .+28 - brlo car - -; CHECK: brlo .Ltmp14+12 ; encoding: [0bAAAAA000,0b111100AA] -; 
CHECK: ; fixup A - offset: 0, value: .Ltmp14+12, kind: fixup_7_pcrel -; CHECK: brlo .Ltmp15+28 ; encoding: [0bAAAAA000,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp15+28, kind: fixup_7_pcrel -; CHECK: brlo car ; encoding: [0bAAAAA000,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: car, kind: fixup_7_pcrel - -; INST-LABEL: : -; INST: brlo .+12 -; INST: brlo .+28 -; INST: brlo .+0 - - ; BRMI - brmi .+66 - brmi .+58 - brmi car - -; CHECK: brmi .Ltmp16+66 ; encoding: [0bAAAAA010,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp16+66, kind: fixup_7_pcrel -; CHECK: brmi .Ltmp17+58 ; encoding: [0bAAAAA010,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp17+58, kind: fixup_7_pcrel -; CHECK: brmi car ; encoding: [0bAAAAA010,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: car, kind: fixup_7_pcrel - -; INST: brmi .+66 -; INST: brmi .+58 -; INST: brmi .+0 - - ; BRPL - brpl .-12 - brpl .+18 - brpl car - -; CHECK: brpl .Ltmp18-12 ; encoding: [0bAAAAA010,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp18-12, kind: fixup_7_pcrel -; CHECK: brpl .Ltmp19+18 ; encoding: [0bAAAAA010,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp19+18, kind: fixup_7_pcrel -; CHECK: brpl car ; encoding: [0bAAAAA010,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: car, kind: fixup_7_pcrel - -; INST: brpl .-12 -; INST: brpl .+18 -; INST: brpl .+0 - -; BRGE - brge .+50 - brge .+42 - brge car - -; CHECK: brge .Ltmp20+50 ; encoding: [0bAAAAA100,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp20+50, kind: fixup_7_pcrel -; CHECK: brge .Ltmp21+42 ; encoding: [0bAAAAA100,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp21+42, kind: fixup_7_pcrel -; CHECK: brge car ; encoding: [0bAAAAA100,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: car, kind: fixup_7_pcrel - -; INST: brge .+50 -; INST: brge .+42 -; INST: brge .+0 - -car: - ; BRLT - brlt .+16 - brlt .+2 - brlt end - -; CHECK: brlt .Ltmp22+16 ; encoding: [0bAAAAA100,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp22+16, kind: fixup_7_pcrel -; CHECK: brlt .Ltmp23+2 ; encoding: [0bAAAAA100,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp23+2, kind: fixup_7_pcrel -; CHECK: brlt end ; encoding: [0bAAAAA100,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: end, kind: fixup_7_pcrel - -; INST-LABEL: : -; INST: brlt .+16 -; INST: brlt .+2 -; INST: brlt .+0 - - ; BRHS - brhs .-66 - brhs .+14 - brhs just_another_label - -; CHECK: brhs .Ltmp24-66 ; encoding: [0bAAAAA101,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp24-66, kind: fixup_7_pcrel -; CHECK: brhs .Ltmp25+14 ; encoding: [0bAAAAA101,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp25+14, kind: fixup_7_pcrel -; CHECK: brhs just_another_label ; encoding: [0bAAAAA101,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: just_another_label, kind: fixup_7_pcrel - -; INST: brhs .-66 -; INST: brhs .+14 -; INST: brhs .+0 - - ; BRHC - brhc .+12 - brhc .+14 - brhc just_another_label - -; CHECK: brhc .Ltmp26+12 ; encoding: [0bAAAAA101,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp26+12, kind: fixup_7_pcrel -; CHECK: brhc .Ltmp27+14 ; encoding: [0bAAAAA101,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp27+14, kind: fixup_7_pcrel -; CHECK: brhc just_another_label ; encoding: [0bAAAAA101,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: just_another_label, kind: fixup_7_pcrel - -; INST: brhc .+12 -; INST: brhc .+14 -; INST: brhc .+0 - - ; BRTS - brts .+18 - brts .+22 - brts just_another_label - -; CHECK: brts 
.Ltmp28+18 ; encoding: [0bAAAAA110,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp28+18, kind: fixup_7_pcrel -; CHECK: brts .Ltmp29+22 ; encoding: [0bAAAAA110,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp29+22, kind: fixup_7_pcrel -; CHECK: brts just_another_label ; encoding: [0bAAAAA110,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: just_another_label, kind: fixup_7_pcrel - -; INST: brts .+18 -; INST: brts .+22 -; INST: brts .+0 - -just_another_label: - ; BRTC - brtc .+52 - brtc .+50 - brtc end - -; CHECK: brtc .Ltmp30+52 ; encoding: [0bAAAAA110,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp30+52, kind: fixup_7_pcrel -; CHECK: brtc .Ltmp31+50 ; encoding: [0bAAAAA110,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp31+50, kind: fixup_7_pcrel -; CHECK: brtc end ; encoding: [0bAAAAA110,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: end, kind: fixup_7_pcrel - -; INST-LABEL: : -; INST: brtc .+52 -; INST: brtc .+50 -; INST: brtc .+0 - - ; BRVS - brvs .+18 - brvs .+32 - brvs end - -; CHECK: brvs .Ltmp32+18 ; encoding: [0bAAAAA011,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp32+18, kind: fixup_7_pcrel -; CHECK: brvs .Ltmp33+32 ; encoding: [0bAAAAA011,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp33+32, kind: fixup_7_pcrel -; CHECK: brvs end ; encoding: [0bAAAAA011,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: end, kind: fixup_7_pcrel - -; INST: brvs .+18 -; INST: brvs .+32 -; INST: brvs .+0 - - ; BRVC - brvc .-28 - brvc .-62 - brvc end - -; CHECK: brvc .Ltmp34-28 ; encoding: [0bAAAAA011,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp34-28, kind: fixup_7_pcrel -; CHECK: brvc .Ltmp35-62 ; encoding: [0bAAAAA011,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp35-62, kind: fixup_7_pcrel -; CHECK: brvc end ; encoding: [0bAAAAA011,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: end, kind: fixup_7_pcrel - -; INST: brvc .-28 -; INST: brvc .-62 -; INST: brvc .+0 - - ; BRIE - brie .+20 - brie .+40 - brie end - -; CHECK: brie .Ltmp36+20 ; encoding: [0bAAAAA111,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp36+20, kind: fixup_7_pcrel -; CHECK: brie .Ltmp37+40 ; encoding: [0bAAAAA111,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp37+40, kind: fixup_7_pcrel -; CHECK: brie end ; encoding: [0bAAAAA111,0b111100AA] -; CHECK: ; fixup A - offset: 0, value: end, kind: fixup_7_pcrel - -; INST: brie .+20 -; INST: brie .+40 -; INST: brie .+0 - - ; BRID - brid .+42 - brid .+62 - brid end - -; CHECK: brid .Ltmp38+42 ; encoding: [0bAAAAA111,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp38+42, kind: fixup_7_pcrel -; CHECK: brid .Ltmp39+62 ; encoding: [0bAAAAA111,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp39+62, kind: fixup_7_pcrel -; CHECK: brid end ; encoding: [0bAAAAA111,0b111101AA] -; CHECK: ; fixup A - offset: 0, value: end, kind: fixup_7_pcrel - -; INST: brid .+42 -; INST: brid .+62 -; INST: brid .+0 - -end: diff --git a/llvm/test/MC/AVR/inst-rcall.s b/llvm/test/MC/AVR/inst-rcall.s index 006013aa6ea946..a4ec32d05b1a43 100644 --- a/llvm/test/MC/AVR/inst-rcall.s +++ b/llvm/test/MC/AVR/inst-rcall.s @@ -1,27 +1,28 @@ ; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; ; RUN: llvm-mc -filetype=obj -triple avr < %s \ -; RUN: | llvm-objdump -d - | FileCheck --check-prefix=INST %s - +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s foo: - rcall .+0 rcall .-8 rcall .+12 rcall .+46 .short 0xdfea -; CHECK: rcall .Ltmp0+0 ; encoding: [A,0b1101AAAA] -; 
CHECK: ; fixup A - offset: 0, value: .Ltmp0+0, kind: fixup_13_pcrel -; CHECK: rcall .Ltmp1-8 ; encoding: [A,0b1101AAAA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp1-8, kind: fixup_13_pcrel -; CHECK: rcall .Ltmp2+12 ; encoding: [A,0b1101AAAA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp2+12, kind: fixup_13_pcrel -; CHECK: rcall .Ltmp3+46 ; encoding: [A,0b1101AAAA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp3+46, kind: fixup_13_pcrel +; CHECK: rcall (.Ltmp0+0)+2 ; encoding: [A,0b1101AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+0)+2, kind: fixup_13_pcrel +; CHECK: rcall (.Ltmp1-8)+2 ; encoding: [A,0b1101AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-8)+2, kind: fixup_13_pcrel +; CHECK: rcall (.Ltmp2+12)+2 ; encoding: [A,0b1101AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+12)+2, kind: fixup_13_pcrel +; CHECK: rcall (.Ltmp3+46)+2 ; encoding: [A,0b1101AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp3+46)+2, kind: fixup_13_pcrel -; INST: 00 d0 rcall .+0 -; INST: fc df rcall .-8 -; INST: 06 d0 rcall .+12 -; INST: 17 d0 rcall .+46 -; INST: ea df rcall .-44 +; INST-LABEL: : +; INST-NEXT: 00 d0 rcall .+0 +; INST-NEXT: fc df rcall .-8 +; INST-NEXT: 06 d0 rcall .+12 +; INST-NEXT: 17 d0 rcall .+46 +; INST-NEXT: ea df rcall .-44 diff --git a/llvm/test/MC/AVR/inst-rjmp.s b/llvm/test/MC/AVR/inst-rjmp.s index 3dbac39e055ddf..cc843a58b55d2c 100644 --- a/llvm/test/MC/AVR/inst-rjmp.s +++ b/llvm/test/MC/AVR/inst-rjmp.s @@ -1,49 +1,56 @@ ; RUN: llvm-mc -triple avr -show-encoding < %s | FileCheck %s +; ; RUN: llvm-mc -filetype=obj -triple avr < %s \ -; RUN: | llvm-objdump -d - | FileCheck --check-prefix=INST %s - +; RUN: | llvm-objdump -d - \ +; RUN: | FileCheck --check-prefix=INST %s foo: - rjmp .+2 rjmp .-2 rjmp foo rjmp .+8 rjmp end rjmp .+0 + end: rjmp .-4 rjmp .-6 + x: rjmp x .short 0xc00f -; CHECK: rjmp .Ltmp0+2 ; encoding: [A,0b1100AAAA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp0+2, kind: fixup_13_pcrel -; CHECK: rjmp .Ltmp1-2 ; encoding: [A,0b1100AAAA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp1-2, kind: fixup_13_pcrel -; CHECK: rjmp foo ; encoding: [A,0b1100AAAA] -; CHECK: ; fixup A - offset: 0, value: foo, kind: fixup_13_pcrel -; CHECK: rjmp .Ltmp2+8 ; encoding: [A,0b1100AAAA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp2+8, kind: fixup_13_pcrel -; CHECK: rjmp end ; encoding: [A,0b1100AAAA] -; CHECK: ; fixup A - offset: 0, value: end, kind: fixup_13_pcrel -; CHECK: rjmp .Ltmp3+0 ; encoding: [A,0b1100AAAA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp3+0, kind: fixup_13_pcrel -; CHECK: rjmp .Ltmp4-4 ; encoding: [A,0b1100AAAA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp4-4, kind: fixup_13_pcrel -; CHECK: rjmp .Ltmp5-6 ; encoding: [A,0b1100AAAA] -; CHECK: ; fixup A - offset: 0, value: .Ltmp5-6, kind: fixup_13_pcrel -; CHECK: rjmp x ; encoding: [A,0b1100AAAA] -; CHECK: ; fixup A - offset: 0, value: x, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp0+2)+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp0+2)+2, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp1-2)+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp1-2)+2, kind: fixup_13_pcrel +; CHECK: rjmp foo ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: foo, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp2+8)+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp2+8)+2, kind: fixup_13_pcrel +; CHECK: rjmp end ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: end, kind: 
fixup_13_pcrel +; CHECK: rjmp (.Ltmp3+0)+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp3+0)+2, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp4-4)+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp4-4)+2, kind: fixup_13_pcrel +; CHECK: rjmp (.Ltmp5-6)+2 ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: (.Ltmp5-6)+2, kind: fixup_13_pcrel +; CHECK: rjmp x ; encoding: [A,0b1100AAAA] +; CHECK-NEXT: ; fixup A - offset: 0, value: x, kind: fixup_13_pcrel -; INST: 01 c0 rjmp .+2 -; INST: ff cf rjmp .-2 -; INST: 00 c0 rjmp .+0 -; INST: 04 c0 rjmp .+8 -; INST: 00 c0 rjmp .+0 -; INST: 00 c0 rjmp .+0 -; INST: fe cf rjmp .-4 -; INST: fd cf rjmp .-6 -; INST: 00 c0 rjmp .+0 -; INST: 0f c0 rjmp .+30 +; INST-LABEL: : +; INST-NEXT: 01 c0 rjmp .+2 +; INST-NEXT: ff cf rjmp .-2 +; INST-NEXT: fd cf rjmp .-6 +; INST-NEXT: 04 c0 rjmp .+8 +; INST-NEXT: 01 c0 rjmp .+2 +; INST-NEXT: 00 c0 rjmp .+0 +; INST-EMPTY: +; INST-LABEL: : +; INST-NEXT: fe cf rjmp .-4 +; INST-NEXT: fd cf rjmp .-6 +; INST-EMPTY: +; INST-LABEL: : +; INST-NEXT: ff cf rjmp .-2 +; INST-NEXT: 0f c0 rjmp .+30 diff --git a/llvm/test/MC/LoongArch/Macros/macros-li.s b/llvm/test/MC/LoongArch/Macros/macros-li.s index 994aa439effa1b..8ac82a766f6043 100644 --- a/llvm/test/MC/LoongArch/Macros/macros-li.s +++ b/llvm/test/MC/LoongArch/Macros/macros-li.s @@ -45,8 +45,7 @@ li.d $a0, 0x7ffff00000800 li.d $a0, 0x8000000000fff # CHECK: ori $a0, $zero, 4095 -# CHECK-NEXT: lu32i.d $a0, -524288 -# CHECK-NEXT: lu52i.d $a0, $a0, 0 +# CHECK-NEXT: bstrins.d $a0, $a0, 51, 51 li.d $a0, 0x8000080000800 # CHECK: lu12i.w $a0, -524288 diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 45335b348b7e8f..48aec4bc52a0c5 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -854,85 +854,85 @@ main: # CHECK: f16x8.replace_lane 1 # encoding: [0xfd,0xa2,0x02,0x01] f16x8.replace_lane 1 - # CHECK: f16x8.add # encoding: [0xfd,0xb4,0x02] + # CHECK: f16x8.add # encoding: [0xfd,0xbd,0x02] f16x8.add - # CHECK: f16x8.sub # encoding: [0xfd,0xb5,0x02] + # CHECK: f16x8.sub # encoding: [0xfd,0xbe,0x02] f16x8.sub - # CHECK: f16x8.mul # encoding: [0xfd,0xb6,0x02] + # CHECK: f16x8.mul # encoding: [0xfd,0xbf,0x02] f16x8.mul - # CHECK: f16x8.div # encoding: [0xfd,0xb7,0x02] + # CHECK: f16x8.div # encoding: [0xfd,0xc0,0x02] f16x8.div - # CHECK: f16x8.min # encoding: [0xfd,0xb8,0x02] + # CHECK: f16x8.min # encoding: [0xfd,0xc1,0x02] f16x8.min - # CHECK: f16x8.max # encoding: [0xfd,0xb9,0x02] + # CHECK: f16x8.max # encoding: [0xfd,0xc2,0x02] f16x8.max - # CHECK: f16x8.pmin # encoding: [0xfd,0xba,0x02] + # CHECK: f16x8.pmin # encoding: [0xfd,0xc3,0x02] f16x8.pmin - # CHECK: f16x8.pmax # encoding: [0xfd,0xbb,0x02] + # CHECK: f16x8.pmax # encoding: [0xfd,0xc4,0x02] f16x8.pmax - # CHECK: f16x8.eq # encoding: [0xfd,0xc0,0x02] + # CHECK: f16x8.eq # encoding: [0xfd,0xb7,0x02] f16x8.eq - # CHECK: f16x8.ne # encoding: [0xfd,0xc1,0x02] + # CHECK: f16x8.ne # encoding: [0xfd,0xb8,0x02] f16x8.ne - # CHECK: f16x8.lt # encoding: [0xfd,0xc2,0x02] + # CHECK: f16x8.lt # encoding: [0xfd,0xb9,0x02] f16x8.lt - # CHECK: f16x8.gt # encoding: [0xfd,0xc3,0x02] + # CHECK: f16x8.gt # encoding: [0xfd,0xba,0x02] f16x8.gt - # CHECK: f16x8.le # encoding: [0xfd,0xc4,0x02] + # CHECK: f16x8.le # encoding: [0xfd,0xbb,0x02] f16x8.le - # CHECK: f16x8.ge # encoding: [0xfd,0xc5,0x02] + # CHECK: f16x8.ge # encoding: [0xfd,0xbc,0x02] f16x8.ge - # CHECK: f16x8.abs 
# encoding: [0xfd,0xb1,0x02] + # CHECK: f16x8.abs # encoding: [0xfd,0xb0,0x02] f16x8.abs - # CHECK: f16x8.neg # encoding: [0xfd,0xb2,0x02] + # CHECK: f16x8.neg # encoding: [0xfd,0xb1,0x02] f16x8.neg - # CHECK: f16x8.sqrt # encoding: [0xfd,0xb3,0x02] + # CHECK: f16x8.sqrt # encoding: [0xfd,0xb2,0x02] f16x8.sqrt - # CHECK: f16x8.ceil # encoding: [0xfd,0xbc,0x02] + # CHECK: f16x8.ceil # encoding: [0xfd,0xb3,0x02] f16x8.ceil - # CHECK: f16x8.floor # encoding: [0xfd,0xbd,0x02] + # CHECK: f16x8.floor # encoding: [0xfd,0xb4,0x02] f16x8.floor - # CHECK: f16x8.trunc # encoding: [0xfd,0xbe,0x02] + # CHECK: f16x8.trunc # encoding: [0xfd,0xb5,0x02] f16x8.trunc - # CHECK: f16x8.nearest # encoding: [0xfd,0xbf,0x02] + # CHECK: f16x8.nearest # encoding: [0xfd,0xb6,0x02] f16x8.nearest - # CHECK: f16x8.relaxed_madd # encoding: [0xfd,0xc6,0x02] + # CHECK: f16x8.relaxed_madd # encoding: [0xfd,0xce,0x02] f16x8.relaxed_madd - # CHECK: f16x8.relaxed_nmadd # encoding: [0xfd,0xc7,0x02] + # CHECK: f16x8.relaxed_nmadd # encoding: [0xfd,0xcf,0x02] f16x8.relaxed_nmadd - # CHECK: i16x8.trunc_sat_f16x8_s # encoding: [0xfd,0xc8,0x02] + # CHECK: i16x8.trunc_sat_f16x8_s # encoding: [0xfd,0xc5,0x02] i16x8.trunc_sat_f16x8_s - # CHECK: i16x8.trunc_sat_f16x8_u # encoding: [0xfd,0xc9,0x02] + # CHECK: i16x8.trunc_sat_f16x8_u # encoding: [0xfd,0xc6,0x02] i16x8.trunc_sat_f16x8_u - # CHECK: f16x8.convert_i16x8_s # encoding: [0xfd,0xca,0x02] + # CHECK: f16x8.convert_i16x8_s # encoding: [0xfd,0xc7,0x02] f16x8.convert_i16x8_s - # CHECK: f16x8.convert_i16x8_u # encoding: [0xfd,0xcb,0x02] + # CHECK: f16x8.convert_i16x8_u # encoding: [0xfd,0xc8,0x02] f16x8.convert_i16x8_u end_function diff --git a/llvm/test/ObjectYAML/DXContainer/DomainMaskVectors.yaml b/llvm/test/ObjectYAML/DXContainer/DomainMaskVectors.yaml index f3cfa90d1cf901..1a2f341f03ef71 100644 --- a/llvm/test/ObjectYAML/DXContainer/DomainMaskVectors.yaml +++ b/llvm/test/ObjectYAML/DXContainer/DomainMaskVectors.yaml @@ -75,7 +75,8 @@ Parts: LowerBound: 0 UpperBound: 0 Kind: CBuffer - Flags: 0 + Flags: + UsedByAtomic64: true SigInputElements: - Name: AAA_HSFoo Indices: [ 0 ] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv2-amplification.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv2-amplification.yaml index 8bae742b573919..1e00e604f3e248 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv2-amplification.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv2-amplification.yaml @@ -30,13 +30,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -77,13 +79,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv2-compute.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv2-compute.yaml index 74eb2b86ad01b2..c8bfd9acf68efc 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv2-compute.yaml +++ 
b/llvm/test/ObjectYAML/DXContainer/PSVv2-compute.yaml @@ -29,13 +29,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -75,13 +77,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv2-domain.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv2-domain.yaml index 38f81bd93d67cf..021fb1b5fffb1f 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv2-domain.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv2-domain.yaml @@ -33,13 +33,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -84,13 +86,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv2-geometry.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv2-geometry.yaml index 99fdbbb7c9edaf..74e32efbe2c659 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv2-geometry.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv2-geometry.yaml @@ -34,13 +34,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -85,13 +87,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv2-hull.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv2-hull.yaml index de8af95dbcbd89..79d92e2f0c5e6f 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv2-hull.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv2-hull.yaml @@ -34,13 +34,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - 
Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -86,13 +88,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv2-mesh.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv2-mesh.yaml index 78fc077348f42a..27bf148126005b 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv2-mesh.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv2-mesh.yaml @@ -36,13 +36,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -89,13 +91,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv2-pixel.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv2-pixel.yaml index ebe1e51faff3f8..1a1a74d7f3121d 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv2-pixel.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv2-pixel.yaml @@ -31,13 +31,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -79,13 +81,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv2-vertex.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv2-vertex.yaml index 2bca2f211136b2..6b0ba5eb3d19f0 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv2-vertex.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv2-vertex.yaml @@ -30,13 +30,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + 
Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -77,13 +79,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv3-amplification.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv3-amplification.yaml index 9e31d40ec7c1b4..6f7d151b266c9d 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv3-amplification.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv3-amplification.yaml @@ -31,13 +31,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -79,13 +81,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv3-compute.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv3-compute.yaml index 530a8597cb6498..2de3d435af1de9 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv3-compute.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv3-compute.yaml @@ -30,13 +30,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -77,13 +79,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv3-domain.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv3-domain.yaml index a71ab67633eb6f..91afb2f11fc7c4 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv3-domain.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv3-domain.yaml @@ -34,13 +34,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: 
[] @@ -86,13 +88,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv3-geometry.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv3-geometry.yaml index db530253c6a745..f661e81fe869b9 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv3-geometry.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv3-geometry.yaml @@ -35,13 +35,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -87,13 +89,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv3-hull.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv3-hull.yaml index 3e3ba493e98450..4140c3180e32ca 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv3-hull.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv3-hull.yaml @@ -35,13 +35,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -88,13 +90,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv3-mesh.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv3-mesh.yaml index 57bbcecfa1796b..03ce5b583315d0 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv3-mesh.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv3-mesh.yaml @@ -37,13 +37,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -91,13 +93,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 
0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv3-pixel.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv3-pixel.yaml index c94c234142a34b..2434567b2a6f5c 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv3-pixel.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv3-pixel.yaml @@ -32,13 +32,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: false SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -81,13 +83,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/ObjectYAML/DXContainer/PSVv3-vertex.yaml b/llvm/test/ObjectYAML/DXContainer/PSVv3-vertex.yaml index 697fa870c2257c..b43f6aa6b71d4a 100644 --- a/llvm/test/ObjectYAML/DXContainer/PSVv3-vertex.yaml +++ b/llvm/test/ObjectYAML/DXContainer/PSVv3-vertex.yaml @@ -31,13 +31,15 @@ Parts: LowerBound: 3 UpperBound: 4 Kind: TextureCube - Flags: 0 + Flags: + UsedByAtomic64: false - Type: Invalid Space: 32768 LowerBound: 8388608 UpperBound: 2147483648 Kind: Invalid - Flags: 0 + Flags: + UsedByAtomic64: true SigInputElements: [] SigOutputElements: [] SigPatchOrPrimElements: [] @@ -79,13 +81,15 @@ Parts: # CHECK-NEXT: LowerBound: 3 # CHECK-NEXT: UpperBound: 4 # CHECK-NEXT: Kind: TextureCube -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: false # CHECK-NEXT: - Type: Invalid # CHECK-NEXT: Space: 32768 # CHECK-NEXT: LowerBound: 8388608 # CHECK-NEXT: UpperBound: 2147483648 # CHECK-NEXT: Kind: Invalid -# CHECK-NEXT: Flags: 0 +# CHECK-NEXT: Flags: +# CHECK-NEXT: UsedByAtomic64: true # CHECK-NEXT: SigInputElements: [] # CHECK-NEXT: SigOutputElements: [] # CHECK-NEXT: SigPatchOrPrimElements: [] diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll index 781ba636c3ab3c..2a6780b60211cf 100644 --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -2685,11 +2685,291 @@ define @scalable_splat_zero() { ; See https://github.com/llvm/llvm-project/issues/78507 define double @call_abs(double noundef %__x) { +; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; TUNIT-LABEL: define noundef nofpclass(ninf nzero nsub nnorm) double @call_abs +; TUNIT-SAME: (double noundef [[__X:%.*]]) #[[ATTR3]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: [[ABS:%.*]] = tail call noundef nofpclass(ninf nzero nsub nnorm) double @llvm.fabs.f64(double noundef [[__X]]) #[[ATTR22]] +; TUNIT-NEXT: ret double [[ABS]] +; +; CGSCC: Function 
Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CGSCC-LABEL: define noundef nofpclass(ninf nzero nsub nnorm) double @call_abs +; CGSCC-SAME: (double noundef [[__X:%.*]]) #[[ATTR3]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: [[ABS:%.*]] = tail call noundef nofpclass(ninf nzero nsub nnorm) double @llvm.fabs.f64(double noundef [[__X]]) #[[ATTR19]] +; CGSCC-NEXT: ret double [[ABS]] +; entry: %abs = tail call double @llvm.fabs.f64(double %__x) ret double %abs } +define float @bitcast_to_float_sign_0(i32 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) float @bitcast_to_float_sign_0 +; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ARG]], 1 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i32 [[SHR]] to float +; CHECK-NEXT: ret float [[CAST]] +; + %shr = lshr i32 %arg, 1 + %cast = bitcast i32 %shr to float + ret float %cast +} + +define float @bitcast_to_float_nnan(i32 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf nzero nsub nnorm) float @bitcast_to_float_nnan +; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ARG]], 2 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i32 [[SHR]] to float +; CHECK-NEXT: ret float [[CAST]] +; + %shr = lshr i32 %arg, 2 + %cast = bitcast i32 %shr to float + ret float %cast +} + +define float @bitcast_to_float_sign_1(i32 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) float @bitcast_to_float_sign_1 +; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], -2147483648 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i32 [[OR]] to float +; CHECK-NEXT: ret float [[CAST]] +; + %or = or i32 %arg, -2147483648 + %cast = bitcast i32 %or to float + ret float %cast +} + +define float @bitcast_to_float_nan(i32 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(inf zero sub norm) float @bitcast_to_float_nan +; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], 2139095041 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i32 [[OR]] to float +; CHECK-NEXT: ret float [[CAST]] +; + %or = or i32 %arg, 2139095041 + %cast = bitcast i32 %or to float + ret float %cast +} + +define float @bitcast_to_float_zero(i32 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf sub norm) float @bitcast_to_float_zero +; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[ARG]], 31 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i32 [[SHL]] to float +; CHECK-NEXT: ret float [[CAST]] +; + %shl = shl i32 %arg, 31 + %cast = bitcast i32 %shl to float + ret float %cast +} + +define float @bitcast_to_float_nzero(i32 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(zero) float @bitcast_to_float_nzero +; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], 134217728 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i32 [[OR]] to float +; CHECK-NEXT: ret float [[CAST]] +; + %or = or i32 %arg, 134217728 + %cast = bitcast i32 %or to float + ret float %cast +} + +define float 
@bitcast_to_float_inf(i32 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan zero sub norm) float @bitcast_to_float_inf +; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[SHR:%.*]] = shl i32 [[ARG]], 31 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHR]], 2139095040 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i32 [[OR]] to float +; CHECK-NEXT: ret float [[CAST]] +; + %shr = shl i32 %arg, 31 + %or = or i32 %shr, 2139095040 + %cast = bitcast i32 %or to float + ret float %cast +} + +define double @bitcast_to_double_sign_0(i64 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) double @bitcast_to_double_sign_0 +; CHECK-SAME: (i64 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[ARG]], 1 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i64 [[SHR]] to double +; CHECK-NEXT: ret double [[CAST]] +; + %shr = lshr i64 %arg, 1 + %cast = bitcast i64 %shr to double + ret double %cast +} + +define double @bitcast_to_double_nnan(i64 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf nzero nsub nnorm) double @bitcast_to_double_nnan +; CHECK-SAME: (i64 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[ARG]], 2 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i64 [[SHR]] to double +; CHECK-NEXT: ret double [[CAST]] +; + %shr = lshr i64 %arg, 2 + %cast = bitcast i64 %shr to double + ret double %cast +} + +define double @bitcast_to_double_sign_1(i64 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) double @bitcast_to_double_sign_1 +; CHECK-SAME: (i64 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[OR:%.*]] = or i64 [[ARG]], -9223372036854775808 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i64 [[OR]] to double +; CHECK-NEXT: ret double [[CAST]] +; + %or = or i64 %arg, -9223372036854775808 + %cast = bitcast i64 %or to double + ret double %cast +} + +define double @bitcast_to_double_nan(i64 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(inf zero sub norm) double @bitcast_to_double_nan +; CHECK-SAME: (i64 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[OR:%.*]] = or i64 [[ARG]], -4503599627370495 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i64 [[OR]] to double +; CHECK-NEXT: ret double [[CAST]] +; + %or = or i64 %arg, -4503599627370495 + %cast = bitcast i64 %or to double + ret double %cast +} + + +define double @bitcast_to_double_zero(i64 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf sub norm) double @bitcast_to_double_zero +; CHECK-SAME: (i64 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[ARG]], 63 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i64 [[SHL]] to double +; CHECK-NEXT: ret double [[CAST]] +; + %shl = shl i64 %arg, 63 + %cast = bitcast i64 %shl to double + ret double %cast +} + +define double @bitcast_to_double_nzero(i64 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(zero) double @bitcast_to_double_nzero +; CHECK-SAME: (i64 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[OR:%.*]] = or i64 [[ARG]], 1152921504606846976 +; CHECK-NEXT: [[CAST:%.*]] 
= bitcast i64 [[OR]] to double +; CHECK-NEXT: ret double [[CAST]] +; + %or = or i64 %arg, 1152921504606846976 + %cast = bitcast i64 %or to double + ret double %cast +} + +define double @bitcast_to_double_inf(i64 %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan zero sub norm) double @bitcast_to_double_inf +; CHECK-SAME: (i64 [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[SHR:%.*]] = shl i64 [[ARG]], 63 +; CHECK-NEXT: [[OR:%.*]] = or i64 [[SHR]], 9218868437227405312 +; CHECK-NEXT: [[CAST:%.*]] = bitcast i64 [[OR]] to double +; CHECK-NEXT: ret double [[CAST]] +; + %shr = shl i64 %arg, 63 + %or = or i64 %shr, 9218868437227405312 + %cast = bitcast i64 %or to double + ret double %cast +} + + +define <2 x float> @bitcast_to_float_vect_sign_0(<2 x i32> %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) <2 x float> @bitcast_to_float_vect_sign_0 +; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[ARG]], <i32 1, i32 1> +; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[SHR]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[CAST]] +; + %shr = lshr <2 x i32> %arg, <i32 1, i32 1> + %cast = bitcast <2 x i32> %shr to <2 x float> + ret <2 x float> %cast +} + +define <2 x float> @bitcast_to_float_vect_nnan(<2 x i32> %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(nan inf nzero nsub nnorm) <2 x float> @bitcast_to_float_vect_nnan +; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i32> [[ARG]], <i32 2, i32 2> +; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[SHR]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[CAST]] +; + %shr = lshr <2 x i32> %arg, <i32 2, i32 2> + %cast = bitcast <2 x i32> %shr to <2 x float> + ret <2 x float> %cast +} + +define <2 x float> @bitcast_to_float_vect_sign_1(<2 x i32> %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) <2 x float> @bitcast_to_float_vect_sign_1 +; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], <i32 -2147483648, i32 -2147483648> +; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[OR]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[CAST]] +; + %or = or <2 x i32> %arg, <i32 -2147483648, i32 -2147483648> + %cast = bitcast <2 x i32> %or to <2 x float> + ret <2 x float> %cast +} + +define <2 x float> @bitcast_to_float_vect_nan(<2 x i32> %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define nofpclass(inf zero sub norm) <2 x float> @bitcast_to_float_vect_nan +; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], <i32 2139095041, i32 2139095041> +; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[OR]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[CAST]] +; + %or = or <2 x i32> %arg, <i32 2139095041, i32 2139095041> + %cast = bitcast <2 x i32> %or to <2 x float> + ret <2 x float> %cast +} + +define <2 x float> @bitcast_to_float_vect_conservative_1(<2 x i32> %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define <2 x float> @bitcast_to_float_vect_conservative_1 +; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], +; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[OR]] to <2 x float> +; CHECK-NEXT: ret <2 x
float> [[CAST]] +; + %or = or <2 x i32> %arg, + %cast = bitcast <2 x i32> %or to <2 x float> + ret <2 x float> %cast +} + +define <2 x float> @bitcast_to_float_vect_conservative_2(<2 x i32> %arg) { +; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define <2 x float> @bitcast_to_float_vect_conservative_2 +; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG]], +; CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i32> [[OR]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[CAST]] +; + %or = or <2 x i32> %arg, + %cast = bitcast <2 x i32> %or to <2 x float> + ret <2 x float> %cast +} + declare i64 @_Z13get_global_idj(i32 noundef) attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" } diff --git a/llvm/test/Transforms/Inline/X86/inline-target-cpu-i686.ll b/llvm/test/Transforms/Inline/X86/inline-target-cpu-i686.ll index bd05cffcaa8b7f..187278d1c9035a 100644 --- a/llvm/test/Transforms/Inline/X86/inline-target-cpu-i686.ll +++ b/llvm/test/Transforms/Inline/X86/inline-target-cpu-i686.ll @@ -1,12 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=i686-unknown-unknown -S -passes=inline | FileCheck %s define i32 @func_target_cpu_nocona() #0 { +; CHECK-LABEL: @func_target_cpu_nocona( +; CHECK-NEXT: ret i32 0 +; ret i32 0 } -; CHECK-LABEL: @target_cpu_prescott_call_target_cpu_nocona( -; CHECK-NEXT: ret i32 0 define i32 @target_cpu_prescott_call_target_cpu_nocona() #1 { +; CHECK-LABEL: @target_cpu_prescott_call_target_cpu_nocona( +; CHECK-NEXT: ret i32 0 +; %call = call i32 @func_target_cpu_nocona() ret i32 %call } diff --git a/llvm/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll b/llvm/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll index b0a145d54cf593..e6693a637d820d 100644 --- a/llvm/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll +++ b/llvm/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll @@ -1,37 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown-unknown -S -passes=inline | FileCheck %s define i32 @func_target_cpu_base() #0 { +; CHECK-LABEL: @func_target_cpu_base( +; CHECK-NEXT: ret i32 0 +; ret i32 0 } -; CHECK-LABEL: @target_cpu_k8_call_target_cpu_base( -; CHECK-NEXT: ret i32 0 define i32 @target_cpu_k8_call_target_cpu_base() #1 { +; CHECK-LABEL: @target_cpu_k8_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +; %call = call i32 @func_target_cpu_base() ret i32 %call } -; CHECK-LABEL: @target_cpu_target_nehalem_call_target_cpu_base( -; CHECK-NEXT: ret i32 0 define i32 @target_cpu_target_nehalem_call_target_cpu_base() #2 { +; CHECK-LABEL: @target_cpu_target_nehalem_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +; %call = call i32 @func_target_cpu_base() ret i32 %call } -; CHECK-LABEL: @target_cpu_target_goldmont_call_target_cpu_base( -; CHECK-NEXT: ret i32 0 define i32 @target_cpu_target_goldmont_call_target_cpu_base() #3 { +; CHECK-LABEL: @target_cpu_target_goldmont_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +; %call = call i32 @func_target_cpu_base() ret i32 %call } define i32 @func_target_cpu_nocona() #4 { +; CHECK-LABEL: @func_target_cpu_nocona( +; CHECK-NEXT: ret i32 0 +; ret i32 0 } -; CHECK-LABEL: @target_cpu_target_base_call_target_cpu_nocona( -; CHECK-NEXT: ret i32 0 define i32 @target_cpu_target_base_call_target_cpu_nocona() #0 { +; CHECK-LABEL: @target_cpu_target_base_call_target_cpu_nocona( +; CHECK-NEXT: ret i32 0 +; 
%call = call i32 @func_target_cpu_nocona() ret i32 %call } diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll index 80d8e1b16ed28b..3c44da84813fdb 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-avx512-inseltpoison.ll @@ -1814,1366 +1814,6 @@ define double @test_mask3_vfnmsub_sd_1_unary_fneg(<2 x double> %a, <2 x double> ret double %13 } -declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) - -define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_si_256( -; CHECK-NEXT: ret <8 x i32> [[A0:%.*]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>) - ret <8 x i32> %1 -} - -define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_si_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[A0:%.*]], <8 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i32> [[TMP2]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru - ret <8 x i32> %3 -} - -define <8 x i32> @zero_test_permvar_si_256(<8 x i32> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_si_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer) - ret <8 x i32> %1 -} - -define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_si_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i32> [[TMP3]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru - ret <8 x i32> %3 -} - -define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_si_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) - ret <8 x i32> %1 -} - -define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_si_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i32> [[TMP3]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru - ret <8 x i32> %3 -} - -define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_si_256( -; CHECK-NEXT: [[TMP1:%.*]] =
shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) - ret <8 x i32> %1 -} - -define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_si_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i32> [[TMP3]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru - ret <8 x i32> %3 -} - -declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) - -define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_sf_256( -; CHECK-NEXT: ret <8 x float> [[A0:%.*]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>) - ret <8 x float> %1 -} - -define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_sf_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[A0:%.*]], <8 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x float> [[TMP2]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru - ret <8 x float> %3 -} - -define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_sf_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: ret <8 x float> [[TMP1]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer) - ret <8 x float> %1 -} - -define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_sf_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x float> [[TMP3]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru - ret <8 x float> %3 -} - -define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_sf_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: ret <8 x float> [[TMP1]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) - ret <8 x float> %1 -} - -define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_sf_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT:
[[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x float> [[TMP3]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru - ret <8 x float> %3 -} - -define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_sf_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: ret <8 x float> [[TMP1]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) - ret <8 x float> %1 -} - -define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_sf_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x float> [[TMP3]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru - ret <8 x float> %3 -} - -declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>) - -define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_di_256( -; CHECK-NEXT: ret <4 x i64> [[A0:%.*]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>) - ret <4 x i64> %1 -} - -define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_di_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[A0:%.*]], <4 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x i64> [[TMP2]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru - ret <4 x i64> %3 -} - -define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_di_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer) - ret <4 x i64> %1 -} - -define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_di_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x i64> [[TMP3]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer) - %2 = bitcast i8 %mask to <8
x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru - ret <4 x i64> %3 -} - -define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_di_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) - ret <4 x i64> %1 -} - -define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_di_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x i64> [[TMP3]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru - ret <4 x i64> %3 -} - -define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_di_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) - ret <4 x i64> %1 -} - -define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_di_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x i64> [[TMP3]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru - ret <4 x i64> %3 -} - -declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>) - -define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_df_256( -; CHECK-NEXT: ret <4 x double> [[A0:%.*]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - ret <4 x double> %1 -} - -define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_df_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[A0:%.*]], <4 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x double> [[TMP2]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> 
%passthru - ret <4 x double> %3 -} - -define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_df_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer) - ret <4 x double> %1 -} - -define <4 x double> @zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_df_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x double> [[TMP3]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru - ret <4 x double> %3 -} - -define <4 x double> @shuffle_test_permvar_df_256(<4 x double> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_df_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - ret <4 x double> %1 -} - -define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_df_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x double> [[TMP3]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru - ret <4 x double> %3 -} - -define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_df_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - ret <4 x double> %1 -} - -define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_df_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x double> [[TMP3]] -; - %1 = call <4 x double> 
@llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru - ret <4 x double> %3 -} - -declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>) - -define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_si_512( -; CHECK-NEXT: ret <16 x i32> [[A0:%.*]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - ret <16 x i32> %1 -} - -define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_si_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[A0:%.*]], <16 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i32> [[TMP2]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru - ret <16 x i32> %3 -} - -define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_si_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: ret <16 x i32> [[TMP1]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer) - ret <16 x i32> %1 -} - -define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_si_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i32> [[TMP3]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru - ret <16 x i32> %3 -} - -define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_si_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i32> [[TMP1]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - ret <16 x i32> %1 -} - -define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_si_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i32> [[TMP3]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru - ret <16 x i32> %3 -} - -define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_si_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> -; 
CHECK-NEXT: ret <16 x i32> [[TMP1]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - ret <16 x i32> %1 -} - -define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_si_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i32> [[TMP3]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru - ret <16 x i32> %3 -} - -declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>) - -define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_sf_512( -; CHECK-NEXT: ret <16 x float> [[A0:%.*]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - ret <16 x float> %1 -} - -define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_sf_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[A0:%.*]], <16 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x float> [[TMP2]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru - ret <16 x float> %3 -} - -define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_sf_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: ret <16 x float> [[TMP1]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer) - ret <16 x float> %1 -} - -define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_sf_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x float> [[TMP3]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru - ret <16 x float> %3 -} - -define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_sf_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: ret <16 x float> [[TMP1]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - ret <16 x float> %1 -} - -define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_sf_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 
x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x float> [[TMP3]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru - ret <16 x float> %3 -} - -define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_sf_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: ret <16 x float> [[TMP1]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - ret <16 x float> %1 -} - -define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_sf_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x float> [[TMP3]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru - ret <16 x float> %3 -} - -declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>) - -define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_di_512( -; CHECK-NEXT: ret <8 x i64> [[A0:%.*]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - ret <8 x i64> %1 -} - -define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_di_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[A0:%.*]], <8 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i64> [[TMP2]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru - ret <8 x i64> %3 -} - -define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_di_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: ret <8 x i64> [[TMP1]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer) - ret <8 x i64> %1 -} - -define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_di_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i64> [[TMP3]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru - ret <8 x i64> %3 -} - -define <8 x i64> 
@shuffle_test_permvar_di_512(<8 x i64> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_di_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i64> [[TMP1]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - ret <8 x i64> %1 -} - -define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_di_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i64> [[TMP3]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru - ret <8 x i64> %3 -} - -define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_di_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i64> [[TMP1]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - ret <8 x i64> %1 -} - -define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_di_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i64> [[TMP3]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru - ret <8 x i64> %3 -} - -declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>) - -define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_df_512( -; CHECK-NEXT: ret <8 x double> [[A0:%.*]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - ret <8 x double> %1 -} - -define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_df_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[A0:%.*]], <8 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x double> [[TMP2]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru - ret <8 x double> %3 -} - -define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_df_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: ret <8 x double> [[TMP1]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer) - ret <8 x double> %1 -} - -define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_df_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = 
shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x double> [[TMP3]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru - ret <8 x double> %3 -} - -define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_df_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: ret <8 x double> [[TMP1]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - ret <8 x double> %1 -} - -define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_df_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x double> [[TMP3]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru - ret <8 x double> %3 -} - -define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_df_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: ret <8 x double> [[TMP1]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - ret <8 x double> %1 -} - -define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_df_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x double> [[TMP3]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru - ret <8 x double> %3 -} - -declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>) - -define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_hi_128( -; CHECK-NEXT: ret <8 x i16> [[A0:%.*]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - ret <8 x i16> %1 -} - -define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_hi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A0:%.*]], <8 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i16> [[TMP2]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> 
%passthru - ret <8 x i16> %3 -} - -define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_hi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: ret <8 x i16> [[TMP1]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer) - ret <8 x i16> %1 -} - -define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_hi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i16> [[TMP3]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru - ret <8 x i16> %3 -} - -define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i16> [[TMP1]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - ret <8 x i16> %1 -} - -define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i16> [[TMP3]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru - ret <8 x i16> %3 -} - -define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_hi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i16> [[TMP1]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - ret <8 x i16> %1 -} - -define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_hi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i16> [[TMP3]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru - ret <8 x i16> %3 -} - -declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>) - -define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_hi_256( -; CHECK-NEXT: ret <16 x i16> [[A0:%.*]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - ret <16 x i16> %1 -} - -define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, 
i16 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_hi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[A0:%.*]], <16 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i16> [[TMP2]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru - ret <16 x i16> %3 -} - -define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_hi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: ret <16 x i16> [[TMP1]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer) - ret <16 x i16> %1 -} - -define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_hi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i16> [[TMP3]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru - ret <16 x i16> %3 -} - -define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i16> [[TMP1]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - ret <16 x i16> %1 -} - -define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i16> [[TMP3]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru - ret <16 x i16> %3 -} - -define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_hi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i16> [[TMP1]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - ret <16 x i16> %1 -} - -define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_hi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i16> [[TMP3]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - %2 = 
bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru - ret <16 x i16> %3 -} - -declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>) - -define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_hi_512( -; CHECK-NEXT: ret <32 x i16> [[A0:%.*]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - ret <32 x i16> %1 -} - -define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_hi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[A0:%.*]], <32 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i16> [[TMP2]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru - ret <32 x i16> %3 -} - -define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_hi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: ret <32 x i16> [[TMP1]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer) - ret <32 x i16> %1 -} - -define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_hi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i16> [[TMP3]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru - ret <32 x i16> %3 -} - -define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> -; CHECK-NEXT: ret <32 x i16> [[TMP1]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - ret <32 x i16> %1 -} - -define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i16> [[TMP3]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru - ret <32 x i16> %3 -} - -define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_hi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> -; CHECK-NEXT: ret <32 x i16> [[TMP1]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - ret <32 x i16> %1 -} 
- -define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_hi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i16> [[TMP3]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru - ret <32 x i16> %3 -} - -declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>) - -define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_qi_128( -; CHECK-NEXT: ret <16 x i8> [[A0:%.*]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - ret <16 x i8> %1 -} - -define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_qi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A0:%.*]], <16 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i8> [[TMP2]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru - ret <16 x i8> %3 -} - -define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_qi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: ret <16 x i8> [[TMP1]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer) - ret <16 x i8> %1 -} - -define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_qi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i8> [[TMP3]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru - ret <16 x i8> %3 -} - -define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i8> [[TMP1]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - ret <16 x i8> %1 -} - -define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i8> [[TMP3]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, 
<16 x i8> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru - ret <16 x i8> %3 -} - -define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_qi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i8> [[TMP1]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - ret <16 x i8> %1 -} - -define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_qi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i8> [[TMP3]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru - ret <16 x i8> %3 -} - -declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>) - -define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_qi_256( -; CHECK-NEXT: ret <32 x i8> [[A0:%.*]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - ret <32 x i8> %1 -} - -define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_qi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> [[A0:%.*]], <32 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i8> [[TMP2]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru - ret <32 x i8> %3 -} - -define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_qi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: ret <32 x i8> [[TMP1]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer) - ret <32 x i8> %1 -} - -define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_qi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i8> [[TMP3]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru - ret <32 x i8> %3 -} - -define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: ret <32 x i8> [[TMP1]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - ret <32 x i8> %1 -} - -define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> 
%a0, <32 x i8> %passthru, i32 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i8> [[TMP3]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru - ret <32 x i8> %3 -} - -define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_qi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: ret <32 x i8> [[TMP1]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - ret <32 x i8> %1 -} - -define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_qi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i8> [[TMP3]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru - ret <32 x i8> %3 -} - -declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>) - -define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_qi_512( -; CHECK-NEXT: ret <64 x i8> [[A0:%.*]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - ret <64 x i8> %1 -} - -define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_qi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> [[A0:%.*]], <64 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <64 x i8> [[TMP2]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - %2 = bitcast i64 %mask to <64 x i1> - %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru - ret <64 x i8> %3 -} - -define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_qi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> zeroinitializer -; CHECK-NEXT: ret <64 x i8> [[TMP1]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer) - ret <64 x i8> %1 -} - -define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_qi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <64 x i8> [[TMP3]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer) - %2 = bitcast i64 %mask to <64 x i1> - %3 = select <64 x i1> %2, <64 x 
i8> %1, <64 x i8> %passthru - ret <64 x i8> %3 -} - -define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> -; CHECK-NEXT: ret <64 x i8> [[TMP1]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - ret <64 x i8> %1 -} - -define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <64 x i8> [[TMP3]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - %2 = bitcast i64 %mask to <64 x i1> - %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru - ret <64 x i8> %3 -} - -define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_qi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> -; CHECK-NEXT: ret <64 x i8> [[TMP1]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - ret <64 x i8> %1 -} - -define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_qi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <64 x i8> [[TMP3]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - %2 = bitcast i64 %mask to <64 x i1> - %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru - ret <64 x i8> %3 -} - declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32) define <16 x float> @test_add_ps(<16 x float> %a, <16 x float> %b) { diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll index 906e84b6074811..d89cf6b0bb9868 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll @@ -1814,1366 +1814,6 @@ define double @test_mask3_vfnmsub_sd_1_unary_fneg(<2 x double> %a, <2 x double> ret double %13 } -declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) - -define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_si_256( -; CHECK-NEXT: ret <8 x i32> [[A0:%.*]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) - ret <8 x i32> %1 -} - -define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_si_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[A0:%.*]], <8 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i32> [[TMP2]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru - ret <8 x i32> %3 -} - -define <8 x i32> 
@zero_test_permvar_si_256(<8 x i32> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_si_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer) - ret <8 x i32> %1 -} - -define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_si_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i32> [[TMP3]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru - ret <8 x i32> %3 -} - -define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_si_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) - ret <8 x i32> %1 -} - -define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_si_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i32> [[TMP3]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru - ret <8 x i32> %3 -} - -define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_si_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) - ret <8 x i32> %1 -} - -define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_si_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i32> [[TMP3]] -; - %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru - ret <8 x i32> %3 -} - -declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) - -define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_sf_256( -; CHECK-NEXT: ret <8 x float> [[A0:%.*]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) - ret <8 x float> %1 -} - -define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_sf_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x 
i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[A0:%.*]], <8 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x float> [[TMP2]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru - ret <8 x float> %3 -} - -define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_sf_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: ret <8 x float> [[TMP1]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer) - ret <8 x float> %1 -} - -define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_sf_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x float> [[TMP3]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru - ret <8 x float> %3 -} - -define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_sf_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: ret <8 x float> [[TMP1]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) - ret <8 x float> %1 -} - -define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_sf_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x float> [[TMP3]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru - ret <8 x float> %3 -} - -define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_sf_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: ret <8 x float> [[TMP1]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) - ret <8 x float> %1 -} - -define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_sf_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x float> [[TMP3]] -; - %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru - ret <8 x float> %3 -} - -declare <4 x i64> 
@llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>) - -define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_di_256( -; CHECK-NEXT: ret <4 x i64> [[A0:%.*]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) - ret <4 x i64> %1 -} - -define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_di_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[A0:%.*]], <4 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x i64> [[TMP2]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru - ret <4 x i64> %3 -} - -define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_di_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer) - ret <4 x i64> %1 -} - -define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_di_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x i64> [[TMP3]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru - ret <4 x i64> %3 -} - -define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_di_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) - ret <4 x i64> %1 -} - -define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_di_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x i64> [[TMP3]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru - ret <4 x i64> %3 -} - -define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) { -; -; CHECK-LABEL: 
@undef_test_permvar_di_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) - ret <4 x i64> %1 -} - -define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_di_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x i64> [[TMP3]] -; - %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru - ret <4 x i64> %3 -} - -declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>) - -define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_df_256( -; CHECK-NEXT: ret <4 x double> [[A0:%.*]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - ret <4 x double> %1 -} - -define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_df_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[A0:%.*]], <4 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x double> [[TMP2]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru - ret <4 x double> %3 -} - -define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_df_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer) - ret <4 x double> %1 -} - -define <4 x double> @zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_df_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x double> [[TMP3]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru - ret <4 x double> %3 -} - -define <4 x double> 
@shuffle_test_permvar_df_256(<4 x double> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_df_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - ret <4 x double> %1 -} - -define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_df_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x double> [[TMP3]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru - ret <4 x double> %3 -} - -define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_df_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: ret <4 x double> [[TMP1]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - ret <4 x double> %1 -} - -define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_df_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <4 x double> [[TMP3]] -; - %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> - %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru - ret <4 x double> %3 -} - -declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>) - -define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_si_512( -; CHECK-NEXT: ret <16 x i32> [[A0:%.*]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - ret <16 x i32> %1 -} - -define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_si_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[A0:%.*]], <16 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i32> [[TMP2]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru - ret <16 x i32> %3 -} - -define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_si_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> 
[[A0:%.*]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: ret <16 x i32> [[TMP1]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer) - ret <16 x i32> %1 -} - -define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_si_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i32> [[TMP3]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru - ret <16 x i32> %3 -} - -define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_si_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i32> [[TMP1]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - ret <16 x i32> %1 -} - -define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_si_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i32> [[TMP3]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru - ret <16 x i32> %3 -} - -define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_si_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i32> [[TMP1]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - ret <16 x i32> %1 -} - -define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_si_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i32> [[TMP3]] -; - %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru - ret <16 x i32> %3 -} - -declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>) - -define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_sf_512( -; CHECK-NEXT: ret <16 x float> [[A0:%.*]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - ret <16 x float> %1 -} - -define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_sf_512_mask( -; 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[A0:%.*]], <16 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x float> [[TMP2]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru - ret <16 x float> %3 -} - -define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_sf_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: ret <16 x float> [[TMP1]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer) - ret <16 x float> %1 -} - -define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_sf_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x float> [[TMP3]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru - ret <16 x float> %3 -} - -define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_sf_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: ret <16 x float> [[TMP1]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - ret <16 x float> %1 -} - -define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_sf_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x float> [[TMP3]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru - ret <16 x float> %3 -} - -define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_sf_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: ret <16 x float> [[TMP1]] -; - %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - ret <16 x float> %1 -} - -define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_sf_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x float> [[TMP3]] -; - %1 = call <16 x float> 
@llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru - ret <16 x float> %3 -} - -declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>) - -define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_di_512( -; CHECK-NEXT: ret <8 x i64> [[A0:%.*]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - ret <8 x i64> %1 -} - -define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_di_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[A0:%.*]], <8 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i64> [[TMP2]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru - ret <8 x i64> %3 -} - -define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_di_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: ret <8 x i64> [[TMP1]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer) - ret <8 x i64> %1 -} - -define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_di_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i64> [[TMP3]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru - ret <8 x i64> %3 -} - -define <8 x i64> @shuffle_test_permvar_di_512(<8 x i64> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_di_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i64> [[TMP1]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - ret <8 x i64> %1 -} - -define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_di_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i64> [[TMP3]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru - ret <8 x i64> %3 -} - -define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_di_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i64> [[TMP1]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - ret <8 x i64> %1 -} - -define <8 x i64> 
@undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_di_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i64> [[TMP3]] -; - %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru - ret <8 x i64> %3 -} - -declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>) - -define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_df_512( -; CHECK-NEXT: ret <8 x double> [[A0:%.*]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - ret <8 x double> %1 -} - -define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_df_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[A0:%.*]], <8 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x double> [[TMP2]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru - ret <8 x double> %3 -} - -define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_df_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: ret <8 x double> [[TMP1]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer) - ret <8 x double> %1 -} - -define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_df_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x double> [[TMP3]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru - ret <8 x double> %3 -} - -define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_df_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: ret <8 x double> [[TMP1]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - ret <8 x double> %1 -} - -define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_df_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]] -; 
CHECK-NEXT: ret <8 x double> [[TMP3]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru - ret <8 x double> %3 -} - -define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_df_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: ret <8 x double> [[TMP1]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - ret <8 x double> %1 -} - -define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_df_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x double> [[TMP3]] -; - %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru - ret <8 x double> %3 -} - -declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>) - -define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_hi_128( -; CHECK-NEXT: ret <8 x i16> [[A0:%.*]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - ret <8 x i16> %1 -} - -define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_hi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A0:%.*]], <8 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i16> [[TMP2]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru - ret <8 x i16> %3 -} - -define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_hi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: ret <8 x i16> [[TMP1]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer) - ret <8 x i16> %1 -} - -define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_hi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i16> [[TMP3]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru - ret <8 x i16> %3 -} - -define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i16> [[TMP1]] -; - %1 = 
call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - ret <8 x i16> %1 -} - -define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i16> [[TMP3]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru - ret <8 x i16> %3 -} - -define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_hi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: ret <8 x i16> [[TMP1]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - ret <8 x i16> %1 -} - -define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_hi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <8 x i16> [[TMP3]] -; - %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> ) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru - ret <8 x i16> %3 -} - -declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>) - -define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_hi_256( -; CHECK-NEXT: ret <16 x i16> [[A0:%.*]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - ret <16 x i16> %1 -} - -define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_hi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[A0:%.*]], <16 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i16> [[TMP2]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru - ret <16 x i16> %3 -} - -define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_hi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: ret <16 x i16> [[TMP1]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer) - ret <16 x i16> %1 -} - -define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_hi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x 
i16> [[TMP3]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru - ret <16 x i16> %3 -} - -define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i16> [[TMP1]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - ret <16 x i16> %1 -} - -define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i16> [[TMP3]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru - ret <16 x i16> %3 -} - -define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_hi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i16> [[TMP1]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - ret <16 x i16> %1 -} - -define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_hi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i16> [[TMP3]] -; - %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru - ret <16 x i16> %3 -} - -declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>) - -define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_hi_512( -; CHECK-NEXT: ret <32 x i16> [[A0:%.*]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - ret <32 x i16> %1 -} - -define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_hi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[A0:%.*]], <32 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i16> [[TMP2]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru - ret <32 x i16> %3 -} - -define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_hi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: ret <32 x i16> [[TMP1]] -; - %1 = call <32 x i16> 
@llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer) - ret <32 x i16> %1 -} - -define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_hi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i16> [[TMP3]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru - ret <32 x i16> %3 -} - -define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> -; CHECK-NEXT: ret <32 x i16> [[TMP1]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - ret <32 x i16> %1 -} - -define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_hi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i16> [[TMP3]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru - ret <32 x i16> %3 -} - -define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_hi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> -; CHECK-NEXT: ret <32 x i16> [[TMP1]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - ret <32 x i16> %1 -} - -define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_hi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i16> [[TMP3]] -; - %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru - ret <32 x i16> %3 -} - -declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>) - -define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_qi_128( -; CHECK-NEXT: ret <16 x i8> [[A0:%.*]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - ret <16 x i8> %1 -} - -define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_qi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A0:%.*]], <16 x i8> 
[[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i8> [[TMP2]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru - ret <16 x i8> %3 -} - -define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_qi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: ret <16 x i8> [[TMP1]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer) - ret <16 x i8> %1 -} - -define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_qi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i8> [[TMP3]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru - ret <16 x i8> %3 -} - -define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i8> [[TMP1]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - ret <16 x i8> %1 -} - -define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i8> [[TMP3]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru - ret <16 x i8> %3 -} - -define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_qi_128( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> -; CHECK-NEXT: ret <16 x i8> [[TMP1]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - ret <16 x i8> %1 -} - -define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_qi_128_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <16 x i8> [[TMP3]] -; - %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> ) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru - ret <16 x i8> %3 -} - -declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>) - -define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) { -; -; CHECK-LABEL: 
@identity_test_permvar_qi_256( -; CHECK-NEXT: ret <32 x i8> [[A0:%.*]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - ret <32 x i8> %1 -} - -define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_qi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> [[A0:%.*]], <32 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i8> [[TMP2]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru - ret <32 x i8> %3 -} - -define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_qi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: ret <32 x i8> [[TMP1]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer) - ret <32 x i8> %1 -} - -define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_qi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i8> [[TMP3]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru - ret <32 x i8> %3 -} - -define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: ret <32 x i8> [[TMP1]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - ret <32 x i8> %1 -} - -define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i8> [[TMP3]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru - ret <32 x i8> %3 -} - -define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_qi_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: ret <32 x i8> [[TMP1]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - ret <32 x i8> %1 -} - -define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_qi_256_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <32 
x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <32 x i8> [[TMP3]] -; - %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> ) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru - ret <32 x i8> %3 -} - -declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>) - -define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) { -; -; CHECK-LABEL: @identity_test_permvar_qi_512( -; CHECK-NEXT: ret <64 x i8> [[A0:%.*]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - ret <64 x i8> %1 -} - -define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { -; -; CHECK-LABEL: @identity_test_permvar_qi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> [[A0:%.*]], <64 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <64 x i8> [[TMP2]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - %2 = bitcast i64 %mask to <64 x i1> - %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru - ret <64 x i8> %3 -} - -define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) { -; -; CHECK-LABEL: @zero_test_permvar_qi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> zeroinitializer -; CHECK-NEXT: ret <64 x i8> [[TMP1]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer) - ret <64 x i8> %1 -} - -define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { -; -; CHECK-LABEL: @zero_test_permvar_qi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <64 x i8> [[TMP3]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer) - %2 = bitcast i64 %mask to <64 x i1> - %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru - ret <64 x i8> %3 -} - -define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> -; CHECK-NEXT: ret <64 x i8> [[TMP1]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - ret <64 x i8> %1 -} - -define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { -; -; CHECK-LABEL: @shuffle_test_permvar_qi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <64 x i8> [[TMP3]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - %2 = bitcast i64 %mask to <64 x i1> - %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru - ret <64 x i8> %3 -} - -define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) { -; -; CHECK-LABEL: @undef_test_permvar_qi_512( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> -; CHECK-NEXT: ret <64 x i8> 
[[TMP1]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - ret <64 x i8> %1 -} - -define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) { -; -; CHECK-LABEL: @undef_test_permvar_qi_512_mask( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]] -; CHECK-NEXT: ret <64 x i8> [[TMP3]] -; - %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> ) - %2 = bitcast i64 %mask to <64 x i1> - %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru - ret <64 x i8> %3 -} - declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32) define <16 x float> @test_add_ps(<16 x float> %a, <16 x float> %b) { diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vperm.ll b/llvm/test/Transforms/InstCombine/X86/x86-vperm.ll new file mode 100644 index 00000000000000..6519e4f5348484 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/X86/x86-vperm.ll @@ -0,0 +1,1404 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s + +declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) + +define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) { +; CHECK-LABEL: @identity_test_permvar_si_256( +; CHECK-NEXT: ret <8 x i32> [[A0:%.*]] +; + %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { +; CHECK-LABEL: @identity_test_permvar_si_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[A0:%.*]], <8 x i32> [[PASSTHRU:%.*]] +; CHECK-NEXT: ret <8 x i32> [[TMP2]] +; + %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru + ret <8 x i32> %3 +} + +define <8 x i32> @zero_test_permvar_si_256(<8 x i32> %a0) { +; CHECK-LABEL: @zero_test_permvar_si_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; + %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer) + ret <8 x i32> %1 +} + +define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { +; CHECK-LABEL: @zero_test_permvar_si_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; + %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru + ret <8 x i32> %3 +} + +define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) { +; CHECK-LABEL: @shuffle_test_permvar_si_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; + %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x 
i32> %a0, <8 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { +; CHECK-LABEL: @shuffle_test_permvar_si_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; + %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru + ret <8 x i32> %3 +} + +define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) { +; CHECK-LABEL: @undef_test_permvar_si_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] +; + %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) { +; CHECK-LABEL: @undef_test_permvar_si_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]] +; CHECK-NEXT: ret <8 x i32> [[TMP3]] +; + %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> ) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru + ret <8 x i32> %3 +} + +define <8 x i32> @demandedbit_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %a1) { +; CHECK-LABEL: @demandedbit_test_permvar_si_256_mask( +; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[A1:%.*]], +; CHECK-NEXT: [[S:%.*]] = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A0:%.*]], <8 x i32> [[M]]) +; CHECK-NEXT: ret <8 x i32> [[S]] +; + %m = or <8 x i32> %a1, + %s = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %m) + ret <8 x i32> %s +} + +declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) + +define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) { +; CHECK-LABEL: @identity_test_permvar_sf_256( +; CHECK-NEXT: ret <8 x float> [[A0:%.*]] +; + %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) + ret <8 x float> %1 +} + +define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { +; CHECK-LABEL: @identity_test_permvar_sf_256_mask( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[A0:%.*]], <8 x float> [[PASSTHRU:%.*]] +; CHECK-NEXT: ret <8 x float> [[TMP2]] +; + %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> ) + %2 = bitcast i8 %mask to <8 x i1> + %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru + ret <8 x float> %3 +} + +define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) { +; CHECK-LABEL: @zero_test_permvar_sf_256( +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: ret <8 x float> [[TMP1]] +; + %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer) + ret <8 x float> %1 +} + +define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) { +; CHECK-LABEL: 
@zero_test_permvar_sf_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x float> [[TMP3]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
+}
+
+define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_sf_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> )
+ ret <8 x float> %1
+}
+
+define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_sf_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x float> [[TMP3]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
+}
+
+define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) {
+; CHECK-LABEL: @undef_test_permvar_sf_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> )
+ ret <8 x float> %1
+}
+
+define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_sf_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x float> [[TMP3]]
+;
+ %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
+ ret <8 x float> %3
+}
+
+define <8 x float> @demandedbit_test_permvar_sf_256_mask(<8 x float> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_sf_256_mask(
+; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[M]])
+; CHECK-NEXT: ret <8 x float> [[S]]
+;
+ %m = or <8 x i32> %a1,
+ %s = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %m)
+ ret <8 x float> %s
+}
+
+declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) {
+; CHECK-LABEL: @identity_test_permvar_di_256(
+; CHECK-NEXT: ret <4 x i64> [[A0:%.*]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> )
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_di_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[A0:%.*]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x i64> [[TMP2]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) {
+; CHECK-LABEL: @zero_test_permvar_di_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_di_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x i64> [[TMP3]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_di_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> )
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_di_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x i64> [[TMP3]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) {
+; CHECK-LABEL: @undef_test_permvar_di_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> )
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_di_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x i64> [[TMP3]]
+;
+ %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32>
+ %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @demandedbits_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @demandedbits_test_permvar_di_256_mask(
+; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> [[A0:%.*]], <4 x i64> [[M]])
+; CHECK-NEXT: ret <4 x i64> [[S]]
+;
+ %m = or <4 x i64> %a1,
+ %s = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> %m)
+ ret <4 x i64> %s
+}
+
+declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
+
+define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) {
+; CHECK-LABEL: @identity_test_permvar_df_256(
+; CHECK-NEXT: ret <4 x double> [[A0:%.*]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> )
+ ret <4 x double> %1
+}
+
+define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_df_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[A0:%.*]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x double> [[TMP2]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
+}
+
+define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) {
+; CHECK-LABEL: @zero_test_permvar_df_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer)
+ ret <4 x double> %1
+}
+
+define <4 x double> @zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_df_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
+}
+
+define <4 x double> @shuffle_test_permvar_df_256(<4 x double> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_df_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> )
+ ret <4 x double> %1
+}
+
+define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_df_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
+}
+
+define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) {
+; CHECK-LABEL: @undef_test_permvar_df_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> )
+ ret <4 x double> %1
+}
+
+define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_df_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <4 x double> [[TMP3]]
+;
+ %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32>
+ %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
+ ret <4 x double> %3
+}
+
+define <4 x double> @demandedbits_test_permvar_df_256_mask(<4 x double> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @demandedbits_test_permvar_df_256_mask(
+; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> [[A0:%.*]], <4 x i64> [[M]])
+; CHECK-NEXT: ret <4 x double> [[S]]
+;
+ %m = or <4 x i64> %a1,
+ %s = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> %m)
+ ret <4 x double> %s
+}
+
+declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
+
+define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) {
+; CHECK-LABEL: @identity_test_permvar_si_512(
+; CHECK-NEXT: ret <16 x i32> [[A0:%.*]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> )
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: @identity_test_permvar_si_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[A0:%.*]], <16 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i32> [[TMP2]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
+}
+
+define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) {
+; CHECK-LABEL: @zero_test_permvar_si_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: @zero_test_permvar_si_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
+}
+
+define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_si_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> )
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_si_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
+}
+
+define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) {
+; CHECK-LABEL: @undef_test_permvar_si_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x i32> [[TMP1]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> )
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
+; CHECK-LABEL: @undef_test_permvar_si_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
+ ret <16 x i32> %3
+}
+
+define <16 x i32> @demandedbit_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_si_512_mask(
+; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[A0:%.*]], <16 x i32> [[M]])
+; CHECK-NEXT: ret <16 x i32> [[S]]
+;
+ %m = or <16 x i32> %a1,
+ %s = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> %m)
+ ret <16 x i32> %s
+}
+
+declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
+
+define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) {
+; CHECK-LABEL: @identity_test_permvar_sf_512(
+; CHECK-NEXT: ret <16 x float> [[A0:%.*]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> )
+ ret <16 x float> %1
+}
+
+define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: @identity_test_permvar_sf_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[A0:%.*]], <16 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP2]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) {
+; CHECK-LABEL: @zero_test_permvar_sf_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer)
+ ret <16 x float> %1
+}
+
+define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: @zero_test_permvar_sf_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_sf_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> )
+ ret <16 x float> %1
+}
+
+define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_sf_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) {
+; CHECK-LABEL: @undef_test_permvar_sf_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x float> [[TMP1]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> )
+ ret <16 x float> %1
+}
+
+define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
+; CHECK-LABEL: @undef_test_permvar_sf_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x float> [[TMP3]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @demandedbit_test_permvar_sf_512_mask(<16 x float> %a0, <16 x i32> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_sf_512_mask(
+; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[A0:%.*]], <16 x i32> [[M]])
+; CHECK-NEXT: ret <16 x float> [[S]]
+;
+ %m = or <16 x i32> %a1,
+ %s = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> %m)
+ ret <16 x float> %s
+}
+
+declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
+
+define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) {
+; CHECK-LABEL: @identity_test_permvar_di_512(
+; CHECK-NEXT: ret <8 x i64> [[A0:%.*]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> )
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_di_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[A0:%.*]], <8 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i64> [[TMP2]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) {
+; CHECK-LABEL: @zero_test_permvar_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_di_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @shuffle_test_permvar_di_512(<8 x i64> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> )
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_di_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) {
+; CHECK-LABEL: @undef_test_permvar_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x i64> [[TMP1]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> )
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_di_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @demandedbit_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_di_512_mask(
+; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[A0:%.*]], <8 x i64> [[M]])
+; CHECK-NEXT: ret <8 x i64> [[S]]
+;
+ %m = or <8 x i64> %a1,
+ %s = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> %m)
+ ret <8 x i64> %s
+}
+
+declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
+
+define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) {
+; CHECK-LABEL: @identity_test_permvar_df_512(
+; CHECK-NEXT: ret <8 x double> [[A0:%.*]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> )
+ ret <8 x double> %1
+}
+
+define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_df_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[A0:%.*]], <8 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP2]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
+}
+
+define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) {
+; CHECK-LABEL: @zero_test_permvar_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer)
+ ret <8 x double> %1
+}
+
+define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_df_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
+}
+
+define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> )
+ ret <8 x double> %1
+}
+
+define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_df_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
+}
+
+define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) {
+; CHECK-LABEL: @undef_test_permvar_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x double> [[TMP1]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> )
+ ret <8 x double> %1
+}
+
+define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_df_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x double> [[TMP3]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
+}
+
+define <8 x double> @demandedbit_test_permvar_df_512_mask(<8 x double> %a0, <8 x i64> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_df_512_mask(
+; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[A0:%.*]], <8 x i64> [[M]])
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+ %m = or <8 x i64> %a1,
+ %s = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> %m)
+ ret <8 x double> %s
+}
+
+declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) {
+; CHECK-LABEL: @identity_test_permvar_hi_128(
+; CHECK-NEXT: ret <8 x i16> [[A0:%.*]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> )
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
+; CHECK-LABEL: @identity_test_permvar_hi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A0:%.*]], <8 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) {
+; CHECK-LABEL: @zero_test_permvar_hi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
+; CHECK-LABEL: @zero_test_permvar_hi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> )
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) {
+; CHECK-LABEL: @undef_test_permvar_hi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> )
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
+; CHECK-LABEL: @undef_test_permvar_hi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> )
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @demandedbit_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_hi_128_mask(
+; CHECK-NEXT: [[M:%.*]] = or <8 x i16> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> [[A0:%.*]], <8 x i16> [[M]])
+; CHECK-NEXT: ret <8 x i16> [[S]]
+;
+ %m = or <8 x i16> %a1,
+ %s = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> %m)
+ ret <8 x i16> %s
+}
+
+declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) {
+; CHECK-LABEL: @identity_test_permvar_hi_256(
+; CHECK-NEXT: ret <16 x i16> [[A0:%.*]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> )
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
+; CHECK-LABEL: @identity_test_permvar_hi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[A0:%.*]], <16 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i16> [[TMP2]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) {
+; CHECK-LABEL: @zero_test_permvar_hi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
+; CHECK-LABEL: @zero_test_permvar_hi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i16> [[TMP3]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> )
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i16> [[TMP3]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) {
+; CHECK-LABEL: @undef_test_permvar_hi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> )
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
+; CHECK-LABEL: @undef_test_permvar_hi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i16> [[TMP3]]
+;
+ %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @demandedbit_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_hi_256_mask(
+; CHECK-NEXT: [[M:%.*]] = or <16 x i16> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> [[A0:%.*]], <16 x i16> [[M]])
+; CHECK-NEXT: ret <16 x i16> [[S]]
+;
+ %m = or <16 x i16> %a1,
+ %s = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> %m)
+ ret <16 x i16> %s
+}
+
+declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
+
+define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) {
+; CHECK-LABEL: @identity_test_permvar_hi_512(
+; CHECK-NEXT: ret <32 x i16> [[A0:%.*]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> )
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
+; CHECK-LABEL: @identity_test_permvar_hi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[A0:%.*]], <32 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i16> [[TMP2]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> )
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) {
+; CHECK-LABEL: @zero_test_permvar_hi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> zeroinitializer
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
+; CHECK-LABEL: @zero_test_permvar_hi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i16> [[TMP3]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> )
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_hi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i16> [[TMP3]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> )
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) {
+; CHECK-LABEL: @undef_test_permvar_hi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32>
+; CHECK-NEXT: ret <32 x i16> [[TMP1]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> )
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
+; CHECK-LABEL: @undef_test_permvar_hi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i16> [[TMP3]]
+;
+ %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> )
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @demandedbit_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_hi_512_mask(
+; CHECK-NEXT: [[M:%.*]] = or <32 x i16> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> [[A0:%.*]], <32 x i16> [[M]])
+; CHECK-NEXT: ret <32 x i16> [[S]]
+;
+ %m = or <32 x i16> %a1,
+ %s = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> %m)
+ ret <32 x i16> %s
+}
+
+declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) {
+; CHECK-LABEL: @identity_test_permvar_qi_128(
+; CHECK-NEXT: ret <16 x i8> [[A0:%.*]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> )
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
+; CHECK-LABEL: @identity_test_permvar_qi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A0:%.*]], <16 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) {
+; CHECK-LABEL: @zero_test_permvar_qi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
+; CHECK-LABEL: @zero_test_permvar_qi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> )
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) {
+; CHECK-LABEL: @undef_test_permvar_qi_128(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> )
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
+; CHECK-LABEL: @undef_test_permvar_qi_128_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> )
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @demandedbit_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_qi_128_mask(
+; CHECK-NEXT: [[M:%.*]] = or <16 x i8> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> [[A0:%.*]], <16 x i8> [[M]])
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %m = or <16 x i8> %a1,
+ %s = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> %m)
+ ret <16 x i8> %s
+}
+
+declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) {
+; CHECK-LABEL: @identity_test_permvar_qi_256(
+; CHECK-NEXT: ret <32 x i8> [[A0:%.*]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> )
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
+; CHECK-LABEL: @identity_test_permvar_qi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> [[A0:%.*]], <32 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i8> [[TMP2]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> )
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) {
+; CHECK-LABEL: @zero_test_permvar_qi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
+; CHECK-LABEL: @zero_test_permvar_qi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i8> [[TMP3]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> )
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i8> [[TMP3]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> )
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) {
+; CHECK-LABEL: @undef_test_permvar_qi_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> )
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
+; CHECK-LABEL: @undef_test_permvar_qi_256_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <32 x i8> [[TMP3]]
+;
+ %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> )
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @demandedbit_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_qi_256_mask(
+; CHECK-NEXT: [[M:%.*]] = or <32 x i8> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> [[A0:%.*]], <32 x i8> [[M]])
+; CHECK-NEXT: ret <32 x i8> [[S]]
+;
+ %m = or <32 x i8> %a1,
+ %s = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> %m)
+ ret <32 x i8> %s
+}
+
+declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>)
+
+define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) {
+; CHECK-LABEL: @identity_test_permvar_qi_512(
+; CHECK-NEXT: ret <64 x i8> [[A0:%.*]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> )
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
+; CHECK-LABEL: @identity_test_permvar_qi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> [[A0:%.*]], <64 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <64 x i8> [[TMP2]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> )
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
+}
+
+define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) {
+; CHECK-LABEL: @zero_test_permvar_qi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> zeroinitializer
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
+; CHECK-LABEL: @zero_test_permvar_qi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <64 x i8> [[TMP3]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
+}
+
+define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> )
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
+; CHECK-LABEL: @shuffle_test_permvar_qi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <64 x i8> [[TMP3]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> )
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
+}
+
+define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) {
+; CHECK-LABEL: @undef_test_permvar_qi_512(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32>
+; CHECK-NEXT: ret <64 x i8> [[TMP1]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> )
+ ret <64 x i8> %1
+}
+
+define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
+; CHECK-LABEL: @undef_test_permvar_qi_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
+; CHECK-NEXT: ret <64 x i8> [[TMP3]]
+;
+ %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> )
+ %2 = bitcast i64 %mask to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
+ ret <64 x i8> %3
+}
+
+define <64 x i8> @demandedbit_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %a1) {
+; CHECK-LABEL: @demandedbit_test_permvar_qi_512_mask(
+; CHECK-NEXT: [[M:%.*]] = or <64 x i8> [[A1:%.*]],
+; CHECK-NEXT: [[S:%.*]] = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> [[A0:%.*]], <64 x i8> [[M]])
+; CHECK-NEXT: ret <64 x i8> [[S]]
+;
+ %m = or <64 x i8> %a1,
+ %s = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> %m)
+ ret <64 x i8> %s
+}
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermi2.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermi2.ll
index a65358e1033cc6..eb6ad4458d932e 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vpermi2.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermi2.ll
@@ -25,6 +25,30 @@ define <2 x i64> @shuffle_vpermv3_v2i64_unary(<2 x i64> %x0) {
 ret <2 x i64> %r
 }
 
+define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %m) {
+; CHECK-LABEL: define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits(
+; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <2 x i64> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[T]], <2 x i64> [[X1]])
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+ %t = or <2 x i64> %m,
+ %r = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %t, <2 x i64> %x1)
+ ret <2 x i64> %r
+}
+
+define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits_negative(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %m) {
+; CHECK-LABEL: define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits_negative(
+; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <2 x i64> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[T]], <2 x i64> [[X1]])
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+ %t = or <2 x i64> %m,
+ %r = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %t, <2 x i64> %x1)
+ ret <2 x i64> %r
+}
+
 define <4 x i64> @shuffle_vpermv3_v4i64(<4 x i64> %x0, <4 x i64> %x1) {
 ; CHECK-LABEL: define <4 x i64> @shuffle_vpermv3_v4i64(
 ; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]]) {
@@ -45,6 +69,18 @@ define <4 x i64> @shuffle_vpermv3_v4i64_unary(<4 x i64> %x0) {
 ret <4 x i64> %r
 }
 
+define <4 x i64> @shuffle_vpermv3_v4i64_demandedbits(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %m) {
+; CHECK-LABEL: define <4 x i64> @shuffle_vpermv3_v4i64_demandedbits(
+; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <4 x i64> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> [[T]], <4 x i64> [[X1]])
+; CHECK-NEXT: ret <4 x i64> [[R]]
+;
+ %t = or <4 x i64> %m,
+ %r = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %t, <4 x i64> %x1)
+ ret <4 x i64> %r
+}
+
 define <8 x i64> @shuffle_vpermv3_v8i64(<8 x i64> %x0, <8 x i64> %x1) {
 ; CHECK-LABEL: define <8 x i64> @shuffle_vpermv3_v8i64(
 ; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]]) {
@@ -65,6 +101,18 @@ define <8 x i64> @shuffle_vpermv3_v8i64_unary(<8 x i64> %x0) {
 ret <8 x i64> %r
 }
 
+define <8 x i64> @shuffle_vpermv3_v8i64_demandedbits(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %m) {
+; CHECK-LABEL: define <8 x i64> @shuffle_vpermv3_v8i64_demandedbits(
+; CHECK-SAME: <8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <8 x i64> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0]], <8 x i64> [[T]], <8 x i64> [[X1]])
+; CHECK-NEXT: ret <8 x i64> [[R]]
+;
+ %t = or <8 x i64> %m,
+ %r = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %t, <8 x i64> %x1)
+ ret <8 x i64> %r
+}
+
 ;
 ; vXi32
 ;
@@ -89,6 +137,18 @@ define <4 x i32> @shuffle_vpermv3_v4i32_unary(<4 x i32> %x0) {
 ret <4 x i32> %r
 }
 
+define <4 x i32> @shuffle_vpermv3_v4i32_demandedbits(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %m) {
+; CHECK-LABEL: define <4 x i32> @shuffle_vpermv3_v4i32_demandedbits(
+; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <4 x i32> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> [[T]], <4 x i32> [[X1]])
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %t = or <4 x i32> %m,
+ %r = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %t, <4 x i32> %x1)
+ ret <4 x i32> %r
+}
+
 define <8 x i32> @shuffle_vpermv3_v8i32(<8 x i32> %x0, <8 x i32> %x1) {
 ; CHECK-LABEL: define <8 x i32> @shuffle_vpermv3_v8i32(
 ; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]]) {
@@ -109,6 +169,18 @@ define <8 x i32> @shuffle_vpermv3_v8i32_unary(<8 x i32> %x0) {
 ret <8 x i32> %r
 }
 
+define <8 x i32> @shuffle_vpermv3_v8i32_demandedbits(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %m) {
+; CHECK-LABEL: define <8 x i32> @shuffle_vpermv3_v8i32_demandedbits(
+; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <8 x i32> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> [[T]], <8 x i32> [[X1]])
+; CHECK-NEXT: ret <8 x i32> [[R]]
+;
+ %t = or <8 x i32> %m,
+ %r = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %t, <8 x i32> %x1)
+ ret <8 x i32> %r
+}
+
 define <16 x i32> @shuffle_vpermv3_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
 ; CHECK-LABEL: define <16 x i32> @shuffle_vpermv3_v16i32(
 ; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]]) {
@@ -129,6 +201,18 @@ define <16 x i32> @shuffle_vpermv3_v16i32_unary(<16 x i32> %x0) {
 ret <16 x i32> %r
 }
 
+define <16 x i32> @shuffle_vpermv3_v16i32_demandedbits(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %m) {
+; CHECK-LABEL: define <16 x i32> @shuffle_vpermv3_v16i32_demandedbits(
+; CHECK-SAME: <16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <16 x i32> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0]], <16 x i32> [[T]], <16 x i32> [[X1]])
+; CHECK-NEXT: ret <16 x i32> [[R]]
+;
+ %t = or <16 x i32> %m,
+ %r = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %t, <16 x i32> %x1)
+ ret <16 x i32> %r
+}
+
 ;
 ; vXi16
 ;
@@ -153,6 +237,18 @@ define <8 x i16> @shuffle_vpermv3_v8i16_unary(<8 x i16> %x0) {
 ret <8 x i16> %r
 }
 
+define <8 x i16> @shuffle_vpermv3_v8i16_demandedbits(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %m) {
+; CHECK-LABEL: define <8 x i16> @shuffle_vpermv3_v8i16_demandedbits(
+; CHECK-SAME: <8 x i16> [[X0:%.*]], <8 x i16> [[X1:%.*]], <8 x i16> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <8 x i16> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[X0]], <8 x i16> [[T]], <8 x i16> [[X1]])
+; CHECK-NEXT: ret <8 x i16> [[R]]
+;
+ %t = or <8 x i16> %m,
+ %r = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %t, <8 x i16> %x1)
+ ret <8 x i16> %r
+}
+
 define <16 x i16> @shuffle_vpermv3_v16i16(<16 x i16> %x0, <16 x i16> %x1) {
 ; CHECK-LABEL: define <16 x i16> @shuffle_vpermv3_v16i16(
 ; CHECK-SAME: <16 x i16> [[X0:%.*]], <16 x i16> [[X1:%.*]]) {
@@ -173,6 +269,18 @@ define <16 x i16> @shuffle_vpermv3_v16i16_unary(<16 x i16> %x0) {
 ret <16 x i16> %r
 }
 
+define <16 x i16> @shuffle_vpermv3_v16i16_demandedbits(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %m) {
+; CHECK-LABEL: define <16 x i16> @shuffle_vpermv3_v16i16_demandedbits(
+; CHECK-SAME: <16 x i16> [[X0:%.*]], <16 x i16> [[X1:%.*]], <16 x i16> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <16 x i16> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[X0]], <16 x i16> [[T]], <16 x i16> [[X1]])
+; CHECK-NEXT: ret <16 x i16> [[R]]
+;
+ %t = or <16 x i16> %m,
+ %r = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %t, <16 x i16> %x1)
+ ret <16 x i16> %r
+}
+
 define <32 x i16> @shuffle_vpermv3_v32i16(<32 x i16> %x0, <32 x i16> %x1) {
 ; CHECK-LABEL: define <32 x i16> @shuffle_vpermv3_v32i16(
 ; CHECK-SAME: <32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]]) {
@@ -193,6 +301,18 @@ define <32 x i16> @shuffle_vpermv3_v32i16_unary(<32 x i16> %x0) {
 ret <32 x i16> %r
 }
 
+define <32 x i16> @shuffle_vpermv3_v32i16_demandedbits(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %m) {
+; CHECK-LABEL: define <32 x i16> @shuffle_vpermv3_v32i16_demandedbits(
+; CHECK-SAME: <32 x i16> [[X0:%.*]], <32 x i16> [[X1:%.*]], <32 x i16> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <32 x i16> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0]], <32 x i16> [[T]], <32 x i16> [[X1]])
+; CHECK-NEXT: ret <32 x i16> [[R]]
+;
+ %t = or <32 x i16> %m,
+ %r = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %t, <32 x i16> %x1)
+ ret <32 x i16> %r
+}
+
 ;
 ; vXi8
 ;
@@ -217,6 +337,18 @@ define <16 x i8> @shuffle_vpermv3_v16i8_unary(<16 x i8> %x0) {
 ret <16 x i8> %r
 }
 
+define <16 x i8> @shuffle_vpermv3_v16i8_demandedbits(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %m) {
+; CHECK-LABEL: define <16 x i8> @shuffle_vpermv3_v16i8_demandedbits(
+; CHECK-SAME: <16 x i8> [[X0:%.*]], <16 x i8> [[X1:%.*]], <16 x i8> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <16 x i8> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[X0]], <16 x i8> [[T]], <16 x i8> [[X1]])
+; CHECK-NEXT: ret <16 x i8> [[R]]
+;
+ %t = or <16 x i8> %m,
+ %r = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %t, <16 x i8> %x1)
+ ret <16 x i8> %r
+}
+
 define <32 x i8> @shuffle_vpermv3_v32i8(<32 x i8> %x0, <32 x i8> %x1) {
 ; CHECK-LABEL: define <32 x i8> @shuffle_vpermv3_v32i8(
 ; CHECK-SAME: <32 x i8> [[X0:%.*]], <32 x i8> [[X1:%.*]]) {
@@ -237,6 +369,18 @@ define <32 x i8> @shuffle_vpermv3_v32i8_unary(<32 x i8> %x0) {
 ret <32 x i8> %r
 }
 
+define <32 x i8> @shuffle_vpermv3_v32i8_demandedbits(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %m) {
+; CHECK-LABEL: define <32 x i8> @shuffle_vpermv3_v32i8_demandedbits(
+; CHECK-SAME: <32 x i8> [[X0:%.*]], <32 x i8> [[X1:%.*]], <32 x i8> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <32 x i8> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[X0]], <32 x i8> [[T]], <32 x i8> [[X1]])
+; CHECK-NEXT: ret <32 x i8> [[R]]
+;
+ %t = or <32 x i8> %m,
+ %r = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %t, <32 x i8> %x1)
+ ret <32 x i8> %r
+}
+
 define <64 x i8> @shuffle_vpermv3_v64i8(<64 x i8> %x0, <64 x i8> %x1) {
 ; CHECK-LABEL: define <64 x i8> @shuffle_vpermv3_v64i8(
 ; CHECK-SAME: <64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]]) {
@@ -256,3 +400,15 @@ define <64 x i8> @shuffle_vpermv3_v64i8_unary(<64 x i8> %x0) {
 %r = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> , <64 x i8> %x0)
 ret <64 x i8> %r
 }
+
+define <64 x i8> @shuffle_vpermv3_v64i8_demandedbits(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %m) {
+; CHECK-LABEL: define <64 x i8> @shuffle_vpermv3_v64i8_demandedbits(
+; CHECK-SAME: <64 x i8> [[X0:%.*]], <64 x i8> [[X1:%.*]], <64 x i8> [[M:%.*]]) {
+; CHECK-NEXT: [[T:%.*]] = or <64 x i8> [[M]],
+; CHECK-NEXT: [[R:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[X0]], <64 x i8> [[T]], <64 x i8> [[X1]])
+; CHECK-NEXT: ret <64 x i8> [[R]]
+;
+ %t = or <64 x i8> %m,
+ %r = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> %x0, <64 x i8> %t, <64 x i8> %x1)
+ ret <64 x i8> %r
+}
diff --git a/llvm/test/Transforms/LICM/hoist-add-sub.ll b/llvm/test/Transforms/LICM/hoist-add-sub.ll
index 5393cdb1d29c43..d9b868eda579f9 100644
--- a/llvm/test/Transforms/LICM/hoist-add-sub.ll
+++ b/llvm/test/Transforms/LICM/hoist-add-sub.ll
@@ -51,6 +51,55 @@ out_of_bounds:
 ret i32 -1
 }
 
+define i32 @test_01_unsigned(ptr %p, ptr %x_p, ptr %length_p) {
+; CHECK-LABEL: define i32 @test_01_unsigned
+; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG1:![0-9]+]]
+; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG2:![0-9]+]]
+; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nuw i32 [[X]], 4
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ugt i32 [[IV]], [[INVARIANT_OP]]
+; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
+; CHECK: backedge:
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]]
+; CHECK: out_of_bounds:
+; CHECK-NEXT: ret i32 -1
+;
+entry:
+ %x = load i32, ptr %x_p, !range !2
+ %length = load i32, ptr %length_p, !range !1
+ br label %loop
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+ %arith = sub nuw i32 %x, %iv
+ %x_check = icmp ult i32 %arith, 4
+ br i1 %x_check, label %out_of_bounds, label %backedge
+
+backedge:
+ %el.ptr = getelementptr i32, ptr %p, i32 %iv
+ store i32 1, ptr %el.ptr
+ %iv.next = add nuw nsw i32 %iv, 4
+ %loop_cond = icmp ult i32 %iv.next, %length
+ br i1 %loop_cond, label %loop, label %exit
+
+exit:
+ ret i32 %iv.next
+
+out_of_bounds:
+ ret i32 -1
+}
+
 ; TODO: x - iv < 4 ==> iv > x - 4
 define i32 @test_01a(ptr %p, ptr %x_p, ptr %length_p) {
 ; CHECK-LABEL: define i32 @test_01a
@@ -114,6 +163,68 @@ failed:
 ret i32 -2
 }
 
+define i32 @test_01a_unsigned(ptr %p, ptr %x_p, ptr %length_p) {
+; CHECK-LABEL: define i32 @test_01a_unsigned
+; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4
+; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4
+; CHECK-NEXT: [[PRECOND_1:%.*]] = icmp uge i32 [[X]], 0
+; CHECK-NEXT: [[PRECOND_2:%.*]] = icmp uge i32 [[LENGTH]], 0
+; CHECK-NEXT: [[PRECOND:%.*]] = and i1 [[PRECOND_1]], [[PRECOND_2]]
+; CHECK-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[FAILED:%.*]]
+; CHECK: loop.preheader:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ]
+; CHECK-NEXT: [[ARITH:%.*]] = sub nuw i32 [[X]], [[IV]]
+; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[ARITH]], 4
+; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
+; CHECK: backedge:
+; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
+; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]]
+; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ]
+; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]]
+; CHECK: out_of_bounds:
+; CHECK-NEXT: ret i32 -1
+; CHECK: failed:
+; CHECK-NEXT: ret i32 -2
+;
+entry:
+ %x = load i32, ptr %x_p
+ %length = load i32, ptr %length_p
+ %precond_1 = icmp uge i32 %x, 0
+ %precond_2 = icmp uge i32 %length, 0
+ %precond = and i1 %precond_1, %precond_2
+ br i1 %precond, label %loop, label %failed
+
+loop:
+ %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+ %arith = sub 
nuw i32 %x, %iv + %x_check = icmp ult i32 %arith, 4 + br i1 %x_check, label %out_of_bounds, label %backedge + +backedge: + %el.ptr = getelementptr i32, ptr %p, i32 %iv + store i32 1, ptr %el.ptr + %iv.next = add nuw nsw i32 %iv, 4 + %loop_cond = icmp ult i32 %iv.next, %length + br i1 %loop_cond, label %loop, label %exit + +exit: + ret i32 %iv.next + +out_of_bounds: + ret i32 -1 + +failed: + ret i32 -2 +} + ; Range info is missing for x, cannot prove no-overflow. Should not hoist. define i32 @test_01_neg(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_01_neg @@ -164,6 +275,54 @@ out_of_bounds: ret i32 -1 } +define i32 @test_01_neg_unsigned(ptr %p, ptr %x_p, ptr %length_p) { +; CHECK-LABEL: define i32 @test_01_neg_unsigned +; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4 +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[ARITH:%.*]] = sub nuw i32 [[X]], [[IV]] +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[ARITH]], 4 +; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; CHECK: out_of_bounds: +; CHECK-NEXT: ret i32 -1 +; +entry: + %x = load i32, ptr %x_p + %length = load i32, ptr %length_p, !range !0 + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %arith = sub nuw i32 %x, %iv + %x_check = icmp ult i32 %arith, 4 + br i1 %x_check, label %out_of_bounds, label %backedge + +backedge: + %el.ptr = getelementptr i32, ptr %p, i32 %iv + store i32 1, ptr %el.ptr + %iv.next = add nuw nsw i32 %iv, 4 + %loop_cond = icmp ult i32 %iv.next, %length + br i1 %loop_cond, label %loop, label %exit + +exit: + ret i32 %iv.next + +out_of_bounds: + ret i32 -1 +} ; x + iv < 4 ==> iv < 4 - x define i32 @test_02(ptr %p, ptr %x_p, ptr %length_p) { @@ -215,6 +374,55 @@ out_of_bounds: ret i32 -1 } +define i32 @test_02_unsigned(ptr %p, ptr %x_p, ptr %length_p) { +; CHECK-LABEL: define i32 @test_02_unsigned +; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG3:![0-9]+]] +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG2]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nuw i32 4, [[X]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[IV]], [[INVARIANT_OP]] +; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 
[[LENGTH]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; CHECK: out_of_bounds: +; CHECK-NEXT: ret i32 -1 +; +entry: + %x = load i32, ptr %x_p, !range !3 + %length = load i32, ptr %length_p, !range !1 + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %arith = add nuw i32 %x, %iv + %x_check = icmp ult i32 %arith, 4 + br i1 %x_check, label %out_of_bounds, label %backedge + +backedge: + %el.ptr = getelementptr i32, ptr %p, i32 %iv + store i32 1, ptr %el.ptr + %iv.next = add nuw nsw i32 %iv, 4 + %loop_cond = icmp ult i32 %iv.next, %length + br i1 %loop_cond, label %loop, label %exit + +exit: + ret i32 %iv.next + +out_of_bounds: + ret i32 -1 +} + ; TODO: x + iv < 4 ==> iv < 4 - x define i32 @test_02a(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_02a @@ -278,12 +486,74 @@ failed: ret i32 -2 } +define i32 @test_02a_unsigned(ptr %p, ptr %x_p, ptr %length_p) { +; CHECK-LABEL: define i32 @test_02a_unsigned +; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4 +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4 +; CHECK-NEXT: [[PRECOND_1:%.*]] = icmp uge i32 [[X]], 0 +; CHECK-NEXT: [[PRECOND_2:%.*]] = icmp uge i32 [[LENGTH]], 0 +; CHECK-NEXT: [[PRECOND:%.*]] = and i1 [[PRECOND_1]], [[PRECOND_2]] +; CHECK-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[FAILED:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[ARITH:%.*]] = add nuw i32 [[X]], [[IV]] +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[ARITH]], 4 +; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; CHECK: out_of_bounds: +; CHECK-NEXT: ret i32 -1 +; CHECK: failed: +; CHECK-NEXT: ret i32 -2 +; +entry: + %x = load i32, ptr %x_p + %length = load i32, ptr %length_p + %precond_1 = icmp uge i32 %x, 0 + %precond_2 = icmp uge i32 %length, 0 + %precond = and i1 %precond_1, %precond_2 + br i1 %precond, label %loop, label %failed + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %arith = add nuw i32 %x, %iv + %x_check = icmp ult i32 %arith, 4 + br i1 %x_check, label %out_of_bounds, label %backedge + +backedge: + %el.ptr = getelementptr i32, ptr %p, i32 %iv + store i32 1, ptr %el.ptr + %iv.next = add nuw nsw i32 %iv, 4 + %loop_cond = icmp ult i32 %iv.next, %length + br i1 %loop_cond, label %loop, label %exit + +exit: + ret i32 %iv.next + +out_of_bounds: + ret i32 -1 + +failed: + ret i32 -2 +} + ; iv - x < 4 ==> iv < 4 + x define i32 @test_03(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_03 ; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, 
!range [[RNG1:![0-9]+]] +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG2]] ; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]] ; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nsw i32 [[X]], 4 ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -328,6 +598,55 @@ out_of_bounds: ret i32 -1 } +define i32 @test_03_unsigned(ptr %p, ptr %x_p, ptr %length_p) { +; CHECK-LABEL: define i32 @test_03_unsigned +; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG2]] +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG2]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = add nuw i32 [[X]], 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[IV]], [[INVARIANT_OP]] +; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; CHECK: out_of_bounds: +; CHECK-NEXT: ret i32 -1 +; +entry: + %x = load i32, ptr %x_p, !range !1 + %length = load i32, ptr %length_p, !range !1 + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %arith = sub nuw i32 %iv, %x + %x_check = icmp ult i32 %arith, 4 + br i1 %x_check, label %out_of_bounds, label %backedge + +backedge: + %el.ptr = getelementptr i32, ptr %p, i32 %iv + store i32 1, ptr %el.ptr + %iv.next = add nuw nsw i32 %iv, 4 + %loop_cond = icmp ult i32 %iv.next, %length + br i1 %loop_cond, label %loop, label %exit + +exit: + ret i32 %iv.next + +out_of_bounds: + ret i32 -1 +} + ; TODO: iv - x < 4 ==> iv < 4 + x define i32 @test_03a(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_03a @@ -391,6 +710,68 @@ failed: ret i32 -2 } +define i32 @test_03a_unsigned(ptr %p, ptr %x_p, ptr %length_p) { +; CHECK-LABEL: define i32 @test_03a_unsigned +; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4 +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4 +; CHECK-NEXT: [[PRECOND_1:%.*]] = icmp ult i32 [[X]], 2147483640 +; CHECK-NEXT: [[PRECOND_2:%.*]] = icmp uge i32 [[LENGTH]], 0 +; CHECK-NEXT: [[PRECOND:%.*]] = and i1 [[PRECOND_1]], [[PRECOND_2]] +; CHECK-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[FAILED:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[ARITH:%.*]] = sub nuw i32 [[IV]], [[X]] +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[ARITH]], 4 +; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp 
ult i32 [[IV_NEXT]], [[LENGTH]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; CHECK: out_of_bounds: +; CHECK-NEXT: ret i32 -1 +; CHECK: failed: +; CHECK-NEXT: ret i32 -2 +; +entry: + %x = load i32, ptr %x_p + %length = load i32, ptr %length_p + %precond_1 = icmp ult i32 %x, 2147483640 + %precond_2 = icmp uge i32 %length, 0 + %precond = and i1 %precond_1, %precond_2 + br i1 %precond, label %loop, label %failed + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %arith = sub nuw i32 %iv, %x + %x_check = icmp ult i32 %arith, 4 + br i1 %x_check, label %out_of_bounds, label %backedge + +backedge: + %el.ptr = getelementptr i32, ptr %p, i32 %iv + store i32 1, ptr %el.ptr + %iv.next = add nuw nsw i32 %iv, 4 + %loop_cond = icmp ult i32 %iv.next, %length + br i1 %loop_cond, label %loop, label %exit + +exit: + ret i32 %iv.next + +out_of_bounds: + ret i32 -1 + +failed: + ret i32 -2 +} + ; iv + x < 4 ==> iv < 4 - x define i32 @test_04(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_04 @@ -441,6 +822,55 @@ out_of_bounds: ret i32 -1 } +define i32 @test_04_unsigned(ptr %p, ptr %x_p, ptr %length_p) { +; CHECK-LABEL: define i32 @test_04_unsigned +; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG3]] +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG2]] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nuw i32 4, [[X]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[IV]], [[INVARIANT_OP]] +; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; CHECK: out_of_bounds: +; CHECK-NEXT: ret i32 -1 +; +entry: + %x = load i32, ptr %x_p, !range !3 + %length = load i32, ptr %length_p, !range !1 + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %arith = add nuw i32 %iv, %x + %x_check = icmp ult i32 %arith, 4 + br i1 %x_check, label %out_of_bounds, label %backedge + +backedge: + %el.ptr = getelementptr i32, ptr %p, i32 %iv + store i32 1, ptr %el.ptr + %iv.next = add nuw nsw i32 %iv, 4 + %loop_cond = icmp ult i32 %iv.next, %length + br i1 %loop_cond, label %loop, label %exit + +exit: + ret i32 %iv.next + +out_of_bounds: + ret i32 -1 +} + ; TODO: iv + x < 4 ==> iv < 4 - x define i32 @test_04a(ptr %p, ptr %x_p, ptr %length_p) { ; CHECK-LABEL: define i32 @test_04a @@ -504,5 +934,69 @@ failed: ret i32 -2 } +define i32 @test_04a_unsigned(ptr %p, ptr %x_p, ptr %length_p) { +; CHECK-LABEL: define i32 @test_04a_unsigned +; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4 +; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4 
+; CHECK-NEXT: [[PRECOND_1:%.*]] = icmp sge i32 [[X]], 0 +; CHECK-NEXT: [[PRECOND_2:%.*]] = icmp sge i32 [[LENGTH]], 0 +; CHECK-NEXT: [[PRECOND:%.*]] = and i1 [[PRECOND_1]], [[PRECOND_2]] +; CHECK-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[FAILED:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[ARITH:%.*]] = add nuw i32 [[IV]], [[X]] +; CHECK-NEXT: [[X_CHECK:%.*]] = icmp ult i32 [[ARITH]], 4 +; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]] +; CHECK: backedge: +; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: store i32 1, ptr [[EL_PTR]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 4 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], [[LENGTH]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[IV_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_NEXT]], [[BACKEDGE]] ] +; CHECK-NEXT: ret i32 [[IV_NEXT_LCSSA]] +; CHECK: out_of_bounds: +; CHECK-NEXT: ret i32 -1 +; CHECK: failed: +; CHECK-NEXT: ret i32 -2 +; +entry: + %x = load i32, ptr %x_p + %length = load i32, ptr %length_p + %precond_1 = icmp sge i32 %x, 0 + %precond_2 = icmp sge i32 %length, 0 + %precond = and i1 %precond_1, %precond_2 + br i1 %precond, label %loop, label %failed + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %arith = add nuw i32 %iv, %x + %x_check = icmp ult i32 %arith, 4 + br i1 %x_check, label %out_of_bounds, label %backedge + +backedge: + %el.ptr = getelementptr i32, ptr %p, i32 %iv + store i32 1, ptr %el.ptr + %iv.next = add nuw nsw i32 %iv, 4 + %loop_cond = icmp ult i32 %iv.next, %length + br i1 %loop_cond, label %loop, label %exit + +exit: + ret i32 %iv.next + +out_of_bounds: + ret i32 -1 + +failed: + ret i32 -2 +} + !0 = !{i32 0, i32 2147483648} !1 = !{i32 0, i32 2147483640} +!2 = !{i32 256, i32 32768} +!3 = !{i32 0, i32 2} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index f467f3cf262d2f..93034f4dbe56ec 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -215,16 +215,14 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFCOMMON-NEXT: [[TMP6:%.*]] = icmp ugt [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i64 50, i64 0), poison, zeroinitializer) ; TFCOMMON-NEXT: [[TMP7:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP6]], zeroinitializer ; TFCOMMON-NEXT: [[TMP8:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP7]]) -; TFCOMMON-NEXT: [[TMP9:%.*]] = xor [[TMP6]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFCOMMON-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer -; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], zeroinitializer, [[TMP8]] -; TFCOMMON-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP11]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select [[TMP7]], [[TMP8]], zeroinitializer +; TFCOMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFCOMMON-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP9]], i32 8, [[ACTIVE_LANE_MASK]]) ; 
TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFCOMMON-NEXT: [[TMP12:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFCOMMON-NEXT: [[TMP13:%.*]] = extractelement [[TMP12]], i32 0 -; TFCOMMON-NEXT: br i1 [[TMP13]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; TFCOMMON-NEXT: [[TMP10:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; TFCOMMON-NEXT: [[TMP11:%.*]] = extractelement [[TMP10]], i32 0 +; TFCOMMON-NEXT: br i1 [[TMP11]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFCOMMON: for.cond.cleanup: ; TFCOMMON-NEXT: ret void ; @@ -259,27 +257,23 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP12]], zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP15:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD]], [[TMP13]]) ; TFA_INTERLEAVE-NEXT: [[TMP16:%.*]] = call @foo_vector( [[WIDE_MASKED_LOAD3]], [[TMP14]]) -; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = xor [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP17]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = select [[ACTIVE_LANE_MASK2]], [[TMP18]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP19]], zeroinitializer, [[TMP15]] -; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP20]], zeroinitializer, [[TMP16]] -; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP23]] -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP21]], i32 8, [[ACTIVE_LANE_MASK]]) -; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP24]], i32 8, [[ACTIVE_LANE_MASK2]]) +; TFA_INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], [[TMP15]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[PREDPHI4:%.*]] = select [[TMP14]], [[TMP16]], zeroinitializer +; TFA_INTERLEAVE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]] +; TFA_INTERLEAVE-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP19]] +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP17]], i32 8, [[ACTIVE_LANE_MASK]]) +; TFA_INTERLEAVE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI4]], ptr [[TMP20]], i32 8, [[ACTIVE_LANE_MASK2]]) ; TFA_INTERLEAVE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 2 -; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX_NEXT]], [[TMP26]] +; TFA_INTERLEAVE-NEXT: [[TMP21:%.*]] = call i64 
@llvm.vscale.i64() +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 2 +; TFA_INTERLEAVE-NEXT: [[TMP23:%.*]] = add i64 [[INDEX_NEXT]], [[TMP22]] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP27]], i64 1025) -; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement [[TMP28]], i32 0 -; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT5]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP23]], i64 1025) +; TFA_INTERLEAVE-NEXT: [[TMP24:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement [[TMP24]], i32 0 +; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFA_INTERLEAVE: for.cond.cleanup: ; TFA_INTERLEAVE-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll index f922873210b052..66d001498e457b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll @@ -1216,7 +1216,7 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF: vector.body: ; CHECK-ORDERED-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 ; CHECK-ORDERED-TF-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]] ; CHECK-ORDERED-TF-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0 @@ -1226,41 +1226,39 @@ define float @fadd_conditional(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-ORDERED-TF-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[A]], i64 [[TMP10]] ; CHECK-ORDERED-TF-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP15]], i32 0 ; CHECK-ORDERED-TF-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr [[TMP16]], i32 4, [[TMP14]], poison) -; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = xor [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP18:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP17]], zeroinitializer -; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select [[TMP18]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer), [[WIDE_MASKED_LOAD1]] -; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: 
[[TMP20]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP19]]) +; CHECK-ORDERED-TF-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], [[WIDE_MASKED_LOAD1]], shufflevector ( insertelement ( poison, float 3.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP17:%.*]] = select [[ACTIVE_LANE_MASK]], [[PREDPHI]], shufflevector ( insertelement ( poison, float -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP18]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], [[TMP17]]) ; CHECK-ORDERED-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] ; CHECK-ORDERED-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]]) -; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = extractelement [[TMP21]], i32 0 -; CHECK-ORDERED-TF-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-ORDERED-TF-NEXT: [[TMP19:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-ORDERED-TF-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 +; CHECK-ORDERED-TF-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-ORDERED-TF: middle.block: ; CHECK-ORDERED-TF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK-ORDERED-TF: scalar.ph: ; CHECK-ORDERED-TF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] +; CHECK-ORDERED-TF-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] ; CHECK-ORDERED-TF-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-ORDERED-TF: for.body: ; CHECK-ORDERED-TF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; CHECK-ORDERED-TF-NEXT: [[RES:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[FADD:%.*]], [[FOR_INC]] ] ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-ORDERED-TF-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP23]], 0.000000e+00 +; CHECK-ORDERED-TF-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-ORDERED-TF-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP21]], 0.000000e+00 ; CHECK-ORDERED-TF-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK-ORDERED-TF: if.then: ; CHECK-ORDERED-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] -; CHECK-ORDERED-TF-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-ORDERED-TF-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 ; CHECK-ORDERED-TF-NEXT: br label [[FOR_INC]] ; CHECK-ORDERED-TF: for.inc: -; CHECK-ORDERED-TF-NEXT: [[PHI:%.*]] = phi float [ [[TMP24]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ] +; CHECK-ORDERED-TF-NEXT: [[PHI:%.*]] = phi float [ [[TMP22]], [[IF_THEN]] ], [ 3.000000e+00, [[FOR_BODY]] ] ; CHECK-ORDERED-TF-NEXT: [[FADD]] = fadd float [[RES]], [[PHI]] ; CHECK-ORDERED-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-ORDERED-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 
[[IV_NEXT]], [[N]] ; CHECK-ORDERED-TF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-ORDERED-TF: for.end: -; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ] +; CHECK-ORDERED-TF-NEXT: [[RDX:%.*]] = phi float [ [[FADD]], [[FOR_INC]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] ; CHECK-ORDERED-TF-NEXT: ret float [[RDX]] ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 9641dd7d21fd2a..852a967e764819 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -1521,10 +1521,10 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP33:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV2:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[I1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP33]] to i32 ; CHECK-NEXT: [[I1]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[IV1:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[IV2]] = add nuw nsw i64 [[IV]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index f6a6d021f03c9f..6fa1e7fbbac602 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -467,16 +467,15 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK-NEXT: [[TMP14:%.*]] = xor [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], zeroinitializer ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], i32 4, [[TMP15]], poison) -; CHECK-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP13]], zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP16]], zeroinitializer, [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP18]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP15]], [[WIDE_MASKED_GATHER]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[PREDPHI]], ptr [[TMP17]], i32 4, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]] ; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) -; 
CHECK-NEXT: [[TMP19:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement [[TMP19]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP19:%.*]] = extractelement [[TMP18]], i32 0 +; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -485,14 +484,14 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr ; CHECK: for.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[IF_END:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[COND]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP21]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP20]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[SRC]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[SRC]], align 4 ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[VAL_0:%.*]] = phi i32 [ [[TMP22]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] +; CHECK-NEXT: [[VAL_0:%.*]] = phi i32 [ [[TMP21]], [[IF_THEN]] ], [ 0, [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: store i32 [[VAL_0]], ptr [[ARRAYIDX1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll index aafe849b7042ab..e3af831f83c970 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/widened-massv-vfabi-attr.ll @@ -7,19 +7,19 @@ define dso_local double @test(ptr %Arr) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[ARR:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double> -; CHECK-NEXT: [[TMP4:%.*]] = call fast <2 x double> @__sind2_P8(<2 x double> [[TMP3]]) -; CHECK-NEXT: [[TMP5]] = fadd fast <2 x double> [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double> +; CHECK-NEXT: [[TMP3:%.*]] = call fast <2 x double> @__sind2_P8(<2 x double> [[TMP2]]) +; CHECK-NEXT: [[TMP4]] = fadd fast <2 x double> [[VEC_PHI]], [[TMP3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; CHECK-NEXT: 
[[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[TMP5]]) -; CHECK-NEXT: ret double [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[TMP4]]) +; CHECK-NEXT: ret double [[TMP6]] ; entry: br label %for.cond diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll index 1db718a0e42f9f..3e2f290a497db1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll @@ -152,6 +152,77 @@ exit: ret void } +; Test case for https://github.com/llvm/llvm-project/issues/106641. +define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 { +; CHECK-LABEL: define void @truncate_to_i1_used_by_branch( +; CHECK-SAME: i8 [[X:%.*]], ptr [[DST:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = trunc <2 x i8> [[BROADCAST_SPLAT]] to <2 x i1> +; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x ptr> poison, ptr [[DST]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT3]], <2 x ptr> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[VEC_IV]], i32 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 [[TMP1]], i32 2) +; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i1> , [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer +; CHECK-NEXT: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> zeroinitializer, <2 x ptr> [[BROADCAST_SPLAT4]], i32 1, <2 x i1> [[TMP3]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] +; CHECK: [[LOOP_HEADER]]: +; CHECK-NEXT: [[F_039:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ 
[[ADD:%.*]], %[[LOOP_LATCH:.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = or i8 23, [[X]] +; CHECK-NEXT: [[EXTRACT_T:%.*]] = trunc i8 [[TMP4]] to i1 +; CHECK-NEXT: br i1 [[EXTRACT_T]], label %[[THEN:.*]], label %[[LOOP_LATCH]] +; CHECK: [[THEN]]: +; CHECK-NEXT: store i8 0, ptr [[DST]], align 1 +; CHECK-NEXT: br label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[ADD]] = add i8 [[F_039]], 1 +; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[F_039]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], 1 +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %f.039 = phi i8 [ 0, %entry ], [ %add, %loop.latch ] + %0 = or i8 23, %x + %extract.t = trunc i8 %0 to i1 + br i1 %extract.t, label %then, label %loop.latch + +then: + store i8 0, ptr %dst, align 1 + br label %loop.latch + +loop.latch: + %add = add i8 %f.039, 1 + %conv = sext i8 %f.039 to i32 + %cmp = icmp slt i32 %conv, 1 + br i1 %cmp, label %loop.header, label %exit + +exit: + ret void +} + +attributes #0 = { "target-features"="+64bit,+v,+zvl256b" } + ;. ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} @@ -159,4 +230,6 @@ exit: ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll index 04289d43f40e2f..c051e2f18380bd 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll @@ -414,6 +414,7 @@ for.end: define void @acos_f32_intrinsic(ptr nocapture %varray) { ; CHECK-LABEL: @acos_f32_intrinsic( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]]) ; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]]) ; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]]) ; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]]) @@ -487,7 +488,10 @@ for.end: define void @asin_f64_intrinsic(ptr nocapture %varray) { ; CHECK-LABEL: @asin_f64_intrinsic( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]]) +; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]]) ; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]]) +; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -510,6 +514,7 @@ for.end: define void @asin_f32_intrinsic(ptr nocapture %varray) { ; CHECK-LABEL: @asin_f32_intrinsic( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]]) ; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]]) ; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]]) ; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]]) @@ -588,6 +593,7 @@ define void @atan_f64_intrinsic(ptr nocapture %varray) { ; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> 
@amd_vrd2_atan(<2 x double> [[TMP4:%.*]]) ; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]]) ; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]]) +; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -610,6 +616,7 @@ for.end: define void @atan_f32_intrinsic(ptr nocapture %varray) { ; CHECK-LABEL: @atan_f32_intrinsic( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]]) ; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]]) ; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]]) ; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]]) @@ -683,6 +690,9 @@ for.end: define void @cosh_f64_intrinsic(ptr nocapture %varray) { ; CHECK-LABEL: @cosh_f64_intrinsic( ; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]]) +; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]]) +; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]]) +; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -705,8 +715,10 @@ for.end: define void @cosh_f32_intrinsic(ptr nocapture %varray) { ; CHECK-LABEL: @cosh_f32_intrinsic( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]]) ; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]]) ; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]]) +; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -754,6 +766,7 @@ for.end: define void @tanh_f32_intrinsic(ptr nocapture %varray) { ; CHECK-LABEL: @tanh_f32_intrinsic( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]]) ; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]]) ; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]]) ; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll index 7752af558f7d61..7cf4070f76d76e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -84,20 +84,20 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; SSE-NEXT: [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], ; SSE-NEXT: [[TMP7:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD2]], -; SSE-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[VEC_PHI]], [[WIDE_LOAD]] -; SSE-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[VEC_PHI1]], [[WIDE_LOAD2]] -; SSE-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP6]], <2 x double> [[TMP10]], <2 x double> [[VEC_PHI]] -; SSE-NEXT: [[PREDPHI3]] = select <2 x i1> [[TMP7]], <2 x double> [[TMP11]], <2 x double> [[VEC_PHI1]] +; SSE-NEXT: [[TMP8:%.*]] = fadd fast <2 x double> [[VEC_PHI]], [[WIDE_LOAD]] +; SSE-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[VEC_PHI1]], [[WIDE_LOAD2]] +; SSE-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP6]], <2 x double> [[TMP8]], <2 x double> 
[[VEC_PHI]] +; SSE-NEXT: [[PREDPHI3]] = select <2 x i1> [[TMP7]], <2 x double> [[TMP9]], <2 x double> [[VEC_PHI1]] ; SSE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; SSE-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; SSE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; SSE-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 +; SSE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SSE: middle.block: ; SSE-NEXT: [[BIN_RDX:%.*]] = fadd fast <2 x double> [[PREDPHI3]], [[PREDPHI]] -; SSE-NEXT: [[TMP13:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[BIN_RDX]]) +; SSE-NEXT: [[TMP11:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[BIN_RDX]]) ; SSE-NEXT: br i1 true, label [[DONE:%.*]], label [[SCALAR_PH]] ; SSE: scalar.ph: ; SSE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SSE-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; SSE-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; SSE-NEXT: br label [[LOOP:%.*]] ; SSE: loop: ; SSE-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] @@ -117,7 +117,7 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE-NEXT: [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 ; SSE-NEXT: br i1 [[AGAIN]], label [[LOOP]], label [[DONE]], !llvm.loop [[LOOP3:![0-9]+]] ; SSE: done: -; SSE-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; SSE-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; SSE-NEXT: ret double [[TOT_NEXT_LCSSA]] ; ; AVX-LABEL: @sumIfVector( @@ -151,26 +151,26 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; AVX-NEXT: [[TMP13:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD4]], ; AVX-NEXT: [[TMP14:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD5]], ; AVX-NEXT: [[TMP15:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD6]], -; AVX-NEXT: [[TMP20:%.*]] = fadd fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]] -; AVX-NEXT: [[TMP21:%.*]] = fadd fast <4 x double> [[VEC_PHI1]], [[WIDE_LOAD4]] -; AVX-NEXT: [[TMP22:%.*]] = fadd fast <4 x double> [[VEC_PHI2]], [[WIDE_LOAD5]] -; AVX-NEXT: [[TMP23:%.*]] = fadd fast <4 x double> [[VEC_PHI3]], [[WIDE_LOAD6]] -; AVX-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP12]], <4 x double> [[TMP20]], <4 x double> [[VEC_PHI]] -; AVX-NEXT: [[PREDPHI7]] = select <4 x i1> [[TMP13]], <4 x double> [[TMP21]], <4 x double> [[VEC_PHI1]] -; AVX-NEXT: [[PREDPHI8]] = select <4 x i1> [[TMP14]], <4 x double> [[TMP22]], <4 x double> [[VEC_PHI2]] -; AVX-NEXT: [[PREDPHI9]] = select <4 x i1> [[TMP15]], <4 x double> [[TMP23]], <4 x double> [[VEC_PHI3]] +; AVX-NEXT: [[TMP16:%.*]] = fadd fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]] +; AVX-NEXT: [[TMP17:%.*]] = fadd fast <4 x double> [[VEC_PHI1]], [[WIDE_LOAD4]] +; AVX-NEXT: [[TMP18:%.*]] = fadd fast <4 x double> [[VEC_PHI2]], [[WIDE_LOAD5]] +; AVX-NEXT: [[TMP19:%.*]] = fadd fast <4 x double> [[VEC_PHI3]], [[WIDE_LOAD6]] +; AVX-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP12]], <4 x double> [[TMP16]], <4 x double> [[VEC_PHI]] +; AVX-NEXT: [[PREDPHI7]] = select <4 x i1> [[TMP13]], <4 x double> [[TMP17]], <4 x double> [[VEC_PHI1]] +; AVX-NEXT: [[PREDPHI8]] = select <4 x i1> 
[[TMP14]], <4 x double> [[TMP18]], <4 x double> [[VEC_PHI2]] +; AVX-NEXT: [[PREDPHI9]] = select <4 x i1> [[TMP15]], <4 x double> [[TMP19]], <4 x double> [[VEC_PHI3]] ; AVX-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 -; AVX-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 -; AVX-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; AVX-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 +; AVX-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; AVX: middle.block: ; AVX-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x double> [[PREDPHI7]], [[PREDPHI]] ; AVX-NEXT: [[BIN_RDX10:%.*]] = fadd fast <4 x double> [[PREDPHI8]], [[BIN_RDX]] ; AVX-NEXT: [[BIN_RDX11:%.*]] = fadd fast <4 x double> [[PREDPHI9]], [[BIN_RDX10]] -; AVX-NEXT: [[TMP25:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[BIN_RDX11]]) +; AVX-NEXT: [[TMP21:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[BIN_RDX11]]) ; AVX-NEXT: br i1 true, label [[DONE:%.*]], label [[SCALAR_PH]] ; AVX: scalar.ph: ; AVX-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; AVX-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP25]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; AVX-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; AVX-NEXT: br label [[LOOP:%.*]] ; AVX: loop: ; AVX-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] @@ -190,7 +190,7 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; AVX-NEXT: [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 ; AVX-NEXT: br i1 [[AGAIN]], label [[LOOP]], label [[DONE]], !llvm.loop [[LOOP3:![0-9]+]] ; AVX: done: -; AVX-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] +; AVX-NEXT: [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] ; AVX-NEXT: ret double [[TOT_NEXT_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll index d19ca172a8c0a8..8b0c99b353c8b7 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll @@ -33,18 +33,15 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META3]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP13]], <4 x i32> , <4 x i32> -; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP12]], <4 x i32> [[PREDPHI]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], +; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> , <4 x 
i32> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> ; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -54,16 +51,16 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] ; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END14]] ; CHECK: if.then: -; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], 19 +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], 19 ; CHECK-NEXT: br i1 [[CMP6]], label [[IF_END14]], label [[IF_ELSE:%.*]] ; CHECK: if.else: -; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP16]], 4 +; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP13]], 4 ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[CMP10]], i32 4, i32 5 ; CHECK-NEXT: br label [[IF_END14]] ; CHECK: if.end14: @@ -112,3 +109,122 @@ for.end: ret i32 undef } +; As above but with multiple variables set per block. 
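+; Roughly, this corresponds to the following C source (a reconstructed
+; sketch of the scalar IR below, not taken verbatim from any source file;
+; A, B, n as in @foo above):
+;
+;   for (int i = 0; i < n; i++) {
+;     int x = 9, y = 18;
+;     if (A[i] > B[i]) {
+;       if (A[i] > 19) { x = 3; y = 7; }
+;       else { x = B[i] < 4 ? 4 : 5; y = B[i] < 4 ? 6 : 11; }
+;     }
+;     A[i] = x; B[i] = y;
+;   }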
+define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) { +; CHECK-LABEL: @multi_variable_if_nest( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP26:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP26]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[N]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], +; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i1> [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], +; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP12]], <4 x i32> [[PREDPHI]] +; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP13]], <4 x i32> [[PREDPHI4]] +; CHECK-NEXT: store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META9]], !noalias [[META12]] +; CHECK-NEXT: store <4 x i32> [[PREDPHI5]], ptr [[TMP6]], align 4, !alias.scope [[META12]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, 
[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[IF_END14:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END14]] +; CHECK: if.then: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP16]], 19 +; CHECK-NEXT: br i1 [[CMP6]], label [[IF_END14]], label [[IF_ELSE:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[CMP10:%.*]] = icmp slt i32 [[TMP17]], 4 +; CHECK-NEXT: [[X_ELSE:%.*]] = select i1 [[CMP10]], i32 4, i32 5 +; CHECK-NEXT: [[Y_ELSE:%.*]] = select i1 [[CMP10]], i32 6, i32 11 +; CHECK-NEXT: br label [[IF_END14]] +; CHECK: if.end14: +; CHECK-NEXT: [[X_0:%.*]] = phi i32 [ 9, [[FOR_BODY]] ], [ 3, [[IF_THEN]] ], [ [[X_ELSE]], [[IF_ELSE]] ] +; CHECK-NEXT: [[Y_0:%.*]] = phi i32 [ 18, [[FOR_BODY]] ], [ 7, [[IF_THEN]] ], [ [[Y_ELSE]], [[IF_ELSE]] ] +; CHECK-NEXT: store i32 [[X_0]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: store i32 [[Y_0]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret i32 undef +; +entry: + %cmp26 = icmp sgt i32 %n, 0 + br i1 %cmp26, label %for.body, label %for.end + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %if.end14 ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv + %1 = load i32, ptr %arrayidx2, align 4 + %cmp3 = icmp sgt i32 %0, %1 + br i1 %cmp3, label %if.then, label %if.end14 + +if.then: + %cmp6 = icmp sgt i32 %0, 19 + br i1 %cmp6, label %if.end14, label %if.else + +if.else: + %cmp10 = icmp slt i32 %1, 4 + %x.else = select i1 %cmp10, i32 4, i32 5 + %y.else = select i1 %cmp10, i32 6, i32 11 + br label %if.end14 + +if.end14: + %x.0 = phi i32 [ 9, %for.body ], [ 3, %if.then ], [ %x.else, %if.else ] ; <------------- A PHI with 3 entries that we can still vectorize. + %y.0 = phi i32 [ 18, %for.body ], [ 7, %if.then ], [ %y.else, %if.else ] ; <------------- A PHI with 3 entries that we can still vectorize. 
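+ ; If-conversion flattens both three-entry PHIs above into select chains:
+ ; in the vector body x.0 becomes PREDPHI3 and y.0 becomes PREDPHI5, and
+ ; both chains reuse the same masks (TMP10 and TMP14), so blending the
+ ; second variable adds selects but no extra mask computation.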
+ store i32 %x.0, ptr %arrayidx, align 4 + store i32 %y.0, ptr %arrayidx2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i32 undef +} diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index e9761a60fd6ebe..0d5871e24c5247 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -678,9 +678,8 @@ for.end: ; preds = %for.inc, %entry ; CHECK-DAG: %[[C21:.*]] = xor <4 x i1> %[[C2]], ; CHECK-DAG: %[[ADD:.*]] = fadd fast <4 x float> -; CHECK-DAG: %[[C12:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C2]], <4 x i1> zeroinitializer -; CHECK: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer -; CHECK: %[[S1:.*]] = select <4 x i1> %[[C12]], <4 x float> %[[SUB]], <4 x float> %[[ADD]] +; CHECK-DAG: %[[C22:.*]] = select <4 x i1> %[[C11]], <4 x i1> %[[C21]], <4 x i1> zeroinitializer +; CHECK: %[[S1:.*]] = select <4 x i1> %[[C1]], <4 x float> %[[ADD]], <4 x float> %[[SUB]] ; CHECK: %[[S2:.*]] = select <4 x i1> %[[C22]], {{.*}} <4 x float> %[[S1]] define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonly { entry: diff --git a/llvm/test/Transforms/LoopVectorize/phi-cost.ll b/llvm/test/Transforms/LoopVectorize/phi-cost.ll index e571b624ed1940..8d407c969b5278 100644 --- a/llvm/test/Transforms/LoopVectorize/phi-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/phi-cost.ll @@ -49,8 +49,8 @@ for.end: ; CHECK: define void @phi_three_incoming_values( ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] -; CHECK: [[PREDPHI:%.*]] = select <2 x i1> {{.*}}, <2 x i32> , <2 x i32> -; CHECK: [[PREDPHI7:%.*]] = select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> [[PREDPHI]] +; CHECK: [[PREDPHI:%.*]] = select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> +; CHECK: [[PREDPHI7:%.*]] = select <2 x i1> {{.*}}, <2 x i32> [[PREDPHI]], <2 x i32> ; CHECK: store <2 x i32> [[PREDPHI7]], ptr {{.*}} ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll index c50bcf8ae88f5c..2e111332ef6c48 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -587,9 +587,7 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] ; CHECK: pred.load.continue14: ; CHECK-NEXT: [[TMP46:%.*]] = phi <4 x i64> [ [[TMP41]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP45]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP47:%.*]] = xor <4 x i1> [[TMP25]], -; CHECK-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP47]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP48]], <4 x i64> [[TMP24]], <4 x i64> [[TMP46]] +; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP26]], <4 x i64> [[TMP46]], <4 x i64> [[TMP24]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[PREDPHI_V]], <4 x i64> ; CHECK-NEXT: [[PREDPHI15]] = and <4 x i64> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll 
b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll index 7fd762c7b735a0..40383c7e551bcf 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -65,7 +65,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) { ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: -; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: ._crit_edge: ; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll index 8ee12cc2241c35..6407583061e601 100644 --- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll @@ -111,17 +111,16 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) { ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP6]], -; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i16> [[WIDE_LOAD]], <2 x i16> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[WIDE_LOAD]] ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> , <2 x i16> [[PREDPHI]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0 -; CHECK-NEXT: store <2 x i16> [[PREDPHI1]], ptr [[TMP11]], align 2 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP9]], i32 0 +; CHECK-NEXT: store <2 x i16> [[PREDPHI1]], ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -304,17 +303,16 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) { ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] ; CHECK-NEXT: [[TMP15:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer -; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP15]], -; CHECK-NEXT: [[TMP18:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP17]], <2 x i1> zeroinitializer -; CHECK-NEXT: 
[[PREDPHI:%.*]] = select <2 x i1> [[TMP18]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[TMP14]] ; CHECK-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> , <2 x i16> [[PREDPHI]] -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[TMP19]], i32 0 -; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP20]], align 2 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0 +; CHECK-NEXT: store <2 x i16> [[PREDPHI3]], ptr [[TMP19]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll index eae38295ba08cf..da827a5b674d8b 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions-inseltpoison.ll @@ -611,6 +611,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_asin_4x(ptr %a) { +; CHECK-LABEL: @int_asin_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_asin_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.asin.f32(float 
%vecext) + %vecins = insertelement <4 x float> poison, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.asin.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.asin.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.asin.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @acosf(float) readonly nounwind willreturn define <4 x float> @acos_4x(ptr %a) { ; CHECK-LABEL: @acos_4x( @@ -652,6 +690,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_acos_4x(ptr %a) { +; CHECK-LABEL: @int_acos_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_acos_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.acos.f32(float %vecext) + %vecins = insertelement <4 x float> poison, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.acos.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.acos.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.acos.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @atanf(float) readonly nounwind willreturn define <4 x float> @atan_4x(ptr %a) { ; CHECK-LABEL: @atan_4x( @@ -693,6 +769,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_atan_4x(ptr %a) { +; CHECK-LABEL: @int_atan_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = 
call fast <4 x float> @vatanf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_atan_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.atan.f32(float %vecext) + %vecins = insertelement <4 x float> poison, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.atan.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.atan.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.atan.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @sinhf(float) readonly nounwind willreturn define <4 x float> @sinh_4x(ptr %a) { ; CHECK-LABEL: @sinh_4x( @@ -734,6 +848,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_sinh_4x(ptr %a) { +; CHECK-LABEL: @int_sinh_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_sinh_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> 
poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.sinh.f32(float %vecext) + %vecins = insertelement <4 x float> poison, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.sinh.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.sinh.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.sinh.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @coshf(float) readonly nounwind willreturn define <4 x float> @cosh_4x(ptr %a) { ; CHECK-LABEL: @cosh_4x( @@ -775,6 +927,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_cosh_4x(ptr %a) { +; CHECK-LABEL: @int_cosh_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_cosh_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.cosh.f32(float %vecext) + %vecins = insertelement <4 x float> poison, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.cosh.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.cosh.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.cosh.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @tanhf(float) readonly nounwind willreturn define <4 x float> @tanh_4x(ptr %a) { ; 
CHECK-LABEL: @tanh_4x( @@ -816,6 +1006,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_tanh_4x(ptr %a) { +; CHECK-LABEL: @int_tanh_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_tanh_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.tanh.f32(float %vecext) + %vecins = insertelement <4 x float> poison, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.tanh.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.tanh.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.tanh.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @asinhf(float) readonly nounwind willreturn define <4 x float> @asinh_4x(ptr %a) { ; CHECK-LABEL: @asinh_4x( diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll index 5e2dd305f05576..62b8c0ce3291a6 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll @@ -611,6 +611,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_asin_4x(ptr %a) { +; CHECK-LABEL: @int_asin_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_asin_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail 
call fast float @llvm.asin.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.asin.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.asin.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.asin.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.asin.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @acosf(float) readonly nounwind willreturn define <4 x float> @acos_4x(ptr %a) { ; CHECK-LABEL: @acos_4x( @@ -652,6 +690,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_acos_4x(ptr %a) { +; CHECK-LABEL: @int_acos_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_acos_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.acos.f32(float %vecext) + %vecins = insertelement 
<4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.acos.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.acos.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.acos.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @atanf(float) readonly nounwind willreturn define <4 x float> @atan_4x(ptr %a) { ; CHECK-LABEL: @atan_4x( @@ -693,6 +769,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_atan_4x(ptr %a) { +; CHECK-LABEL: @int_atan_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_atan_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.atan.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.atan.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.atan.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.atan.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @sinhf(float) readonly nounwind willreturn define <4 x float> @sinh_4x(ptr %a) { ; CHECK-LABEL: @sinh_4x( @@ -734,6 +848,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_sinh_4x(ptr %a) { +; CHECK-LABEL: @int_sinh_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> 
[[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_sinh_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.sinh.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.sinh.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.sinh.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.sinh.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @coshf(float) readonly nounwind willreturn define <4 x float> @cosh_4x(ptr %a) { ; CHECK-LABEL: @cosh_4x( @@ -775,6 +927,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_cosh_4x(ptr %a) { +; CHECK-LABEL: @int_cosh_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_cosh_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: 
[[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.cosh.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.cosh.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.cosh.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.cosh.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @tanhf(float) readonly nounwind willreturn define <4 x float> @tanh_4x(ptr %a) { ; CHECK-LABEL: @tanh_4x( @@ -816,6 +1006,44 @@ entry: %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 ret <4 x float> %vecins.3 } +define <4 x float> @int_tanh_4x(ptr %a) { +; CHECK-LABEL: @int_tanh_4x( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; +; NOACCELERATE-LABEL: @int_tanh_4x( +; NOACCELERATE-NEXT: entry: +; NOACCELERATE-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16 +; NOACCELERATE-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; NOACCELERATE-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]]) +; NOACCELERATE-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; NOACCELERATE-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; NOACCELERATE-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]]) +; NOACCELERATE-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; NOACCELERATE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; NOACCELERATE-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP3]]) +; NOACCELERATE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; NOACCELERATE-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; NOACCELERATE-NEXT: ret <4 x float> [[VECINS_31]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.tanh.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.tanh.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.tanh.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.tanh.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} declare float @asinhf(float) readonly nounwind willreturn define <4 x float> @asinh_4x(ptr %a) { ; CHECK-LABEL: @asinh_4x( diff --git 
a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll index 059e4c38b519bd..6fbd05aaedfe5b 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll @@ -597,6 +597,690 @@ entry: ret <4 x float> %vecins.3 } +declare float @cosf(float) readonly nounwind willreturn + +; We can not vectorized cos cosce RISCV has no such instruction. +define <4 x float> @cos_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @cos_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @cos_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @cosf(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @cosf(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @cosf(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, 
i32 3 + %4 = tail call fast float @cosf(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @llvm.cos.f32(float) + +; We can not vectorized cos cosce RISCV has no such instruction. +define <4 x float> @int_cos_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @int_cos_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @int_cos_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.cos.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.cos.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.cos.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.cos.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> 
%vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @acosf(float) readonly nounwind willreturn + +; We can not vectorized acos cosce RISCV has no such instruction. +define <4 x float> @acos_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @acos_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @acos_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @acosf(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @acosf(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @acosf(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @acosf(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @llvm.acos.f32(float) + +; We can not vectorized acos cosce RISCV has no such instruction. 
+define <4 x float> @int_acos_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @int_acos_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @int_acos_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.acos.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.acos.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.acos.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.acos.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @tanf(float) readonly nounwind willreturn + +; We can not vectorized tan since RISCV has no such instruction.
+define <4 x float> @tan_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @tan_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @tan_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @tanf(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @tanf(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @tanf(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @tanf(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @llvm.tan.f32(float) + +; We can not vectorized tan since RISCV has no such instruction.
+define <4 x float> @int_tan_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @int_tan_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @int_tan_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.tan.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.tan.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.tan.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.tan.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @atanf(float) readonly nounwind willreturn + +; We can not vectorized atan since RISCV has no such instruction.
+define <4 x float> @atan_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @atan_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @atan_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @atanf(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @atanf(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @atanf(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @atanf(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @llvm.atan.f32(float) + +; We can not vectorized atan since RISCV has no such instruction.
+define <4 x float> @int_atan_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @int_atan_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @int_atan_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.atan.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.atan.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.atan.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.atan.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @sinhf(float) readonly nounwind willreturn + +; We can not vectorized sinh since RISCV has no such instruction. 
+define <4 x float> @sinh_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @sinh_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @sinh_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @sinhf(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @sinhf(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @sinhf(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @sinhf(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @llvm.sinh.f32(float) + +; We can not vectorized sinh since RISCV has no such instruction. 
+define <4 x float> @int_sinh_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @int_sinh_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @int_sinh_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.sinh.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.sinh.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.sinh.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.sinh.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @asinhf(float) readonly nounwind willreturn + +; We can not vectorized asinh since RISCV has no such instruction. 
+define <4 x float> @asinh_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @asinh_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @asinh_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @asinhf(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @asinhf(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @asinhf(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @asinhf(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @llvm.asinh.f32(float) + +; We can not vectorized asinh since RISCV has no such instruction. 
+define <4 x float> @int_asinh_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @int_asinh_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @int_asinh_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.asinh.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.asinh.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.asinh.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.asinh.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + declare float @coshf(float) readonly nounwind willreturn ; We can not vectorized cosh since RISCV has no such instruction. 
@@ -711,6 +1395,234 @@ entry: ret <4 x float> %vecins.3 } +declare float @acoshf(float) readonly nounwind willreturn + +; We can not vectorized acosh since RISCV has no such instruction. +define <4 x float> @acosh_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @acosh_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @acosh_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @acoshf(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @acoshf(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @acoshf(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @acoshf(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @llvm.acosh.f32(float) + +; We can not vectorized acosh since RISCV has no such 
instruction. +define <4 x float> @int_acosh_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @int_acosh_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @int_acosh_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.acosh.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.acosh.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.acosh.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.acosh.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @tanhf(float) readonly nounwind willreturn + +; We can not vectorized tanh since RISCV has no such instruction. 
+define <4 x float> @tanh_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @tanh_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @tanh_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @tanhf(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @tanhf(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @tanhf(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @tanhf(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + +declare float @llvm.tanh.f32(float) + +; We can not vectorized tanh since RISCV has no such instruction. 
+define <4 x float> @int_tanh_4x(ptr %a) { +; CHECK-LABEL: define <4 x float> @int_tanh_4x +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]]) +; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]]) +; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]]) +; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]]) +; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; CHECK-NEXT: ret <4 x float> [[VECINS_3]] +; +; DEFAULT-LABEL: define <4 x float> @int_tanh_4x +; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 +; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 +; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]]) +; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 +; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 +; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]]) +; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 +; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]]) +; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 +; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 +; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]]) +; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 +; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; +entry: + %0 = load <4 x float>, ptr %a, align 16 + %vecext = extractelement <4 x float> %0, i32 0 + %1 = tail call fast float @llvm.tanh.f32(float %vecext) + %vecins = insertelement <4 x float> undef, float %1, i32 0 + %vecext.1 = extractelement <4 x float> %0, i32 1 + %2 = tail call fast float @llvm.tanh.f32(float %vecext.1) + %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1 + %vecext.2 = extractelement <4 x float> %0, i32 2 + %3 = tail call fast float @llvm.tanh.f32(float %vecext.2) + %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2 + %vecext.3 = extractelement <4 x float> %0, i32 3 + %4 = tail call fast float @llvm.tanh.f32(float %vecext.3) + %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3 + ret <4 x float> %vecins.3 +} + declare float @atanhf(float) readonly nounwind willreturn ; We can not vectorized atanh since RISCV has no such instruction. 
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-whole-regs-loads.ll index 54dc33dbc0d00b..c9a3158acdda34 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-whole-regs-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-whole-regs-loads.ll @@ -4,15 +4,11 @@ define i64 @test(ptr %p) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 4 -; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[ARRAYIDX_4]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0) -; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4) -; CHECK-NEXT: [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]]) -; CHECK-NEXT: ret i64 [[TMP6]] +; CHECK-NEXT: [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]]) +; CHECK-NEXT: ret i64 [[TMP3]] ; entry: %arrayidx.1 = getelementptr inbounds i64, ptr %p, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll b/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll index af27572cfeaef8..4352b3d0c80d32 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/dot-product.ll @@ -12,19 +12,19 @@ define double @dot4f64(ptr dereferenceable(32) %ptrx, ptr dereferenceable(32) %p ; CHECK-LABEL: @dot4f64( ; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds double, ptr [[PTRX:%.*]], i64 2 ; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds double, ptr [[PTRY:%.*]], i64 2 -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRX]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[PTRY]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[PTRX2]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[PTRY2]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: [[DOT01:%.*]] = fadd double [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP10]], i32 0 -; CHECK-NEXT: [[DOT012:%.*]] = fadd double [[DOT01]], [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP10]], i32 1 -; CHECK-NEXT: [[DOT0123:%.*]] = fadd double [[DOT012]], [[TMP14]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[PTRX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRY]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[PTRX2]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, ptr [[PTRY2]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x 
double> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOT01:%.*]] = fadd double [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; CHECK-NEXT: [[DOT012:%.*]] = fadd double [[DOT01]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; CHECK-NEXT: [[DOT0123:%.*]] = fadd double [[DOT012]], [[TMP10]] ; CHECK-NEXT: ret double [[DOT0123]] ; %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1 @@ -55,19 +55,19 @@ define float @dot4f32(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %pt ; CHECK-LABEL: @dot4f32( ; CHECK-NEXT: [[PTRX2:%.*]] = getelementptr inbounds float, ptr [[PTRX:%.*]], i64 2 ; CHECK-NEXT: [[PTRY2:%.*]] = getelementptr inbounds float, ptr [[PTRY:%.*]], i64 2 -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRX]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[PTRY]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x float>, ptr [[PTRX2]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = load <2 x float>, ptr [[PTRY2]], align 4 -; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x float> [[TMP7]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 -; CHECK-NEXT: [[DOT01:%.*]] = fadd float [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[TMP10]], i32 0 -; CHECK-NEXT: [[DOT012:%.*]] = fadd float [[DOT01]], [[TMP13]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[TMP10]], i32 1 -; CHECK-NEXT: [[DOT0123:%.*]] = fadd float [[DOT012]], [[TMP14]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[PTRX]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRY]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[PTRX2]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, ptr [[PTRY2]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOT01:%.*]] = fadd float [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP6]], i32 0 +; CHECK-NEXT: [[DOT012:%.*]] = fadd float [[DOT01]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP6]], i32 1 +; CHECK-NEXT: [[DOT0123:%.*]] = fadd float [[DOT012]], [[TMP10]] ; CHECK-NEXT: ret float [[DOT0123]] ; %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1 @@ -96,11 +96,11 @@ define float @dot4f32(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %pt define double @dot4f64_fast(ptr dereferenceable(32) %ptrx, ptr dereferenceable(32) %ptry) { ; CHECK-LABEL: @dot4f64_fast( -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr [[PTRX:%.*]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x double>, ptr [[PTRY:%.*]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP5]]) -; CHECK-NEXT: ret double [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[PTRX:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x double>, ptr [[PTRY:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x double> [[TMP1]], [[TMP2]] +; 
CHECK-NEXT: [[TMP4:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP3]]) +; CHECK-NEXT: ret double [[TMP4]] ; %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1 %ptry1 = getelementptr inbounds double, ptr %ptry, i64 1 @@ -128,11 +128,11 @@ define double @dot4f64_fast(ptr dereferenceable(32) %ptrx, ptr dereferenceable(3 define float @dot4f32_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot4f32_fast( -; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[PTRX:%.*]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x float>, ptr [[PTRY:%.*]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]]) -; CHECK-NEXT: ret float [[TMP6]] +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[PTRX:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[PTRY:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP3]]) +; CHECK-NEXT: ret float [[TMP4]] ; %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1 %ptry1 = getelementptr inbounds float, ptr %ptry, i64 1 @@ -169,13 +169,13 @@ define double @dot3f64(ptr dereferenceable(32) %ptrx, ptr dereferenceable(32) %p ; CHECK-NEXT: [[X0:%.*]] = load double, ptr [[PTRX]], align 4 ; CHECK-NEXT: [[Y0:%.*]] = load double, ptr [[PTRY]], align 4 ; CHECK-NEXT: [[MUL0:%.*]] = fmul double [[X0]], [[Y0]] -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRX1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[PTRY1]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; CHECK-NEXT: [[DOT01:%.*]] = fadd double [[MUL0]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: [[DOT012:%.*]] = fadd double [[DOT01]], [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[PTRX1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRY1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; CHECK-NEXT: [[DOT01:%.*]] = fadd double [[MUL0]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOT012:%.*]] = fadd double [[DOT01]], [[TMP5]] ; CHECK-NEXT: ret double [[DOT012]] ; %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1 @@ -203,13 +203,13 @@ define float @dot3f32(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %pt ; CHECK-NEXT: [[X0:%.*]] = load float, ptr [[PTRX]], align 4 ; CHECK-NEXT: [[Y0:%.*]] = load float, ptr [[PTRY]], align 4 ; CHECK-NEXT: [[MUL0:%.*]] = fmul float [[X0]], [[Y0]] -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRX1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[PTRY1]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; CHECK-NEXT: [[DOT01:%.*]] = fadd float [[MUL0]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 -; CHECK-NEXT: [[DOT012:%.*]] = fadd float [[DOT01]], [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[PTRX1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = 
load <2 x float>, ptr [[PTRY1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[DOT01:%.*]] = fadd float [[MUL0]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOT012:%.*]] = fadd float [[DOT01]], [[TMP5]] ; CHECK-NEXT: ret float [[DOT012]] ; %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1 @@ -237,13 +237,13 @@ define double @dot3f64_fast(ptr dereferenceable(32) %ptrx, ptr dereferenceable(3 ; CHECK-NEXT: [[X0:%.*]] = load double, ptr [[PTRX]], align 4 ; CHECK-NEXT: [[Y0:%.*]] = load double, ptr [[PTRY]], align 4 ; CHECK-NEXT: [[MUL0:%.*]] = fmul double [[X0]], [[Y0]] -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRX1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[PTRY1]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; CHECK-NEXT: [[DOT01:%.*]] = fadd fast double [[MUL0]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: [[DOT012:%.*]] = fadd fast double [[DOT01]], [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[PTRX1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRY1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; CHECK-NEXT: [[DOT01:%.*]] = fadd fast double [[MUL0]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOT012:%.*]] = fadd fast double [[DOT01]], [[TMP5]] ; CHECK-NEXT: ret double [[DOT012]] ; %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1 @@ -271,13 +271,13 @@ define float @dot3f32_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16 ; CHECK-NEXT: [[X0:%.*]] = load float, ptr [[PTRX]], align 4 ; CHECK-NEXT: [[Y0:%.*]] = load float, ptr [[PTRY]], align 4 ; CHECK-NEXT: [[MUL0:%.*]] = fmul float [[X0]], [[Y0]] -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRX1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[PTRY1]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; CHECK-NEXT: [[DOT01:%.*]] = fadd fast float [[MUL0]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 -; CHECK-NEXT: [[DOT012:%.*]] = fadd fast float [[DOT01]], [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[PTRX1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRY1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[DOT01:%.*]] = fadd fast float [[MUL0]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOT012:%.*]] = fadd fast float [[DOT01]], [[TMP5]] ; CHECK-NEXT: ret float [[DOT012]] ; %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1 @@ -304,12 +304,12 @@ define float @dot3f32_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16 define double @dot2f64(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot2f64( -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRX:%.*]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[PTRY:%.*]], align 4 -; 
CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: [[DOT01:%.*]] = fadd double [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[PTRX:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRY:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOT01:%.*]] = fadd double [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret double [[DOT01]] ; %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1 @@ -326,12 +326,12 @@ define double @dot2f64(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %p define float @dot2f32(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot2f32( -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRX:%.*]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[PTRY:%.*]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 -; CHECK-NEXT: [[DOT01:%.*]] = fadd float [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[PTRX:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRY:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOT01:%.*]] = fadd float [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret float [[DOT01]] ; %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1 @@ -348,12 +348,12 @@ define float @dot2f32(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %pt define double @dot2f64_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot2f64_fast( -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRX:%.*]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr [[PTRY:%.*]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 -; CHECK-NEXT: [[DOT01:%.*]] = fadd fast double [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[PTRX:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[PTRY:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOT01:%.*]] = fadd fast double [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret double [[DOT01]] ; %ptrx1 = getelementptr inbounds double, ptr %ptrx, i64 1 @@ -370,12 +370,12 @@ define double @dot2f64_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(1 define float @dot2f32_fast(ptr dereferenceable(16) %ptrx, ptr dereferenceable(16) %ptry) { ; CHECK-LABEL: @dot2f32_fast( -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRX:%.*]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[PTRY:%.*]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], 
[[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1 -; CHECK-NEXT: [[DOT01:%.*]] = fadd fast float [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[PTRX:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[PTRY:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 +; CHECK-NEXT: [[DOT01:%.*]] = fadd fast float [[TMP4]], [[TMP5]] ; CHECK-NEXT: ret float [[DOT01]] ; %ptrx1 = getelementptr inbounds float, ptr %ptrx, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-bv-vectorized.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-bv-vectorized.ll new file mode 100644 index 00000000000000..c44ef376f81fab --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-bv-vectorized.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define void @test(ptr %p) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr [[GEP1]], align 16 +; CHECK-NEXT: ret void +; +entry: + %conv548.2.i.13 = zext i32 0 to i64 + %and551.2.i.13 = and i64 0, %conv548.2.i.13 + %conv548.3.i.13 = zext i32 0 to i64 + %and551.3.i.13 = and i64 0, %conv548.3.i.13 + %0 = trunc i64 %and551.2.i.13 to i32 + %conv54.2.i.14 = and i32 %0, 0 + %conv548.2.i.14 = zext i32 %conv54.2.i.14 to i64 + %and551.2.i.14 = and i64 %and551.2.i.13, %conv548.2.i.14 + %1 = trunc i64 %and551.3.i.13 to i32 + %conv54.3.i.14 = and i32 %1, 0 + %conv548.3.i.14 = zext i32 %conv54.3.i.14 to i64 + %and551.3.i.14 = and i64 %and551.3.i.13, %conv548.3.i.14 + %and551.2.i.15 = and i64 %and551.2.i.14, 0 + %and551.3.i.15 = and i64 %and551.3.i.14, 0 + %and551.2.i.16 = and i64 %and551.2.i.15, 0 + %and551.3.i.16 = and i64 %and551.3.i.15, 0 + %and551.2.i.17 = and i64 %and551.2.i.16, 0 + %and551.3.i.17 = and i64 %and551.3.i.16, 0 + %and551.2.i.18 = and i64 %and551.2.i.17, 0 + %and551.3.i.18 = and i64 %and551.3.i.17, 0 + %and551.2.i.19 = and i64 %and551.2.i.18, 0 + %and551.3.i.19 = and i64 %and551.3.i.18, 0 + %and551.2.i.20 = and i64 %and551.2.i.19, 0 + %and551.3.i.20 = and i64 %and551.3.i.19, 0 + %and551.2.i.21 = and i64 %and551.2.i.20, 0 + %and551.3.i.21 = and i64 %and551.3.i.20, 0 + %gep1 = getelementptr inbounds i8, ptr %p, i64 16 + %gep2 = getelementptr inbounds i8, ptr %p, i64 24 + store i64 %and551.2.i.21, ptr %gep1, align 16 + store i64 %and551.3.i.21, ptr %gep2, align 8 + ret void +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll index 5b33c6e889363e..89bc44dc1d530a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629-inseltpoison.ll @@ -180,12 +180,20 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_2( -; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <4 x 
ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x ptr> [[TMP4]], <4 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], -; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4 +; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40 +; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 +; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20 +; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 +; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1 +; AVX512VL-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2 +; AVX512VL-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3 +; AVX512VL-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], +; AVX512VL-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll index 09d6c77557efaa..c1b501015e81e4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr47629.ll @@ -180,12 +180,20 @@ define void @gather_load_2(ptr noalias nocapture %0, ptr noalias nocapture reado ; AVX512F-NEXT: ret void ; ; AVX512VL-LABEL: @gather_load_2( -; AVX512VL-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP1:%.*]], i64 0 -; AVX512VL-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer -; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr i32, <4 x ptr> [[TMP4]], <4 x i64> -; AVX512VL-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP5]], i32 4, <4 x i1> , <4 x i32> poison), !tbaa [[TBAA0]] -; AVX512VL-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], -; AVX512VL-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1:%.*]], i64 4 +; AVX512VL-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 40 +; AVX512VL-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 +; AVX512VL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 20 +; AVX512VL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !tbaa [[TBAA0]] +; AVX512VL-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 +; AVX512VL-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP6]], i64 1 +; AVX512VL-NEXT: 
[[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP8]], i64 2 +; AVX512VL-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i64 3 +; AVX512VL-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], +; AVX512VL-NEXT: store <4 x i32> [[TMP15]], ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0]] ; AVX512VL-NEXT: ret void ; %3 = getelementptr inbounds i32, ptr %1, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll index 83457cc4966f7c..ebd35448ba72f7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-buildvector.ll @@ -11,18 +11,18 @@ define void @test(ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ARG:%.*]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, <8 x ptr> [[SHUFFLE]], <8 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[TMP0]], <8 x ptr> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr double, <8 x ptr> [[TMP1]], <8 x i64> ; CHECK-NEXT: [[GEP2_0:%.*]] = getelementptr inbounds double, ptr [[ARG1:%.*]], i64 16 -; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP1]], i32 8, <8 x i1> , <8 x double> poison) +; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP2]], i32 8, <8 x i1> , <8 x double> poison) ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x double>, ptr [[GEP2_0]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <8 x double> [[TMP4]], [[TMP2]] -; CHECK-NEXT: [[TMP7:%.*]] = load <8 x double>, ptr [[ARG1]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast <8 x double> [[TMP7]], [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP8]]) -; CHECK-NEXT: [[TMP10:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP5]]) -; CHECK-NEXT: [[I142:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i64 0 -; CHECK-NEXT: [[I143:%.*]] = insertelement <2 x double> [[I142]], double [[TMP10]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <8 x double> [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = load <8 x double>, ptr [[ARG1]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <8 x double> [[TMP6]], [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP5]]) +; CHECK-NEXT: [[I142:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0 +; CHECK-NEXT: [[I143:%.*]] = insertelement <2 x double> [[I142]], double [[TMP9]], i64 1 ; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds double, ptr [[ARG2:%.*]], <2 x i64> ; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I143]], <2 x ptr> [[P]], i32 8, <2 x i1> ) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll index 19d0bc9b330657..20c5bda328c100 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/redux-feed-insertelement.ll @@ -6,10 +6,10 @@ declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32 immarg define void @rdx_feeds_single_insert(<2 x double> %v, ptr nocapture readonly %arg, ptr nocapture readonly %arg1, ptr nocapture %arg2) { ; CHECK-LABEL: @rdx_feeds_single_insert( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, ptr [[ARG1:%.*]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <8 x double> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP2]]) -; CHECK-NEXT: [[I:%.*]] = insertelement <2 x double> [[V:%.*]], double [[TMP3]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x double>, ptr [[ARG1:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = fmul fast <8 x double> [[TMP0]], +; CHECK-NEXT: [[TMP2:%.*]] = call fast double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> [[TMP1]]) +; CHECK-NEXT: [[I:%.*]] = insertelement <2 x double> [[V:%.*]], double [[TMP2]], i64 1 ; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds double, ptr [[ARG2:%.*]], <2 x i64> ; CHECK-NEXT: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> [[I]], <2 x ptr> [[P]], i32 8, <2 x i1> ) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark-masked-loads-consecutive-loads-same-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark-masked-loads-consecutive-loads-same-ptr.ll index 40dcc79f79ffce..09a5ace101e645 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/remark-masked-loads-consecutive-loads-same-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark-masked-loads-consecutive-loads-same-ptr.ll @@ -8,19 +8,23 @@ ; YAML-NEXT: Function: test ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-5' +; YAML-NEXT: - Cost: '-7' ; YAML-NEXT: - String: ' and with tree size ' -; YAML-NEXT: - TreeSize: '7' +; YAML-NEXT: - TreeSize: '5' define void @test(ptr noalias %p, ptr noalias %p1) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x ptr> poison, ptr [[P:%.*]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x ptr> [[TMP0]], <4 x ptr> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, <4 x ptr> [[TMP1]], <4 x i64> -; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> , <4 x i32> poison) -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[I:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i32, ptr [[P]], i64 32 +; CHECK-NEXT: [[I2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr i32, ptr [[P]], i64 33 +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARRAYIDX11]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[I2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP3]], <2 x i32> [[TMP0]], i64 2) +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP4]], [[TMP1]] ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[P1:%.*]], align 4 ; CHECK-NEXT: ret void ; diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll index cfbbe14186b501..eacfbda5447c7b 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-possible-strided-node.ll @@ -5,13 +5,23 @@ define void @test() { ; CHECK-LABEL: define void @test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i32, ptr null, i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i32, ptr null, i64 33 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i32, ptr null, i64 7 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr null, align 4 ; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60 -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> ), i32 4, <4 x i1> , <4 x i32> poison) ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP8]] ; CHECK-NEXT: [[TMP4:%.*]] = ashr <4 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 ; CHECK-NEXT: ret void ; entry: @@ -57,15 +67,25 @@ define void @test1() { ; CHECK-LABEL: define void @test1( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i32, ptr null, i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i32, ptr null, i64 33 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i32, ptr null, i64 7 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr null, align 4 ; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60 -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> ), i32 4, <4 x i1> , <4 x i32> poison) ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]] +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 
x i32> [[TMP12]], i32 [[TMP9]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], [[TMP8]] ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i32> -; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 ; CHECK-NEXT: ret void ; entry: @@ -111,12 +131,22 @@ define void @test_div() { ; CHECK-LABEL: define void @test_div( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i32, ptr null, i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i32, ptr null, i64 33 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i32, ptr null, i64 7 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr null, align 4 ; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60 -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> ), i32 4, <4 x i1> , <4 x i32> poison) ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = udiv <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP4]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = udiv <4 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 ; CHECK-NEXT: ret void ; @@ -163,12 +193,22 @@ define void @test_rem() { ; CHECK-LABEL: define void @test_rem( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i32, ptr null, i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i32, ptr null, i64 33 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4 +; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr i32, ptr null, i64 7 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX13]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr null, align 4 ; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr i32, ptr null, i64 60 -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> ), i32 4, <4 x i1> , <4 x i32> poison) ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[ARRAYIDX22]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x 
i32> [[TMP1]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = urem <4 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP4]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[TMP1]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = urem <4 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([4 x i32], ptr null, i64 8, i64 0), align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll index 30f328293cdaa3..c114c5dee78e99 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll @@ -6,18 +6,20 @@ target triple = "x86_64-unknown-linux-gnu" define void @"foo"(ptr addrspace(1) %0, ptr addrspace(1) %1) #0 { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr addrspace(1)> poison, ptr addrspace(1) [[TMP0:%.*]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr addrspace(1)> [[TMP3]], <4 x ptr addrspace(1)> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr addrspace(1)> [[TMP4]], <4 x i64> -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 8 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8 -; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p1(<4 x ptr addrspace(1)> [[TMP5]], i32 4, <4 x i1> , <4 x float> poison) -; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, ptr addrspace(1) [[TMP7]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x float> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> poison, <8 x i32> -; CHECK-NEXT: store <8 x float> [[TMP13]], ptr addrspace(1) [[TMP6]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0:%.*]], i64 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 24 +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, ptr addrspace(1) [[TMP3]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load <2 x float>, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, ptr addrspace(1) [[TMP4]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP6]], i64 0) +; CHECK-NEXT: [[TMP11:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP10]], <2 x float> [[TMP8]], i64 2) +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP11]], <4 x float> poison, <8 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = fmul <8 x float> 
[[TMP12]], [[TMP9]] +; CHECK-NEXT: [[TMP14:%.*]] = fadd <8 x float> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> poison, <8 x i32> +; CHECK-NEXT: store <8 x float> [[TMP15]], ptr addrspace(1) [[TMP3]], align 4 ; CHECK-NEXT: ret void ; %3 = getelementptr inbounds i8, ptr addrspace(1) %0, i64 8 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/slp-fma-loss.ll b/llvm/test/Transforms/SLPVectorizer/X86/slp-fma-loss.ll index 02c7e4a03325ed..1f3c0fb9e297c5 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/slp-fma-loss.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/slp-fma-loss.ll @@ -54,10 +54,10 @@ define double @hr_or_mul() { ; CHECK-LABEL: @hr_or_mul( ; CHECK-NEXT: [[CVT0:%.*]] = uitofp i16 3 to double ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> poison, double [[CVT0]], i32 0 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x double> , [[SHUFFLE]] -; CHECK-NEXT: [[TMP3:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP2]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast double [[TMP3]], [[CVT0]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x double> , [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double -0.000000e+00, <4 x double> [[TMP3]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast double [[TMP4]], [[CVT0]] ; CHECK-NEXT: ret double [[OP_RDX]] ; %cvt0 = uitofp i16 3 to double diff --git a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll new file mode 100644 index 00000000000000..56281424c7114a --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll @@ -0,0 +1,208 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -slp-vectorize-hor=false < %s | FileCheck %s + +define void @func(i32 %0) { +; CHECK-LABEL: define void @func( +; CHECK-SAME: i32 [[TMP0:%.*]]) { +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP0]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <32 x i32> [[TMP11]], <32 x i32> , <32 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i32> [[TMP12]], i32 0, i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP13]], <8 x i32> zeroinitializer, i64 16) +; CHECK-NEXT: [[TMP15:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP14]], <4 x i32> zeroinitializer, i64 24) +; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v2i32(<32 x i32> [[TMP15]], <2 x i32> 
zeroinitializer, i64 14) +; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v2i32(<32 x i32> [[TMP16]], <2 x i32> zeroinitializer, i64 28) +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i32> [[TMP8]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = sext <32 x i32> [[TMP18]] to <32 x i64> +; CHECK-NEXT: [[TMP20:%.*]] = icmp slt <32 x i64> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <32 x i1> [[TMP20]], i32 31 +; CHECK-NEXT: [[TMP22:%.*]] = and i1 false, [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i1> [[TMP20]], i32 30 +; CHECK-NEXT: [[TMP24:%.*]] = and i1 false, [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i1> [[TMP20]], i32 29 +; CHECK-NEXT: [[TMP26:%.*]] = and i1 false, [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <32 x i1> [[TMP20]], i32 28 +; CHECK-NEXT: [[TMP28:%.*]] = and i1 false, [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i1> [[TMP20]], i32 27 +; CHECK-NEXT: [[TMP30:%.*]] = and i1 false, [[TMP29]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i1> [[TMP20]], i32 26 +; CHECK-NEXT: [[TMP32:%.*]] = and i1 false, [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <32 x i1> [[TMP20]], i32 25 +; CHECK-NEXT: [[TMP34:%.*]] = and i1 false, [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i1> [[TMP20]], i32 24 +; CHECK-NEXT: [[TMP36:%.*]] = and i1 false, [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i1> [[TMP20]], i32 23 +; CHECK-NEXT: [[TMP38:%.*]] = and i1 false, [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <32 x i1> [[TMP20]], i32 22 +; CHECK-NEXT: [[TMP40:%.*]] = and i1 false, [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i1> [[TMP20]], i32 21 +; CHECK-NEXT: [[TMP42:%.*]] = and i1 false, [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i1> [[TMP20]], i32 20 +; CHECK-NEXT: [[TMP44:%.*]] = and i1 false, [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <32 x i1> [[TMP20]], i32 19 +; CHECK-NEXT: [[TMP46:%.*]] = and i1 false, [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i1> [[TMP20]], i32 18 +; CHECK-NEXT: [[TMP48:%.*]] = and i1 false, [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i1> [[TMP20]], i32 17 +; CHECK-NEXT: [[TMP50:%.*]] = and i1 false, [[TMP49]] +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <32 x i1> [[TMP20]], i32 16 +; CHECK-NEXT: [[TMP52:%.*]] = and i1 false, [[TMP51]] +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i1> [[TMP20]], i32 15 +; CHECK-NEXT: [[TMP54:%.*]] = and i1 false, [[TMP53]] +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i1> [[TMP20]], i32 14 +; CHECK-NEXT: [[TMP56:%.*]] = and i1 false, [[TMP55]] +; CHECK-NEXT: [[TMP57:%.*]] = extractelement <32 x i1> [[TMP20]], i32 13 +; CHECK-NEXT: [[TMP58:%.*]] = and i1 false, [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i1> [[TMP20]], i32 12 +; CHECK-NEXT: [[TMP60:%.*]] = and i1 false, [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i1> [[TMP20]], i32 11 +; CHECK-NEXT: [[TMP62:%.*]] = and i1 false, [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = extractelement <32 x i1> [[TMP20]], i32 10 +; CHECK-NEXT: [[TMP64:%.*]] = and i1 false, [[TMP63]] +; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i1> [[TMP20]], i32 9 +; CHECK-NEXT: [[TMP66:%.*]] = and i1 false, [[TMP65]] +; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i1> [[TMP20]], i32 8 +; CHECK-NEXT: [[TMP68:%.*]] = and i1 false, [[TMP67]] +; CHECK-NEXT: [[TMP69:%.*]] = extractelement <32 x i1> [[TMP20]], i32 7 +; 
CHECK-NEXT: [[TMP70:%.*]] = and i1 false, [[TMP69]] +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i1> [[TMP20]], i32 6 +; CHECK-NEXT: [[TMP72:%.*]] = and i1 false, [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i1> [[TMP20]], i32 5 +; CHECK-NEXT: [[TMP74:%.*]] = and i1 false, [[TMP73]] +; CHECK-NEXT: [[TMP75:%.*]] = extractelement <32 x i1> [[TMP20]], i32 4 +; CHECK-NEXT: [[TMP76:%.*]] = and i1 false, [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i32> [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP78:%.*]] = sext i32 [[TMP77]] to i64 +; CHECK-NEXT: [[TMP79:%.*]] = getelementptr float, ptr addrspace(1) null, i64 [[TMP78]] +; CHECK-NEXT: ret void +; + %2 = shl i32 %0, 0 + %3 = sext i32 %2 to i64 + %4 = shl i32 0, 0 + %5 = sext i32 %4 to i64 + %6 = or i32 0, 0 + %7 = or i32 0, 0 + %8 = zext i32 %6 to i64 + %9 = zext i32 %7 to i64 + %10 = zext i32 0 to i64 + %11 = zext i32 0 to i64 + %12 = zext i32 0 to i64 + %13 = zext i32 0 to i64 + %14 = zext i32 0 to i64 + %15 = zext i32 0 to i64 + %16 = zext i32 0 to i64 + %17 = zext i32 0 to i64 + %18 = zext i32 0 to i64 + %19 = zext i32 0 to i64 + %20 = zext i32 0 to i64 + %21 = zext i32 0 to i64 + %22 = zext i32 0 to i64 + %23 = zext i32 0 to i64 + %24 = zext i32 0 to i64 + %25 = zext i32 0 to i64 + %26 = zext i32 0 to i64 + %27 = or i64 %3, 0 + %28 = or i64 %3, %8 + %29 = or i64 %3, %9 + %30 = or i64 %3, %10 + %31 = or i64 %3, %11 + %32 = or i64 %3, %12 + %33 = or i64 %3, %13 + %34 = or i64 %3, %14 + %35 = or i64 %3, %15 + %36 = or i64 %3, %16 + %37 = or i64 %3, %17 + %38 = or i64 %3, %18 + %39 = or i64 %3, %19 + %40 = or i64 %3, %20 + %41 = or i64 %3, %21 + %42 = or i64 %3, %22 + %43 = or i64 %3, %23 + %44 = or i64 %3, %24 + %45 = or i64 %3, %25 + %46 = or i64 %3, 0 + %47 = or i64 %3, 0 + %48 = or i64 %3, 0 + %49 = or i64 %3, 0 + %50 = or i64 %3, 0 + %51 = or i64 %3, 0 + %52 = or i64 %3, 0 + %53 = or i64 %3, 0 + %54 = or i64 %3, 0 + %55 = or i64 %3, 0 + %56 = or i64 %3, 0 + %57 = or i64 %3, 0 + %58 = or i64 %3, 0 + %59 = icmp slt i64 %28, 0 + %60 = icmp slt i64 %29, 0 + %61 = icmp slt i64 %30, 0 + %62 = icmp slt i64 %31, 0 + %63 = icmp slt i64 %32, 0 + %64 = icmp slt i64 %33, 0 + %65 = icmp slt i64 %34, 0 + %66 = icmp slt i64 %35, 0 + %67 = icmp slt i64 %36, 0 + %68 = icmp slt i64 %37, 0 + %69 = icmp slt i64 %38, 0 + %70 = icmp slt i64 %39, 0 + %71 = icmp slt i64 %40, 0 + %72 = icmp slt i64 %41, 0 + %73 = icmp slt i64 %42, 0 + %74 = icmp slt i64 %43, 0 + %75 = icmp slt i64 %44, 0 + %76 = icmp slt i64 %45, 0 + %77 = icmp slt i64 %46, 0 + %78 = icmp slt i64 %47, 0 + %79 = icmp slt i64 %48, 0 + %80 = icmp slt i64 %49, 0 + %81 = icmp slt i64 %50, 0 + %82 = icmp slt i64 %51, 0 + %83 = icmp slt i64 %52, 0 + %84 = icmp slt i64 %53, 0 + %85 = icmp slt i64 %54, 0 + %86 = icmp slt i64 %55, 0 + %87 = icmp slt i64 %56, 0 + %88 = icmp slt i64 %57, 0 + %89 = icmp slt i64 %58, 0 + %90 = and i1 false, %59 + %91 = and i1 false, %60 + %92 = and i1 false, %61 + %93 = and i1 false, %62 + %94 = and i1 false, %63 + %95 = and i1 false, %64 + %96 = and i1 false, %65 + %97 = and i1 false, %66 + %98 = and i1 false, %67 + %99 = and i1 false, %68 + %100 = and i1 false, %69 + %101 = and i1 false, %70 + %102 = and i1 false, %71 + %103 = and i1 false, %72 + %104 = and i1 false, %73 + %105 = and i1 false, %74 + %106 = and i1 false, %75 + %107 = and i1 false, %76 + %108 = icmp eq i32 %2, 0 + %109 = and i1 false, %77 + %110 = and i1 false, %78 + %111 = and i1 false, %79 + %112 = and i1 false, %80 + %113 = and i1 false, %81 + %114 = and i1 false, %82 + %115 
= and i1 false, %83 + %116 = and i1 false, %84 + %117 = and i1 false, %85 + %118 = and i1 false, %86 + %119 = or i64 %5, %26 + %120 = getelementptr float, ptr addrspace(1) null, i64 %119 + %121 = icmp slt i64 %119, 0 + ret void +} diff --git a/llvm/test/Verifier/rtsan-attrs.ll b/llvm/test/Verifier/rtsan-attrs.ll deleted file mode 100644 index 42ab85163642b1..00000000000000 --- a/llvm/test/Verifier/rtsan-attrs.ll +++ /dev/null @@ -1,9 +0,0 @@ -; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s - -; CHECK: Attributes 'sanitize_realtime and nosanitize_realtime' are incompatible! -; CHECK-NEXT: ptr @sanitize_nosanitize -define void @sanitize_nosanitize() #0 { - ret void -} - -attributes #0 = { sanitize_realtime nosanitize_realtime } diff --git a/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll b/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll index 708b5a006be60e..d269f92763853c 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll @@ -1,24 +1,30 @@ -; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s +; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0 +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s ; 3 kernels: ; - A does a direct call to HelperA ; - B is storing @HelperA ; - C does a direct call to HelperA ; -; The helper functions will get externalized, so C/A will end up -; in the same partition. - -; P0 is empty. -; CHECK0: declare - -; CHECK1: define amdgpu_kernel void @B(ptr %dst) - -; CHECK2: define hidden void @HelperA() -; CHECK2: define amdgpu_kernel void @A() -; CHECK2: define amdgpu_kernel void @C() +; The helper functions will get externalized, which will force A and C into P0 as +; external functions cannot be duplicated. 
+ +; CHECK0: define hidden void @HelperA() +; CHECK0: define amdgpu_kernel void @A() +; CHECK0: declare amdgpu_kernel void @B(ptr) +; CHECK0: define amdgpu_kernel void @C() + +; CHECK1: declare hidden void @HelperA() +; CHECK1: declare amdgpu_kernel void @A() +; CHECK1: declare amdgpu_kernel void @B(ptr) +; CHECK1: declare amdgpu_kernel void @C() + +; CHECK2: declare hidden void @HelperA() +; CHECK2: declare amdgpu_kernel void @A() +; CHECK2: define amdgpu_kernel void @B(ptr %dst) +; CHECK2: declare amdgpu_kernel void @C() define internal void @HelperA() { ret void diff --git a/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize.ll b/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize.ll index 81f6c8f0fbb3a6..731cf4b374c95b 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize.ll @@ -1,4 +1,4 @@ -; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 +; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0 ; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s ; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s diff --git a/llvm/test/tools/llvm-split/AMDGPU/debug-name-hiding.ll b/llvm/test/tools/llvm-split/AMDGPU/debug-name-hiding.ll new file mode 100644 index 00000000000000..6a07ed51ba1beb --- /dev/null +++ b/llvm/test/tools/llvm-split/AMDGPU/debug-name-hiding.ll @@ -0,0 +1,20 @@ +; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -debug -amdgpu-module-splitting-log-private 2>&1 | FileCheck %s --implicit-check-not=MyCustomKernel +; REQUIRES: asserts + +; SHA256 hashes of the kernel names. + +; CHECK: a097723d21cf9f35d90e6fb7881995ac8c398b3366a6c97efc657404f9fe301c +; CHECK: 626bc23242de8fcfda7f0e66318d29455c081df6b5380e64d14703c95fcbcd59 +; CHECK: c38d90a7ca71dc5d694bb9e093dadcdedfc4cb4adf7ed7e46d42fe95a0b4ef55 + +define amdgpu_kernel void @MyCustomKernel0() { + ret void +} + +define amdgpu_kernel void @MyCustomKernel1() { + ret void +} + +define amdgpu_kernel void @MyCustomKernel2() { + ret void +} diff --git a/llvm/test/tools/llvm-split/AMDGPU/debug-non-kernel-root.ll b/llvm/test/tools/llvm-split/AMDGPU/debug-non-kernel-root.ll new file mode 100644 index 00000000000000..836b5c05d0653d --- /dev/null +++ b/llvm/test/tools/llvm-split/AMDGPU/debug-non-kernel-root.ll @@ -0,0 +1,36 @@ +; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -debug 2>&1 | FileCheck %s --implicit-check-not="[root]" +; REQUIRES: asserts + +; func_3 is never directly called, so it needs to be considered +; as a root to handle this module correctly.
+ +; CHECK: [root] kernel_1 +; CHECK-NEXT: [dependency] func_1 +; CHECK-NEXT: [dependency] func_2 +; CHECK-NEXT: [root] func_3 +; CHECK-NEXT: [dependency] func_2 + +define amdgpu_kernel void @kernel_1() { +entry: + call void @func_1() + ret void +} + +define linkonce_odr hidden void @func_1() { +entry: + %call = call i32 @func_2() + ret void +} + +define linkonce_odr hidden i32 @func_2() #0 { +entry: + ret i32 0 +} + +define void @func_3() { +entry: + %call = call i32 @func_2() + ret void +} + +attributes #0 = { noinline optnone } diff --git a/llvm/test/tools/llvm-split/AMDGPU/declarations.ll b/llvm/test/tools/llvm-split/AMDGPU/declarations.ll index 755676061b2557..10b6cdfef4055f 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/declarations.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/declarations.ll @@ -1,13 +1,16 @@ ; RUN: rm -rf %t0 %t1 ; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa ; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s -; RUN: not llvm-dis -o - %t1 +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s -; Empty module without any defs should result in a single output module that is -; an exact copy of the input. +; Check that all declarations are put into each partition. ; CHECK0: declare void @A ; CHECK0: declare void @B +; CHECK1: declare void @A +; CHECK1: declare void @B + declare void @A() + declare void @B() diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll index d7e84abd5f968d..c2746d1398924c 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll @@ -1,6 +1,6 @@ ; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s ; 3 kernels: ; - A calls nothing @@ -13,12 +13,16 @@ ; Additionally, @PerryThePlatypus gets externalized as ; the alias counts as taking its address. 
-; CHECK0: define amdgpu_kernel void @A +; CHECK0-NOT: define +; CHECK0: @Perry = internal alias ptr (), ptr @PerryThePlatypus +; CHECK0: define hidden void @PerryThePlatypus() +; CHECK0: define amdgpu_kernel void @B +; CHECK0: define amdgpu_kernel void @C +; CHECK0-NOT: define -; CHECK1: @Perry = internal alias ptr (), ptr @PerryThePlatypus -; CHECK1: define hidden void @PerryThePlatypus() -; CHECK1: define amdgpu_kernel void @B -; CHECK1: define amdgpu_kernel void @C +; CHECK1-NOT: define +; CHECK1: define amdgpu_kernel void @A +; CHECK1-NOT: define @Perry = internal alias ptr(), ptr @PerryThePlatypus diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-cost-ranking.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-cost-ranking.ll index c7e13304dc6dec..4635264aefb39a 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/kernels-cost-ranking.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-cost-ranking.ll @@ -1,21 +1,27 @@ ; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s ; 3 kernels with each their own dependencies should go into 3 ; distinct partitions. The most expensive kernel should be ; seen first and go into the last partition. +; CHECK0-NOT: define ; CHECK0: define amdgpu_kernel void @C ; CHECK0: define internal void @HelperC ; CHECK0-NOT: define +; CHECK1-NOT: define ; CHECK1: define amdgpu_kernel void @A ; CHECK1: define internal void @HelperA +; CHECK1-NOT: define +; CHECK2-NOT: define ; CHECK2: define amdgpu_kernel void @B ; CHECK2: define internal void @HelperB +; CHECK2-NOT: define + define amdgpu_kernel void @A() { call void @HelperA() diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-external.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-external.ll index 332344a776e82e..435e97a5813400 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-external.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-external.ll @@ -1,20 +1,29 @@ ; RUN: llvm-split -o %t %s -j 4 -mtriple amdgcn-amd-amdhsa -; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t3 | FileCheck --check-prefix=CHECK3 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s +; RUN: llvm-dis -o - %t3 | FileCheck --check-prefix=CHECK3 %s -; CHECK0: define internal void @PrivateHelper1() -; CHECK0: define amdgpu_kernel void @D +; Both overridable helpers should go in P0.
-; CHECK1: define internal void @PrivateHelper0() -; CHECK1: define amdgpu_kernel void @C +; CHECK0-NOT: define +; CHECK0: define available_externally void @OverridableHelper0() +; CHECK0: define internal void @OverridableHelper1() +; CHECK0: define amdgpu_kernel void @A +; CHECK0: define amdgpu_kernel void @B +; CHECK0-NOT: define -; CHECK2: define internal void @OverridableHelper1() -; CHECK2: define amdgpu_kernel void @B +; CHECK1-NOT: define -; CHECK3: define available_externally void @OverridableHelper0() -; CHECK3: define amdgpu_kernel void @A +; CHECK2-NOT: define +; CHECK2: define internal void @PrivateHelper1() +; CHECK2: define amdgpu_kernel void @D +; CHECK2-NOT: define + +; CHECK3-NOT: define +; CHECK3: define internal void @PrivateHelper0() +; CHECK3: define amdgpu_kernel void @C +; CHECK3-NOT: define define available_externally void @OverridableHelper0() { ret void diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll index 5be945bda48bf4..2d870039112cbf 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll @@ -1,7 +1,7 @@ ; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s ; We have 4 kernels: ; - Each kernel has an internal helper @@ -15,19 +15,25 @@ ; indirect call. HelperC/D should also end up in P0 as they ; are dependencies of HelperB. 
+; CHECK0-NOT: define +; CHECK0: define hidden void @HelperA +; CHECK0: define hidden void @HelperB +; CHECK0: define hidden void @CallCandidate +; CHECK0: define internal void @HelperC ; CHECK0: define internal void @HelperD -; CHECK0: define amdgpu_kernel void @D +; CHECK0: define amdgpu_kernel void @A +; CHECK0: define amdgpu_kernel void @B +; CHECK0-NOT: define -; CHECK1: define internal void @HelperC -; CHECK1: define amdgpu_kernel void @C +; CHECK1-NOT: define +; CHECK1: define internal void @HelperD +; CHECK1: define amdgpu_kernel void @D +; CHECK1-NOT: define -; CHECK2: define hidden void @HelperA -; CHECK2: define hidden void @HelperB -; CHECK2: define hidden void @CallCandidate +; CHECK2-NOT: define ; CHECK2: define internal void @HelperC -; CHECK2: define internal void @HelperD -; CHECK2: define amdgpu_kernel void @A -; CHECK2: define amdgpu_kernel void @B +; CHECK2: define amdgpu_kernel void @C +; CHECK2-NOT: define @addrthief = global [3 x ptr] [ptr @HelperA, ptr @HelperB, ptr @CallCandidate] diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-overridable.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-overridable.ll index 9205a5d1930e52..dc2c5c3c07bee6 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-overridable.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-overridable.ll @@ -1,15 +1,21 @@ ; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s - -; CHECK0: define amdgpu_kernel void @D - -; CHECK1: define amdgpu_kernel void @C - -; CHECK2: define void @ExternalHelper -; CHECK2: define amdgpu_kernel void @A -; CHECK2: define amdgpu_kernel void @B +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s + +; CHECK0-NOT: define +; CHECK0: define void @ExternalHelper +; CHECK0: define amdgpu_kernel void @A +; CHECK0: define amdgpu_kernel void @B +; CHECK0-NOT: define + +; CHECK1-NOT: define +; CHECK1: define amdgpu_kernel void @D +; CHECK1-NOT: define + +; CHECK2-NOT: define +; CHECK2: define amdgpu_kernel void @C +; CHECK2-NOT: define define void @ExternalHelper() { ret void diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables-noexternal.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables-noexternal.ll index a184d92aea9b9f..0fc76934afc548 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables-noexternal.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables-noexternal.ll @@ -1,20 +1,26 @@ ; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-globals -; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s ; 3 kernels use private/internal global variables. ; The GVs should be copied in each partition as needed. 
+; CHECK0-NOT: define ; CHECK0: @bar = internal constant ptr ; CHECK0: define amdgpu_kernel void @C +; CHECK0-NOT: define +; CHECK1-NOT: define ; CHECK1: @foo = private constant ptr ; CHECK1: define amdgpu_kernel void @A +; CHECK1-NOT: define +; CHECK2-NOT: define ; CHECK2: @foo = private constant ptr ; CHECK2: @bar = internal constant ptr ; CHECK2: define amdgpu_kernel void @B +; CHECK2-NOT: define @foo = private constant ptr poison @bar = internal constant ptr poison diff --git a/llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables.ll b/llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables.ll index be84a0b5916f0d..7564662e7c7c0c 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/kernels-global-variables.ll @@ -1,22 +1,28 @@ ; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s ; 3 kernels use private/internal global variables. ; The GVs should be copied in each partition as needed. +; CHECK0-NOT: define ; CHECK0: @foo = hidden constant ptr poison ; CHECK0: @bar = hidden constant ptr poison ; CHECK0: define amdgpu_kernel void @C +; CHECK0-NOT: define +; CHECK1-NOT: define ; CHECK1: @foo = external hidden constant ptr{{$}} ; CHECK1: @bar = external hidden constant ptr{{$}} ; CHECK1: define amdgpu_kernel void @A +; CHECK1-NOT: define +; CHECK2-NOT: define ; CHECK2: @foo = external hidden constant ptr{{$}} ; CHECK2: @bar = external hidden constant ptr{{$}} ; CHECK2: define amdgpu_kernel void @B +; CHECK2-NOT: define @foo = private constant ptr poison @bar = internal constant ptr poison diff --git a/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll b/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll index 807fb2e5f33cea..459c5a7f1a2db3 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll @@ -1,12 +1,12 @@ -; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5 -; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s +; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=1.2 -amdgpu-module-splitting-large-function-merge-overlap=0.5 +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s -; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0 -; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t.nolarge1 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK1 
--implicit-check-not=define %s -; RUN: llvm-dis -o - %t.nolarge2 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK2 --implicit-check-not=define %s +; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0 +; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 %s +; RUN: llvm-dis -o - %t.nolarge1 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK1 %s +; RUN: llvm-dis -o - %t.nolarge2 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK2 %s ; 2 kernels (A/B) are large and share all their dependencies. ; They should go in the same partition, the remaining kernel should @@ -15,12 +15,14 @@ ; Also check w/o large kernels processing to verify they are indeed handled ; differently. -; P0 is empty -; CHECK0: declare +; CHECK0-NOT: define +; CHECK1-NOT: define ; CHECK1: define internal void @HelperC() ; CHECK1: define amdgpu_kernel void @C +; CHECK1-NOT: define +; CHECK2-NOT: define ; CHECK2: define internal void @large2() ; CHECK2: define internal void @large1() ; CHECK2: define internal void @large0() @@ -28,9 +30,12 @@ ; CHECK2: define internal void @HelperB() ; CHECK2: define amdgpu_kernel void @A ; CHECK2: define amdgpu_kernel void @B +; CHECK2-NOT: define +; NOLARGEKERNELS-CHECK0-NOT: define ; NOLARGEKERNELS-CHECK0: define internal void @HelperC() ; NOLARGEKERNELS-CHECK0: define amdgpu_kernel void @C +; NOLARGEKERNELS-CHECK0-NOT: define ; NOLARGEKERNELS-CHECK1: define internal void @large2() ; NOLARGEKERNELS-CHECK1: define internal void @large1() @@ -44,7 +49,6 @@ ; NOLARGEKERNELS-CHECK2: define internal void @HelperA() ; NOLARGEKERNELS-CHECK2: define amdgpu_kernel void @A - define internal void @large2() { store volatile i32 42, ptr null call void @large2() diff --git a/llvm/test/tools/llvm-split/AMDGPU/non-kernels-dependency-indirect.ll b/llvm/test/tools/llvm-split/AMDGPU/non-kernels-dependency-indirect.ll index 1314a78b42f3b0..167930ce0e8063 100644 --- a/llvm/test/tools/llvm-split/AMDGPU/non-kernels-dependency-indirect.ll +++ b/llvm/test/tools/llvm-split/AMDGPU/non-kernels-dependency-indirect.ll @@ -1,7 +1,7 @@ ; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=DEFINE %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=DEFINE %s +; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=DEFINE %s ; We have 4 function: ; - Each function has an internal helper @@ -11,19 +11,19 @@ ; @CallCandidate doesn't have to be in A/B's partition, unlike ; in the corresponding tests for kernels where it has to. 
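;
; Illustration (drawn from the @addrthief table below, not new content): any
; helper whose address escapes, e.g.
;   @addrthief = global [3 x ptr] [ptr @HelperA, ptr @HelperB, ptr @CallCandidate]
; may be reached through an indirect call from any function, so the splitter
; has to reason about it conservatively when assigning it to a partition.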
+; CHECK0: define hidden void @HelperA +; CHECK0: define hidden void @HelperB ; CHECK0: define internal void @HelperC ; CHECK0: define internal void @HelperD -; CHECK0: define internal void @C -; CHECK0: define internal void @D +; CHECK0: define void @A +; CHECK0: define void @B -; CHECK1: define hidden void @HelperA -; CHECK1: define hidden void @CallCandidate() -; CHECK1: define internal void @A +; CHECK1: define internal void @HelperD +; CHECK1: define void @D -; CHECK2: define hidden void @HelperB +; CHECK2: define hidden void @CallCandidate ; CHECK2: define internal void @HelperC -; CHECK2: define internal void @HelperD -; CHECK2: define internal void @B +; CHECK2: define void @C @addrthief = global [3 x ptr] [ptr @HelperA, ptr @HelperB, ptr @CallCandidate] @@ -51,22 +51,22 @@ define internal void @HelperD() { ret void } -define internal void @A(ptr %call) { +define void @A(ptr %call) { call void @HelperA(ptr %call) ret void } -define internal void @B(ptr %call) { +define void @B(ptr %call) { call void @HelperB(ptr %call) ret void } -define internal void @C() { +define void @C() { call void @HelperC() ret void } -define internal void @D() { +define void @D() { call void @HelperD() ret void } diff --git a/llvm/test/tools/llvm-split/AMDGPU/recursive-search-2.ll b/llvm/test/tools/llvm-split/AMDGPU/recursive-search-2.ll deleted file mode 100644 index 01f2f3627f9905..00000000000000 --- a/llvm/test/tools/llvm-split/AMDGPU/recursive-search-2.ll +++ /dev/null @@ -1,128 +0,0 @@ -; RUN: llvm-split -o %t_s3_ %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=2 -; RUN: llvm-dis -o - %t_s3_0 | FileCheck --check-prefix=SPLIT3-CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s3_1 | FileCheck --check-prefix=SPLIT3-CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s3_2 | FileCheck --check-prefix=SPLIT3-CHECK2 --implicit-check-not=define %s - -; RUN: llvm-split -o %t_s5_ %s -j 5 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=2 -; RUN: llvm-dis -o - %t_s5_0 | FileCheck --check-prefix=SPLIT5-CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s5_1 | FileCheck --check-prefix=SPLIT5-CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s5_2 | FileCheck --check-prefix=SPLIT5-CHECK2 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s5_3 | FileCheck --check-prefix=SPLIT5-CHECK3 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s5_4 | FileCheck --check-prefix=SPLIT5-CHECK4 --implicit-check-not=define %s - -; Test the specifics of the search algorithm. -; This test will change depending on new heuristics we add or remove. 
- -; -------------------------------------------- - -; SPLIT3-CHECK0: define internal void @HelperA() -; SPLIT3-CHECK0: define internal void @HelperB() -; SPLIT3-CHECK0: define internal void @HelperC() -; SPLIT3-CHECK0: define amdgpu_kernel void @AB() -; SPLIT3-CHECK0: define amdgpu_kernel void @BC() - -; SPLIT3-CHECK1: define amdgpu_kernel void @A() -; SPLIT3-CHECK1: define internal void @HelperA() -; SPLIT3-CHECK1: define amdgpu_kernel void @C() -; SPLIT3-CHECK1: define internal void @HelperC() - -; SPLIT3-CHECK2: define internal void @HelperA() -; SPLIT3-CHECK2: define amdgpu_kernel void @B() -; SPLIT3-CHECK2: define internal void @HelperB() -; SPLIT3-CHECK2: define internal void @HelperC() -; SPLIT3-CHECK2: define amdgpu_kernel void @ABC() - -; -------------------------------------------- - -; SPLIT5-CHECK0: define amdgpu_kernel void @A() -; SPLIT5-CHECK0: define internal void @HelperA() -; SPLIT5-CHECK0: define amdgpu_kernel void @B() -; SPLIT5-CHECK0: define internal void @HelperB() - -; SPLIT5-CHECK1: define internal void @HelperB() -; SPLIT5-CHECK1: define internal void @HelperC() -; SPLIT5-CHECK1: define amdgpu_kernel void @BC - -; SPLIT5-CHECK2: define internal void @HelperA() -; SPLIT5-CHECK2: define internal void @HelperB() -; SPLIT5-CHECK2: define amdgpu_kernel void @AB() - -; SPLIT5-CHECK3: define amdgpu_kernel void @C() -; SPLIT5-CHECK3: define internal void @HelperC() - -; SPLIT5-CHECK4: define internal void @HelperA() -; SPLIT5-CHECK4: define internal void @HelperB() -; SPLIT5-CHECK4: define internal void @HelperC() -; SPLIT5-CHECK4: define amdgpu_kernel void @ABC() - -define amdgpu_kernel void @A() { - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - call void @HelperA() - ret void -} - -define internal void @HelperA() { - store volatile i32 42, ptr null - store volatile i32 42, ptr null - ret void -} - -define amdgpu_kernel void @B() { - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - call void @HelperB() - ret void -} - -define internal void @HelperB() { - store volatile i32 42, ptr null - store volatile i32 42, ptr null - store volatile i32 42, ptr null - ret void -} - -define amdgpu_kernel void @C() { - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - call void @HelperC() - ret void -} - -define internal void @HelperC() { - store volatile i32 42, ptr null - ret void -} - -define amdgpu_kernel void @AB() { - store volatile i32 42, ptr null - call void @HelperA() - call void @HelperB() - ret void -} - -define amdgpu_kernel void @BC() { - store volatile i32 42, ptr null - store volatile i32 42, ptr null - call void @HelperB() - call void @HelperC() - ret void -} - -define amdgpu_kernel void @ABC() { - call void @HelperA() - call void @HelperB() - call void @HelperC() - ret void -} diff --git a/llvm/test/tools/llvm-split/AMDGPU/recursive-search-8.ll b/llvm/test/tools/llvm-split/AMDGPU/recursive-search-8.ll deleted file mode 100644 index eae57a19883106..00000000000000 --- a/llvm/test/tools/llvm-split/AMDGPU/recursive-search-8.ll +++ /dev/null @@ -1,128 +0,0 @@ -; RUN: llvm-split -o %t_s3_ %s -j 3 -mtriple amdgcn-amd-amdhsa 
-amdgpu-module-splitting-max-depth=8 -; RUN: llvm-dis -o - %t_s3_0 | FileCheck --check-prefix=SPLIT3-CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s3_1 | FileCheck --check-prefix=SPLIT3-CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s3_2 | FileCheck --check-prefix=SPLIT3-CHECK2 --implicit-check-not=define %s - -; RUN: llvm-split -o %t_s5_ %s -j 5 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=8 -; RUN: llvm-dis -o - %t_s5_0 | FileCheck --check-prefix=SPLIT5-CHECK0 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s5_1 | FileCheck --check-prefix=SPLIT5-CHECK1 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s5_2 | FileCheck --check-prefix=SPLIT5-CHECK2 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s5_3 | FileCheck --check-prefix=SPLIT5-CHECK3 --implicit-check-not=define %s -; RUN: llvm-dis -o - %t_s5_4 | FileCheck --check-prefix=SPLIT5-CHECK4 --implicit-check-not=define %s - -; Test the specifics of the search algorithm. -; This test will change depending on new heuristics we add or remove. - -; -------------------------------------------- - -; SPLIT3-CHECK0: define internal void @HelperA() -; SPLIT3-CHECK0: define internal void @HelperB() -; SPLIT3-CHECK0: define internal void @HelperC() -; SPLIT3-CHECK0: define amdgpu_kernel void @AB() -; SPLIT3-CHECK0: define amdgpu_kernel void @BC() - -; SPLIT3-CHECK1: define amdgpu_kernel void @A() -; SPLIT3-CHECK1: define internal void @HelperA() -; SPLIT3-CHECK1: define amdgpu_kernel void @C() -; SPLIT3-CHECK1: define internal void @HelperC() - -; SPLIT3-CHECK2: define internal void @HelperA() -; SPLIT3-CHECK2: define amdgpu_kernel void @B() -; SPLIT3-CHECK2: define internal void @HelperB() -; SPLIT3-CHECK2: define internal void @HelperC() -; SPLIT3-CHECK2: define amdgpu_kernel void @ABC() - -; -------------------------------------------- - -; SPLIT5-CHECK0: define amdgpu_kernel void @A() -; SPLIT5-CHECK0: define internal void @HelperA() -; SPLIT5-CHECK0: define amdgpu_kernel void @B() -; SPLIT5-CHECK0: define internal void @HelperB() - -; SPLIT5-CHECK1: define internal void @HelperB() -; SPLIT5-CHECK1: define internal void @HelperC() -; SPLIT5-CHECK1: define amdgpu_kernel void @BC - -; SPLIT5-CHECK2: define internal void @HelperA() -; SPLIT5-CHECK2: define internal void @HelperB() -; SPLIT5-CHECK2: define amdgpu_kernel void @AB() - -; SPLIT5-CHECK3: define amdgpu_kernel void @C() -; SPLIT5-CHECK3: define internal void @HelperC() - -; SPLIT5-CHECK4: define internal void @HelperA() -; SPLIT5-CHECK4: define internal void @HelperB() -; SPLIT5-CHECK4: define internal void @HelperC() -; SPLIT5-CHECK4: define amdgpu_kernel void @ABC() - -define amdgpu_kernel void @A() { - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - call void @HelperA() - ret void -} - -define internal void @HelperA() { - store volatile i32 42, ptr null - store volatile i32 42, ptr null - ret void -} - -define amdgpu_kernel void @B() { - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - call void @HelperB() - ret void -} - -define internal void @HelperB() { - store volatile i32 42, ptr null - store volatile i32 42, ptr null - store volatile i32 42, ptr null - ret void -} - -define amdgpu_kernel void @C() { - store volatile i64 42, ptr null - store volatile i64 42, ptr null - store volatile i64 42, ptr null - 
store volatile i64 42, ptr null
-  store volatile i64 42, ptr null
-  store volatile i64 42, ptr null
-  store volatile i64 42, ptr null
-  store volatile i64 42, ptr null
-  call void @HelperC()
-  ret void
-}
-
-define internal void @HelperC() {
-  store volatile i32 42, ptr null
-  ret void
-}
-
-define amdgpu_kernel void @AB() {
-  store volatile i32 42, ptr null
-  call void @HelperA()
-  call void @HelperB()
-  ret void
-}
-
-define amdgpu_kernel void @BC() {
-  store volatile i32 42, ptr null
-  store volatile i32 42, ptr null
-  call void @HelperB()
-  call void @HelperC()
-  ret void
-}
-
-define amdgpu_kernel void @ABC() {
-  call void @HelperA()
-  call void @HelperB()
-  call void @HelperC()
-  ret void
-}
diff --git a/llvm/tools/dsymutil/dsymutil.cpp b/llvm/tools/dsymutil/dsymutil.cpp
index 728f2ed3e62aca..364a7d63d486e1 100644
--- a/llvm/tools/dsymutil/dsymutil.cpp
+++ b/llvm/tools/dsymutil/dsymutil.cpp
@@ -835,7 +835,7 @@ int dsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
   if (Crashed)
     (*Repro)->generate();
 
-  if (!AllOK)
+  if (!AllOK || Crashed)
     return EXIT_FAILURE;
 
   if (NeedsTempFiles) {
diff --git a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
index 5615a4493d20a1..5ce14d3f6b9cef 100644
--- a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
+++ b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp
@@ -1569,14 +1569,12 @@ TEST(BasicBlockDbgInfoTest, CloneTrailingRecordsToEmptyBlock) {
   // The trailing records should've been absorbed into NewBB.
   EXPECT_FALSE(BB.getTrailingDbgRecords());
   EXPECT_TRUE(NewBB->getTrailingDbgRecords());
-  if (NewBB->getTrailingDbgRecords()) {
-    EXPECT_EQ(
-        llvm::range_size(NewBB->getTrailingDbgRecords()->getDbgRecordRange()),
-        1u);
+  if (DbgMarker *Trailing = NewBB->getTrailingDbgRecords()) {
+    EXPECT_EQ(llvm::range_size(Trailing->getDbgRecordRange()), 1u);
+    // Drop the trailing records now, to prevent a cleanup assertion.
+    Trailing->eraseFromParent();
+    NewBB->deleteTrailingDbgRecords();
   }
-
-  // Drop the trailing records now, to prevent a cleanup assertion.
-  NewBB->deleteTrailingDbgRecords();
 }
 
 } // End anonymous namespace.
diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
index c543846eb2686e..01fe21eb5cfa43 100644
--- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp
+++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
@@ -130,6 +130,161 @@ define void @foo(i32 %v0) {
   EXPECT_NE(FortyThree, FortyTwo);
 }
 
+TEST_F(SandboxIRTest, ConstantFP) {
+  parseIR(C, R"IR(
+define void @foo(float %v0, double %v1) {
+  %fadd0 = fadd float %v0, 42.0
+  %fadd1 = fadd double %v1, 43.0
+  ret void
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+
+  auto &F = *Ctx.createFunction(&LLVMF);
+  auto &BB = *F.begin();
+  auto It = BB.begin();
+  auto *FAdd0 = cast<sandboxir::BinaryOperator>(&*It++);
+  auto *FAdd1 = cast<sandboxir::BinaryOperator>(&*It++);
+  auto *FortyTwo = cast<sandboxir::ConstantFP>(FAdd0->getOperand(1));
+  [[maybe_unused]] auto *FortyThree =
+      cast<sandboxir::ConstantFP>(FAdd1->getOperand(1));
+
+  auto *FloatTy = sandboxir::Type::getFloatTy(Ctx);
+  auto *DoubleTy = sandboxir::Type::getDoubleTy(Ctx);
+  auto *LLVMFloatTy = Type::getFloatTy(C);
+  auto *LLVMDoubleTy = Type::getDoubleTy(C);
+  // Check that creating an identical constant gives us the same object.
+  auto *NewFortyTwo = sandboxir::ConstantFP::get(FloatTy, 42.0);
+  EXPECT_EQ(NewFortyTwo, FortyTwo);
+  // Check get(Type, double).
+  auto *FortyFour =
+      cast<sandboxir::ConstantFP>(sandboxir::ConstantFP::get(FloatTy, 44.0));
+  auto *LLVMFortyFour =
+      cast<llvm::ConstantFP>(llvm::ConstantFP::get(LLVMFloatTy, 44.0));
+  EXPECT_NE(FortyFour, FortyTwo);
+  EXPECT_EQ(FortyFour, Ctx.getValue(LLVMFortyFour));
+  // Check get(Type, APFloat).
+  auto *FortyFive = cast<sandboxir::ConstantFP>(
+      sandboxir::ConstantFP::get(DoubleTy, APFloat(45.0)));
+  auto *LLVMFortyFive = cast<llvm::ConstantFP>(
+      llvm::ConstantFP::get(LLVMDoubleTy, APFloat(45.0)));
+  EXPECT_EQ(FortyFive, Ctx.getValue(LLVMFortyFive));
+  // Check get(Type, StringRef).
+  auto *FortySix = sandboxir::ConstantFP::get(FloatTy, "46.0");
+  EXPECT_EQ(FortySix, Ctx.getValue(llvm::ConstantFP::get(LLVMFloatTy, "46.0")));
+  // Check get(APFloat).
+  auto *FortySeven = sandboxir::ConstantFP::get(APFloat(47.0), Ctx);
+  EXPECT_EQ(FortySeven, Ctx.getValue(llvm::ConstantFP::get(C, APFloat(47.0))));
+  // Check getNaN().
+  {
+    auto *NaN = sandboxir::ConstantFP::getNaN(FloatTy);
+    EXPECT_EQ(NaN, Ctx.getValue(llvm::ConstantFP::getNaN(LLVMFloatTy)));
+  }
+  {
+    auto *NaN = sandboxir::ConstantFP::getNaN(FloatTy, /*Negative=*/true);
+    EXPECT_EQ(NaN, Ctx.getValue(llvm::ConstantFP::getNaN(LLVMFloatTy,
+                                                         /*Negative=*/true)));
+  }
+  {
+    auto *NaN = sandboxir::ConstantFP::getNaN(FloatTy, /*Negative=*/true,
+                                              /*Payload=*/1);
+    EXPECT_EQ(NaN, Ctx.getValue(llvm::ConstantFP::getNaN(
+                       LLVMFloatTy, /*Negative=*/true, /*Payload=*/1)));
+  }
+  // Check getQNaN().
+  {
+    auto *QNaN = sandboxir::ConstantFP::getQNaN(FloatTy);
+    EXPECT_EQ(QNaN, Ctx.getValue(llvm::ConstantFP::getQNaN(LLVMFloatTy)));
+  }
+  {
+    auto *QNaN = sandboxir::ConstantFP::getQNaN(FloatTy, /*Negative=*/true);
+    EXPECT_EQ(QNaN, Ctx.getValue(llvm::ConstantFP::getQNaN(LLVMFloatTy,
+                                                           /*Negative=*/true)));
+  }
+  {
+    APInt Payload(1, 1);
+    auto *QNaN =
+        sandboxir::ConstantFP::getQNaN(FloatTy, /*Negative=*/true, &Payload);
+    EXPECT_EQ(QNaN, Ctx.getValue(llvm::ConstantFP::getQNaN(
+                        LLVMFloatTy, /*Negative=*/true, &Payload)));
+  }
+  // Check getSNaN().
+  {
+    auto *SNaN = sandboxir::ConstantFP::getSNaN(FloatTy);
+    EXPECT_EQ(SNaN, Ctx.getValue(llvm::ConstantFP::getSNaN(LLVMFloatTy)));
+  }
+  {
+    auto *SNaN = sandboxir::ConstantFP::getSNaN(FloatTy, /*Negative=*/true);
+    EXPECT_EQ(SNaN, Ctx.getValue(llvm::ConstantFP::getSNaN(LLVMFloatTy,
+                                                           /*Negative=*/true)));
+  }
+  {
+    APInt Payload(1, 1);
+    auto *SNaN =
+        sandboxir::ConstantFP::getSNaN(FloatTy, /*Negative=*/true, &Payload);
+    EXPECT_EQ(SNaN, Ctx.getValue(llvm::ConstantFP::getSNaN(
+                        LLVMFloatTy, /*Negative=*/true, &Payload)));
+  }
+
+  // Check getZero().
+  {
+    auto *Zero = sandboxir::ConstantFP::getZero(FloatTy);
+    EXPECT_EQ(Zero, Ctx.getValue(llvm::ConstantFP::getZero(LLVMFloatTy)));
+  }
+  {
+    auto *Zero = sandboxir::ConstantFP::getZero(FloatTy, /*Negative=*/true);
+    EXPECT_EQ(Zero, Ctx.getValue(llvm::ConstantFP::getZero(LLVMFloatTy,
+                                                           /*Negative=*/true)));
+  }
+
+  // Check getNegativeZero().
+  auto *NegZero = cast<sandboxir::ConstantFP>(
+      sandboxir::ConstantFP::getNegativeZero(FloatTy));
+  EXPECT_EQ(NegZero,
+            Ctx.getValue(llvm::ConstantFP::getNegativeZero(LLVMFloatTy)));
+
+  // Check getInfinity().
+  {
+    auto *Inf = sandboxir::ConstantFP::getInfinity(FloatTy);
+    EXPECT_EQ(Inf, Ctx.getValue(llvm::ConstantFP::getInfinity(LLVMFloatTy)));
+  }
+  {
+    auto *Inf = sandboxir::ConstantFP::getInfinity(FloatTy, /*Negative=*/true);
+    EXPECT_EQ(Inf, Ctx.getValue(llvm::ConstantFP::getInfinity(
+                       LLVMFloatTy, /*Negative=*/true)));
+  }
+
+  // Check isValueValidForType().
+  APFloat V(1.1);
+  EXPECT_EQ(sandboxir::ConstantFP::isValueValidForType(FloatTy, V),
+            llvm::ConstantFP::isValueValidForType(LLVMFloatTy, V));
+  // Check getValueAPF().
+  EXPECT_EQ(FortyFour->getValueAPF(), LLVMFortyFour->getValueAPF());
+  // Check getValue().
+  EXPECT_EQ(FortyFour->getValue(), LLVMFortyFour->getValue());
+  // Check isZero().
+  EXPECT_EQ(FortyFour->isZero(), LLVMFortyFour->isZero());
+  EXPECT_TRUE(sandboxir::ConstantFP::getZero(FloatTy));
+  EXPECT_TRUE(sandboxir::ConstantFP::getZero(FloatTy, /*Negative=*/true));
+  // Check isNegative().
+  EXPECT_TRUE(cast<sandboxir::ConstantFP>(
+                  sandboxir::ConstantFP::getZero(FloatTy, /*Negative=*/true))
+                  ->isNegative());
+  // Check isInfinity().
+  EXPECT_TRUE(
+      cast<sandboxir::ConstantFP>(sandboxir::ConstantFP::getInfinity(FloatTy))
+          ->isInfinity());
+  // Check isNaN().
+  EXPECT_TRUE(
+      cast<sandboxir::ConstantFP>(sandboxir::ConstantFP::getNaN(FloatTy))
+          ->isNaN());
+  // Check isExactlyValue(APFloat).
+  EXPECT_TRUE(NegZero->isExactlyValue(NegZero->getValueAPF()));
+  // Check isExactlyValue(double).
+  EXPECT_TRUE(NegZero->isExactlyValue(-0.0));
+}
+
 TEST_F(SandboxIRTest, Use) {
   parseIR(C, R"IR(
 define i32 @foo(i32 %v0, i32 %v1) {
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index c5e4ad4219c91d..9b9be69ee38448 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -1,11 +1,8 @@
-from __future__ import print_function
-
 import argparse
 import bisect
 import collections
 import copy
 import glob
-import itertools
 import os
 import re
 import subprocess
@@ -517,12 +514,13 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
                 sep="",
                 file=sys.stderr,
             )
-        # Python 2.7 doesn't have subprocess.DEVNULL:
-        with open(os.devnull, "w") as devnull:
-            pp = subprocess.Popen(
-                preprocess_cmd, shell=True, stdin=devnull, stdout=subprocess.PIPE
-            )
-            ir_file = pp.stdout
+        pp = subprocess.Popen(
+            preprocess_cmd,
+            shell=True,
+            stdin=subprocess.DEVNULL,
+            stdout=subprocess.PIPE,
+        )
+        ir_file = pp.stdout
 
     if isinstance(cmd_args, list):
         args = [applySubstitutions(a, substitutions) for a in cmd_args]
diff --git a/llvm/utils/git/pre-push.py b/llvm/utils/git/pre-push.py
index d7ae3767d2923d..dfa009dd1a6f62 100755
--- a/llvm/utils/git/pre-push.py
+++ b/llvm/utils/git/pre-push.py
@@ -27,7 +27,6 @@
 """
 
 import argparse
-import os
 import shutil
 import subprocess
 import sys
@@ -70,14 +69,6 @@ def ask_confirm(prompt):
         return query.lower() == "y"
 
 
-def get_dev_null():
-    """Lazily create a /dev/null fd for use in shell()"""
-    global dev_null_fd
-    if dev_null_fd is None:
-        dev_null_fd = open(os.devnull, "w")
-    return dev_null_fd
-
-
 def shell(
     cmd,
     strip=True,
@@ -95,10 +86,8 @@ def shell(
         cwd_msg = " in %s" % cwd
     log_verbose("Running%s: %s" % (cwd_msg, " ".join(quoted_cmd)))
 
-    err_pipe = subprocess.PIPE
-    if ignore_errors:
-        # Silence errors if requested.
-        err_pipe = get_dev_null()
+    # Silence errors if requested.
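+    # (Illustrative aside, not part of this change: subprocess.DEVNULL is the
+    # Python 3.3+ replacement for a manually opened os.devnull handle, e.g.
+    #   subprocess.check_call(["git", "fetch"], stderr=subprocess.DEVNULL)
+    # needs no accompanying close() and is the idiom the one-liner below,
+    # and the similar cleanups elsewhere in this patch, rely on.)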
+ err_pipe = subprocess.DEVNULL if ignore_errors else subprocess.PIPE start = time.time() p = subprocess.Popen( diff --git a/llvm/utils/gn/gn.py b/llvm/utils/gn/gn.py index 290c6941bceea2..6b7919b7faeb92 100755 --- a/llvm/utils/gn/gn.py +++ b/llvm/utils/gn/gn.py @@ -42,7 +42,7 @@ def main(): if ( subprocess.call( "gn --version", - stdout=open(os.devnull, "w"), + stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT, shell=True, ) diff --git a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn index 7ff3faf63bedc9..f176d8b94b5322 100644 --- a/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/SandboxIR/BUILD.gn @@ -7,5 +7,6 @@ static_library("SandboxIR") { sources = [ "SandboxIR.cpp", "Tracker.cpp", + "Type.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn index 243a92f2e62587..aa594df8c164a1 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/BPF/BUILD.gn @@ -71,6 +71,7 @@ static_library("LLVMBPFCodeGen") { "BPFISelLowering.cpp", "BPFInstrInfo.cpp", "BPFMCInstLower.cpp", + "BPFMIChecking.cpp", "BPFMIPeephole.cpp", "BPFMISimplifyPatchable.cpp", "BPFPreserveDIType.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn index 2d246eccb872ea..02ef303a6946f3 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/SandboxIR/BUILD.gn @@ -9,5 +9,6 @@ unittest("SandboxIRTests") { sources = [ "SandboxIRTest.cpp", "TrackerTest.cpp", + "TypesTest.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn index 3a660a87d8af63..47b03b42d096d2 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn @@ -64,6 +64,7 @@ unittest("SupportTests") { "MemoryBufferRefTest.cpp", "MemoryBufferTest.cpp", "MemoryTest.cpp", + "ModRefTest.cpp", "NativeFormatTests.cpp", "OptimizedStructLayoutTest.cpp", "ParallelTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/IPO/BUILD.gn index f5b162dd102320..ad44635f107a16 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/IPO/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/IPO/BUILD.gn @@ -10,6 +10,7 @@ unittest("IPOTests") { sources = [ "AttributorTest.cpp", "FunctionSpecializationTest.cpp", + "ImportIDTableTests.cpp", "LowerTypeTests.cpp", "WholeProgramDevirt.cpp", ] diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 4d48b3de7a57ed..709dd922b8fa2f 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -19,6 +19,7 @@ include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Dialect/LLVMIR/BasicPtxBuilderInterface.td" +def LLVM_PointerGeneric : LLVM_PointerInAddressSpace<0>; def LLVM_PointerGlobal : LLVM_PointerInAddressSpace<1>; def LLVM_PointerShared : LLVM_PointerInAddressSpace<3>; @@ -531,8 +532,10 @@ def ProxyAlias : I32EnumAttrCase<"alias", 0, "alias">; def ProxyAsync : I32EnumAttrCase<"async", 1, "async">; def ProxyAsyncGlobal : I32EnumAttrCase<"async_global", 2, "async.global">; 
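// Aside (sketch, not in the original patch): an I32EnumAttrCase<"SYM", N, "str">
// binds the C++ enumerator ProxyKind::SYM (value N) to the assembly spelling
// "str"; the TENSORMAP/GENERIC cases added below follow this scheme, so they
// print as `tensormap` and `generic` in NVVM op syntax.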
def ProxyAsyncShared : I32EnumAttrCase<"async_shared", 3, "async.shared">;
+def ProxyTensorMap : I32EnumAttrCase<"TENSORMAP", 4, "tensormap">;
+def ProxyGeneric : I32EnumAttrCase<"GENERIC", 5, "generic">;
 def ProxyKind : I32EnumAttr<"ProxyKind", "Proxy kind",
-                [ProxyAlias, ProxyAsync, ProxyAsyncGlobal, ProxyAsyncShared]> {
+                [ProxyAlias, ProxyAsync, ProxyAsyncGlobal, ProxyAsyncShared, ProxyTensorMap, ProxyGeneric]> {
   let genSpecializedAttr = 0;
   let cppNamespace = "::mlir::NVVM";
 }
@@ -565,6 +568,80 @@ def NVVM_FenceProxyOp : NVVM_PTXBuilder_Op<"fence.proxy">,
   let hasVerifier = 1;
 }
 
+// Attrs describing the scope of the Memory Operation
+def MemScopeKindCTA : I32EnumAttrCase<"CTA", 0, "cta">;
+def MemScopeKindCluster : I32EnumAttrCase<"CLUSTER", 1, "cluster">;
+def MemScopeKindGPU : I32EnumAttrCase<"GPU", 2, "gpu">;
+def MemScopeKindSYS : I32EnumAttrCase<"SYS", 3, "sys">;
+
+def MemScopeKind : I32EnumAttr<"MemScopeKind", "NVVM Memory Scope kind",
+  [MemScopeKindCTA, MemScopeKindCluster, MemScopeKindGPU, MemScopeKindSYS]> {
+  let genSpecializedAttr = 0;
+  let cppNamespace = "::mlir::NVVM";
+}
+def MemScopeKindAttr : EnumAttr<NVVM_Dialect, MemScopeKind, "mem_scope"> {
+  let assemblyFormat = "`<` $value `>`";
+}
+
+def NVVM_FenceProxyAcquireOp : NVVM_Op<"fence.proxy.acquire">,
+  Arguments<(ins MemScopeKindAttr:$scope, LLVM_PointerGeneric:$addr, I32:$size,
+             DefaultValuedAttr<ProxyKindAttr, "ProxyKind::GENERIC">:$fromProxy,
+             DefaultValuedAttr<ProxyKindAttr, "ProxyKind::TENSORMAP">:$toProxy)> {
+  let summary = "Uni-directional proxy fence operation with acquire semantics";
+  let description = [{
+    `fence.proxy.acquire` is a uni-directional fence used to establish ordering
+    between a prior memory access performed via the generic proxy and a
+    subsequent memory access performed via the tensormap proxy
+
+    The address operand `addr` and the operand `size` together specify the
+    memory range `[addr, addr+size)` on which the ordering guarantees on the
+    memory accesses across the proxies is to be provided. The only supported
+    value for the `size` operand is 128 and must be an immediate. Generic Addressing
+    is used unconditionally, and the address specified by the operand `addr` must
+    fall within the `.global` state space. Otherwise, the behavior is undefined
+    [For more information, see PTX ISA]
+    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
+  }];
+
+  let assemblyFormat = "$scope $addr `,` $size (`from_proxy` `=` $fromProxy^)? (`to_proxy` `=` $toProxy^)? attr-dict";
+  let llvmBuilder = [{
+    createIntrinsicCall(
+        builder,
+        getUnidirectionalFenceProxyID($fromProxy, $toProxy, $scope, false),
+        {$addr, $size});
+  }];
+
+  let hasVerifier = 1;
+}
+
+def NVVM_FenceProxyReleaseOp : NVVM_Op<"fence.proxy.release">,
+  Arguments<(ins MemScopeKindAttr:$scope,
+             DefaultValuedAttr<ProxyKindAttr, "ProxyKind::GENERIC">:$fromProxy,
+             DefaultValuedAttr<ProxyKindAttr, "ProxyKind::TENSORMAP">:$toProxy)> {
+  let summary = "Uni-directional proxy fence operation with release semantics";
+  let description = [{
+    `fence.proxy.release` is a uni-directional fence used to establish ordering
+    between a prior memory access performed via the generic proxy and a
+    subsequent memory access performed via the tensormap proxy. `fence.proxy.release`
+    operation can form a release sequence that synchronizes with an acquire
+    sequence that contains the fence.proxy.acquire proxy fence operation
+    [For more information, see PTX ISA]
+    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-membar)
+  }];
+
+  let assemblyFormat = "$scope (`from_proxy` `=` $fromProxy^)?
(`to_proxy` `=` $toProxy^)? attr-dict";
+  let llvmBuilder = [{
+    createIntrinsicCall(builder, getUnidirectionalFenceProxyID(
+                                     $fromProxy, $toProxy, $scope, true));
+  }];
+
+  let hasVerifier = 1;
+}
+
 def SetMaxRegisterActionIncrease : I32EnumAttrCase<"increase", 0>;
 def SetMaxRegisterActionDecrease : I32EnumAttrCase<"decrease", 1>;
 def SetMaxRegisterAction : I32EnumAttr<"SetMaxRegisterAction", "NVVM set max register action",
diff --git a/mlir/include/mlir/IR/Block.h b/mlir/include/mlir/IR/Block.h
index e4fddfcb7608e6..67825eb1704bbe 100644
--- a/mlir/include/mlir/IR/Block.h
+++ b/mlir/include/mlir/IR/Block.h
@@ -27,8 +27,8 @@ template <typename ValueRangeT> class ValueTypeRange;
 
 /// `Block` represents an ordered list of `Operation`s.
-class Block : public IRObjectWithUseList<BlockOperand>,
-              public llvm::ilist_node_with_parent<Block, Region> {
+class alignas(8) Block : public IRObjectWithUseList<BlockOperand>,
+                         public llvm::ilist_node_with_parent<Block, Region> {
 public:
   explicit Block() = default;
   ~Block();
diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h
index 5f680e8eca7559..60113bdef16a23 100644
--- a/mlir/include/mlir/Transforms/DialectConversion.h
+++ b/mlir/include/mlir/Transforms/DialectConversion.h
@@ -1124,17 +1124,6 @@ struct ConversionConfig {
   // already been modified) and iterators into past IR state cannot be
   // represented at the moment.
   RewriterBase::Listener *listener = nullptr;
-
-  /// If set to "true", the dialect conversion attempts to build source/target/
-  /// argument materializations through the type converter API in lieu of
-  /// builtin.unrealized_conversion_cast ops. The conversion process fails if
-  /// at least one materialization could not be built.
-  ///
-  /// If set to "false", the dialect conversion does not does not build any
-  /// custom materializations and instead inserts
-  /// builtin.unrealized_conversion_cast ops to ensure that the resulting IR
-  /// is valid.
- bool buildMaterializations = true; }; //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 4d1896551101ed..2c7c3e9d535f7d 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -1004,6 +1004,10 @@ void NVVM::WgmmaMmaAsyncOp::getAsmValues( } } LogicalResult NVVM::FenceProxyOp::verify() { + if (getKind() == NVVM::ProxyKind::TENSORMAP) + return emitOpError() << "tensormap proxy is not a supported proxy kind"; + if (getKind() == NVVM::ProxyKind::GENERIC) + return emitOpError() << "generic proxy not a supported proxy kind"; if (getKind() == NVVM::ProxyKind::async_shared && !getSpace().has_value()) { return emitOpError() << "async_shared fence requires space attribute"; } @@ -1013,6 +1017,30 @@ LogicalResult NVVM::FenceProxyOp::verify() { return success(); } +LogicalResult NVVM::FenceProxyAcquireOp::verify() { + if (getFromProxy() != NVVM::ProxyKind::GENERIC) + return emitOpError("uni-directional proxies only support generic for " + "from_proxy attribute"); + + if (getToProxy() != NVVM::ProxyKind::TENSORMAP) + return emitOpError("uni-directional proxies only support tensormap " + "for to_proxy attribute"); + + return success(); +} + +LogicalResult NVVM::FenceProxyReleaseOp::verify() { + if (getFromProxy() != NVVM::ProxyKind::GENERIC) + return emitOpError("uni-directional proxies only support generic for " + "from_proxy attribute"); + + if (getToProxy() != NVVM::ProxyKind::TENSORMAP) + return emitOpError("uni-directional proxies only support tensormap " + "for to_proxy attribute"); + + return success(); +} + LogicalResult NVVM::SetMaxRegisterOp::verify() { if (getRegCount() % 8) return emitOpError("new register size must be multiple of 8"); diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 5f51d4b25ccc25..bd18bd66bb7cdb 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -1028,6 +1028,10 @@ LogicalResult tosa::TileOp::verify() { return emitOpError("expect 'multiples' array to have length ") << outputType.getRank() << " but got " << multiples.size() << "."; + if (llvm::any_of(multiples, [](int64_t v) { return v <= 0 && v != -1; })) + return emitOpError( + "expect element of 'multiples' to be positive integer or -1."); + return success(); } diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp index a09c24dda82afc..9cc66207660f64 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp @@ -120,6 +120,41 @@ static llvm::Intrinsic::ID getLdMatrixIntrinsicId(NVVM::MMALayout layout, } } +static unsigned getUnidirectionalFenceProxyID(NVVM::ProxyKind fromProxy, + NVVM::ProxyKind toProxy, + NVVM::MemScopeKind scope, + bool isRelease) { + if (fromProxy == NVVM::ProxyKind::GENERIC && + toProxy == NVVM::ProxyKind::TENSORMAP) { + switch (scope) { + case NVVM::MemScopeKind::CTA: { + if (isRelease) + return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_release_cta; + return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_cta; + } + case NVVM::MemScopeKind::CLUSTER: { + if (isRelease) + return llvm::Intrinsic:: + nvvm_fence_proxy_tensormap_generic_release_cluster; + return llvm::Intrinsic:: + nvvm_fence_proxy_tensormap_generic_acquire_cluster; + 
}
+    case NVVM::MemScopeKind::GPU: {
+      if (isRelease)
+        return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_release_gpu;
+      return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_gpu;
+    }
+    case NVVM::MemScopeKind::SYS: {
+      if (isRelease)
+        return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_release_sys;
+      return llvm::Intrinsic::nvvm_fence_proxy_tensormap_generic_acquire_sys;
+    }
+    }
+    llvm_unreachable("Unknown scope for uni-directional fence.proxy operation");
+  }
+  llvm_unreachable("Unsupported proxy kinds");
+}
+
 namespace {
 /// Implementation of the dialect interface that converts operations belonging
 /// to the NVVM dialect to LLVM IR.
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index cc9c9495e5155c..b23fb97959ed67 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -702,12 +702,14 @@ class UnresolvedMaterializationRewrite : public OperationRewrite {
     return rewrite->getKind() == Kind::UnresolvedMaterialization;
   }
 
-  void rollback() override;
-
   UnrealizedConversionCastOp getOperation() const {
     return cast<UnrealizedConversionCastOp>(op);
   }
 
+  void rollback() override;
+
+  void cleanup(RewriterBase &rewriter) override;
+
   /// Return the type converter of this materialization (which may be null).
   const TypeConverter *getConverter() const {
     return converterAndKind.getPointer();
@@ -764,7 +766,7 @@ namespace detail {
 struct ConversionPatternRewriterImpl : public RewriterBase::Listener {
   explicit ConversionPatternRewriterImpl(MLIRContext *ctx,
                                          const ConversionConfig &config)
-      : context(ctx), eraseRewriter(ctx), config(config) {}
+      : context(ctx), config(config) {}
 
   //===--------------------------------------------------------------------===//
   // State Management
@@ -832,7 +834,6 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener {
   //===--------------------------------------------------------------------===//
   // Materializations
   //===--------------------------------------------------------------------===//
-
   /// Build an unresolved materialization operation given an output type and set
   /// of input operands.
   Value buildUnresolvedMaterialization(MaterializationKind kind,
@@ -881,7 +882,7 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener {
 
   /// Erase the given op (unless it was already erased).
   void eraseOp(Operation *op) override {
-    if (wasErased(op))
+    if (erased.contains(op))
       return;
     op->dropAllUses();
     RewriterBase::eraseOp(op);
@@ -889,24 +890,17 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener {
 
   /// Erase the given block (unless it was already erased).
   void eraseBlock(Block *block) override {
-    if (wasErased(block))
+    if (erased.contains(block))
      return;
     assert(block->empty() && "expected empty block");
     block->dropAllDefinedValueUses();
     RewriterBase::eraseBlock(block);
   }
 
-  bool wasErased(void *ptr) const { return erased.contains(ptr); }
-
-  bool wasErased(OperationRewrite *rewrite) const {
-    return wasErased(rewrite->getOperation());
-  }
-
   void notifyOperationErased(Operation *op) override { erased.insert(op); }
 
   void notifyBlockErased(Block *block) override { erased.insert(block); }
 
-private:
   /// Pointers to all erased operations and blocks.
   DenseSet<void *> erased;
 };
@@ -918,11 +912,6 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener {
   /// MLIR context.
MLIRContext *context;
 
-  /// A rewriter that keeps track of ops/block that were already erased and
-  /// skips duplicate op/block erasures. This rewriter is used during the
-  /// "cleanup" phase.
-  SingleEraseRewriter eraseRewriter;
-
   // Mapping between replaced values that differ in type. This happens when
   // replacing a value with one of a different type.
   ConversionValueMapping mapping;
@@ -1069,6 +1058,10 @@ void UnresolvedMaterializationRewrite::rollback() {
   op->erase();
 }
 
+void UnresolvedMaterializationRewrite::cleanup(RewriterBase &rewriter) {
+  rewriter.eraseOp(op);
+}
+
 void ConversionPatternRewriterImpl::applyRewrites() {
   // Commit all rewrites.
   IRRewriter rewriter(context, config.listener);
@@ -1076,6 +1069,7 @@ void ConversionPatternRewriterImpl::applyRewrites() {
     rewrite->commit(rewriter);
 
   // Clean up all rewrites.
+  SingleEraseRewriter eraseRewriter(context);
   for (auto &rewrite : rewrites)
     rewrite->cleanup(eraseRewriter);
 }
@@ -2359,6 +2353,12 @@ struct OperationConverter {
                       ConversionPatternRewriterImpl &rewriterImpl,
                       DenseMap<Value, SmallVector<Value>> &inverseMapping);
 
+  /// Legalize any unresolved type materializations.
+  LogicalResult legalizeUnresolvedMaterializations(
+      ConversionPatternRewriter &rewriter,
+      ConversionPatternRewriterImpl &rewriterImpl,
+      DenseMap<Value, SmallVector<Value>> &inverseMapping);
+
   /// Legalize an operation result that was marked as "erased".
   LogicalResult
   legalizeErasedResult(Operation *op, OpResult result,
@@ -2405,56 +2405,6 @@ LogicalResult OperationConverter::convert(ConversionPatternRewriter &rewriter,
   return success();
 }
 
-static LogicalResult
-legalizeUnresolvedMaterialization(RewriterBase &rewriter,
-                                  UnresolvedMaterializationRewrite *rewrite) {
-  UnrealizedConversionCastOp op = rewrite->getOperation();
-  assert(!op.use_empty() &&
-         "expected that dead materializations have already been DCE'd");
-  Operation::operand_range inputOperands = op.getOperands();
-  Type outputType = op.getResultTypes()[0];
-
-  // Try to materialize the conversion.
-  if (const TypeConverter *converter = rewrite->getConverter()) {
-    rewriter.setInsertionPoint(op);
-    Value newMaterialization;
-    switch (rewrite->getMaterializationKind()) {
-    case MaterializationKind::Argument:
-      // Try to materialize an argument conversion.
-      newMaterialization = converter->materializeArgumentConversion(
-          rewriter, op->getLoc(), outputType, inputOperands);
-      if (newMaterialization)
-        break;
-      // If an argument materialization failed, fallback to trying a target
-      // materialization.
-      [[fallthrough]];
-    case MaterializationKind::Target:
-      newMaterialization = converter->materializeTargetConversion(
-          rewriter, op->getLoc(), outputType, inputOperands);
-      break;
-    case MaterializationKind::Source:
-      newMaterialization = converter->materializeSourceConversion(
-          rewriter, op->getLoc(), outputType, inputOperands);
-      break;
-    }
-    if (newMaterialization) {
-      assert(newMaterialization.getType() == outputType &&
-             "materialization callback produced value of incorrect type");
-      rewriter.replaceOp(op, newMaterialization);
-      return success();
-    }
-  }
-
-  InFlightDiagnostic diag = op->emitError()
-                            << "failed to legalize unresolved materialization "
-                               "from ("
-                            << inputOperands.getTypes() << ") to " << outputType
-                            << " that remained live after conversion";
-  diag.attachNote(op->getUsers().begin()->getLoc())
-      << "see existing live user here: " << *op->getUsers().begin();
-  return failure();
-}
-
 LogicalResult OperationConverter::convertOperations(ArrayRef<Operation *> ops) {
   if (ops.empty())
     return success();
@@ -2496,37 +2446,6 @@ LogicalResult OperationConverter::convertOperations(ArrayRef<Operation *> ops) {
   } else {
     rewriterImpl.applyRewrites();
   }
-
-  // Gather all unresolved materializations.
-  SmallVector<UnrealizedConversionCastOp> allCastOps;
-  DenseMap<Operation *, UnresolvedMaterializationRewrite *> rewriteMap;
-  for (std::unique_ptr<IRRewrite> &rewrite : rewriterImpl.rewrites) {
-    auto *mat = dyn_cast<UnresolvedMaterializationRewrite>(rewrite.get());
-    if (!mat)
-      continue;
-    if (rewriterImpl.eraseRewriter.wasErased(mat))
-      continue;
-    allCastOps.push_back(mat->getOperation());
-    rewriteMap[mat->getOperation()] = mat;
-  }
-
-  // Reconcile all UnrealizedConversionCastOps that were inserted by the
-  // dialect conversion frameworks. (Not the one that were inserted by
-  // patterns.)
-  SmallVector<UnrealizedConversionCastOp> remainingCastOps;
-  reconcileUnrealizedCasts(allCastOps, &remainingCastOps);
-
-  // Try to legalize all unresolved materializations.
-  if (config.buildMaterializations) {
-    IRRewriter rewriter(rewriterImpl.context, config.listener);
-    for (UnrealizedConversionCastOp castOp : remainingCastOps) {
-      auto it = rewriteMap.find(castOp.getOperation());
-      assert(it != rewriteMap.end() && "inconsistent state");
-      if (failed(legalizeUnresolvedMaterialization(rewriter, it->second)))
-        return failure();
-    }
-  }
-
   return success();
 }
 
@@ -2540,6 +2459,9 @@ OperationConverter::finalize(ConversionPatternRewriter &rewriter) {
   if (failed(legalizeConvertedOpResultTypes(rewriter, rewriterImpl,
                                             inverseMapping)))
     return failure();
+  if (failed(legalizeUnresolvedMaterializations(rewriter, rewriterImpl,
+                                                inverseMapping)))
+    return failure();
   return success();
 }
 
@@ -2655,6 +2577,279 @@ LogicalResult OperationConverter::legalizeConvertedArgumentTypes(
   return success();
 }
 
+/// Replace the results of a materialization operation with the given values.
+static void
+replaceMaterialization(ConversionPatternRewriterImpl &rewriterImpl,
+                       ResultRange matResults, ValueRange values,
+                       DenseMap<Value, SmallVector<Value>> &inverseMapping) {
+  matResults.replaceAllUsesWith(values);
+
+  // For each of the materialization results, update the inverse mappings to
+  // point to the replacement values.
+  for (auto [matResult, newValue] : llvm::zip(matResults, values)) {
+    auto inverseMapIt = inverseMapping.find(matResult);
+    if (inverseMapIt == inverseMapping.end())
+      continue;
+
+    // Update the reverse mapping, or remove the mapping if we couldn't update
+    // it. Not being able to update signals that the mapping would have become
+    // circular (i.e. %foo -> newValue -> %foo), which may occur as values are
+    // propagated through temporary materializations.
We simply drop the
+    // mapping, and let the post-conversion replacement logic handle updating
+    // uses.
+    for (Value inverseMapVal : inverseMapIt->second)
+      if (!rewriterImpl.mapping.tryMap(inverseMapVal, newValue))
+        rewriterImpl.mapping.erase(inverseMapVal);
+  }
+}
+
+/// Compute all of the unresolved materializations that will persist beyond the
+/// conversion process, and require inserting a proper user materialization for.
+static void computeNecessaryMaterializations(
+    DenseMap<Operation *, UnresolvedMaterializationRewrite *>
+        &materializationOps,
+    ConversionPatternRewriter &rewriter,
+    ConversionPatternRewriterImpl &rewriterImpl,
+    DenseMap<Value, SmallVector<Value>> &inverseMapping,
+    SetVector<UnresolvedMaterializationRewrite *> &necessaryMaterializations) {
+  // Helper function to check if the given value or a not yet materialized
+  // replacement of the given value is live.
+  // Note: `inverseMapping` maps from replaced values to original values.
+  auto isLive = [&](Value value) {
+    auto findFn = [&](Operation *user) {
+      auto matIt = materializationOps.find(user);
+      if (matIt != materializationOps.end())
+        return !necessaryMaterializations.count(matIt->second);
+      return rewriterImpl.isOpIgnored(user);
+    };
+    // A worklist is needed because a value may have gone through a chain of
+    // replacements and each of the replaced values may have live users.
+    SmallVector<Value> worklist;
+    worklist.push_back(value);
+    while (!worklist.empty()) {
+      Value next = worklist.pop_back_val();
+      if (llvm::find_if_not(next.getUsers(), findFn) != next.user_end())
+        return true;
+      // This value may be replacing another value that has a live user.
+      llvm::append_range(worklist, inverseMapping.lookup(next));
+    }
+    return false;
+  };
+
+  llvm::unique_function<Value(Value, Value, Type)> lookupRemappedValue =
+      [&](Value invalidRoot, Value value, Type type) {
+        // Check to see if the input operation was remapped to a variant of the
+        // output.
+        Value remappedValue = rewriterImpl.mapping.lookupOrDefault(value, type);
+        if (remappedValue.getType() == type && remappedValue != invalidRoot)
+          return remappedValue;
+
+        // Check to see if the input is a materialization operation that
+        // provides an inverse conversion. We just check blindly for
+        // UnrealizedConversionCastOp here, but it has no effect on correctness.
+        auto inputCastOp = value.getDefiningOp<UnrealizedConversionCastOp>();
+        if (inputCastOp && inputCastOp->getNumOperands() == 1)
+          return lookupRemappedValue(invalidRoot, inputCastOp->getOperand(0),
+                                     type);
+
+        return Value();
+      };
+
+  SetVector<UnresolvedMaterializationRewrite *> worklist;
+  for (auto &rewrite : rewriterImpl.rewrites) {
+    auto *mat = dyn_cast<UnresolvedMaterializationRewrite>(rewrite.get());
+    if (!mat)
+      continue;
+    materializationOps.try_emplace(mat->getOperation(), mat);
+    worklist.insert(mat);
+  }
+  while (!worklist.empty()) {
+    UnresolvedMaterializationRewrite *mat = worklist.pop_back_val();
+    UnrealizedConversionCastOp op = mat->getOperation();
+
+    // We currently only handle target materializations here.
+    assert(op->getNumResults() == 1 && "unexpected materialization type");
+    OpResult opResult = op->getOpResult(0);
+    Type outputType = opResult.getType();
+    Operation::operand_range inputOperands = op.getOperands();
+
+    // Try to forward propagate operands for user conversion casts that result
+    // in the input types of the current cast.
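+    // For intuition (sketch, not in the original code): this folds pairs like
+    //   %b = builtin.unrealized_conversion_cast %a : !A to !B
+    //   %c = builtin.unrealized_conversion_cast %b : !B to !A
+    // by wiring %a straight into %c's uses, so neither cast needs a real
+    // materialization from the type converter.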
+    for (Operation *user : llvm::make_early_inc_range(opResult.getUsers())) {
+      auto castOp = dyn_cast<UnrealizedConversionCastOp>(user);
+      if (!castOp)
+        continue;
+      if (castOp->getResultTypes() == inputOperands.getTypes()) {
+        replaceMaterialization(rewriterImpl, user->getResults(), inputOperands,
+                               inverseMapping);
+        necessaryMaterializations.remove(materializationOps.lookup(user));
+      }
+    }
+
+    // Try to avoid materializing a resolved materialization if possible.
+    // Handle the case of a 1-1 materialization.
+    if (inputOperands.size() == 1) {
+      // Check to see if the input operation was remapped to a variant of the
+      // output.
+      Value remappedValue =
+          lookupRemappedValue(opResult, inputOperands[0], outputType);
+      if (remappedValue && remappedValue != opResult) {
+        replaceMaterialization(rewriterImpl, opResult, remappedValue,
+                               inverseMapping);
+        necessaryMaterializations.remove(mat);
+        continue;
+      }
+    } else {
+      // TODO: Avoid materializing other types of conversions here.
+    }
+
+    // If the materialization does not have any live users, we don't need to
+    // generate a user materialization for it.
+    bool isMaterializationLive = isLive(opResult);
+    if (!isMaterializationLive)
+      continue;
+    if (!necessaryMaterializations.insert(mat))
+      continue;
+
+    // Reprocess input materializations to see if they have an updated status.
+    for (Value input : inputOperands) {
+      if (auto parentOp = input.getDefiningOp<UnrealizedConversionCastOp>()) {
+        if (auto *mat = materializationOps.lookup(parentOp))
+          worklist.insert(mat);
+      }
+    }
+  }
+}
+
+/// Legalize the given unresolved materialization. Returns success if the
+/// materialization was legalized, failure otherwise.
+static LogicalResult legalizeUnresolvedMaterialization(
+    UnresolvedMaterializationRewrite &mat,
+    DenseMap<Operation *, UnresolvedMaterializationRewrite *>
+        &materializationOps,
+    ConversionPatternRewriter &rewriter,
+    ConversionPatternRewriterImpl &rewriterImpl,
+    DenseMap<Value, SmallVector<Value>> &inverseMapping) {
+  auto findLiveUser = [&](auto &&users) {
+    auto liveUserIt = llvm::find_if_not(
+        users, [&](Operation *user) { return rewriterImpl.isOpIgnored(user); });
+    return liveUserIt == users.end() ? nullptr : *liveUserIt;
+  };
+
+  llvm::unique_function<Value(Value, Type)> lookupRemappedValue =
+      [&](Value value, Type type) {
+        // Check to see if the input operation was remapped to a variant of the
+        // output.
+        Value remappedValue = rewriterImpl.mapping.lookupOrDefault(value, type);
+        if (remappedValue.getType() == type)
+          return remappedValue;
+        return Value();
+      };
+
+  UnrealizedConversionCastOp op = mat.getOperation();
+  if (!rewriterImpl.ignoredOps.insert(op))
+    return success();
+
+  // We currently only handle target materializations here.
+  OpResult opResult = op->getOpResult(0);
+  Operation::operand_range inputOperands = op.getOperands();
+  Type outputType = opResult.getType();
+
+  // If any input to this materialization is another materialization, resolve
+  // the input first.
+  for (Value value : op->getOperands()) {
+    auto valueCast = value.getDefiningOp<UnrealizedConversionCastOp>();
+    if (!valueCast)
+      continue;
+
+    auto matIt = materializationOps.find(valueCast);
+    if (matIt != materializationOps.end())
+      if (failed(legalizeUnresolvedMaterialization(
+              *matIt->second, materializationOps, rewriter, rewriterImpl,
+              inverseMapping)))
+        return failure();
+  }
+
+  // Perform a last ditch attempt to avoid materializing a resolved
+  // materialization if possible.
+  // Handle the case of a 1-1 materialization.
+  if (inputOperands.size() == 1) {
+    // Check to see if the input operation was remapped to a variant of the
+    // output.
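+    // (Sketch of the idea, not original text: if the single input was already
+    // replaced by some %new whose type equals this cast's result type, the
+    // cast is redundant and its result can simply forward to %new.)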
+    Value remappedValue = lookupRemappedValue(inputOperands[0], outputType);
+    if (remappedValue && remappedValue != opResult) {
+      replaceMaterialization(rewriterImpl, opResult, remappedValue,
+                             inverseMapping);
+      return success();
+    }
+  } else {
+    // TODO: Avoid materializing other types of conversions here.
+  }
+
+  // Try to materialize the conversion.
+  if (const TypeConverter *converter = mat.getConverter()) {
+    rewriter.setInsertionPoint(op);
+    Value newMaterialization;
+    switch (mat.getMaterializationKind()) {
+    case MaterializationKind::Argument:
+      // Try to materialize an argument conversion.
+      newMaterialization = converter->materializeArgumentConversion(
+          rewriter, op->getLoc(), outputType, inputOperands);
+      if (newMaterialization)
+        break;
+      // If an argument materialization failed, fallback to trying a target
+      // materialization.
+      [[fallthrough]];
+    case MaterializationKind::Target:
+      newMaterialization = converter->materializeTargetConversion(
+          rewriter, op->getLoc(), outputType, inputOperands);
+      break;
+    case MaterializationKind::Source:
+      newMaterialization = converter->materializeSourceConversion(
+          rewriter, op->getLoc(), outputType, inputOperands);
+      break;
+    }
+    if (newMaterialization) {
+      assert(newMaterialization.getType() == outputType &&
+             "materialization callback produced value of incorrect type");
+      replaceMaterialization(rewriterImpl, opResult, newMaterialization,
+                             inverseMapping);
+      return success();
+    }
+  }
+
+  InFlightDiagnostic diag = op->emitError()
+                            << "failed to legalize unresolved materialization "
+                               "from ("
+                            << inputOperands.getTypes() << ") to " << outputType
+                            << " that remained live after conversion";
+  if (Operation *liveUser = findLiveUser(op->getUsers())) {
+    diag.attachNote(liveUser->getLoc())
+        << "see existing live user here: " << *liveUser;
+  }
+  return failure();
+}
+
+LogicalResult OperationConverter::legalizeUnresolvedMaterializations(
+    ConversionPatternRewriter &rewriter,
+    ConversionPatternRewriterImpl &rewriterImpl,
+    DenseMap<Value, SmallVector<Value>> &inverseMapping) {
+  // As an initial step, compute all of the inserted materializations that we
+  // expect to persist beyond the conversion process.
+  DenseMap<Operation *, UnresolvedMaterializationRewrite *> materializationOps;
+  SetVector<UnresolvedMaterializationRewrite *> necessaryMaterializations;
+  computeNecessaryMaterializations(materializationOps, rewriter, rewriterImpl,
+                                   inverseMapping, necessaryMaterializations);
+
+  // Once computed, legalize any necessary materializations.
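+  // (Aside, inferred from legalizeUnresolvedMaterialization above: the call
+  // recurses into materializations feeding each cast first, so a chain such as
+  //   %1 = cast %0 ; %2 = cast %1
+  // is legalized source-to-sink regardless of this loop's visit order.)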
+ for (auto *mat : necessaryMaterializations) { + if (failed(legalizeUnresolvedMaterialization( + *mat, materializationOps, rewriter, rewriterImpl, inverseMapping))) + return failure(); + } + return success(); +} + LogicalResult OperationConverter::legalizeErasedResult( Operation *op, OpResult result, ConversionPatternRewriterImpl &rewriterImpl) { diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 75362378daaaaa..156a8a468d5b42 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -1286,6 +1286,7 @@ func.func @warpgroup_matrix_multiply_m128n128k64( // CHECK-DAG: %[[S0:.+]] = builtin.unrealized_conversion_cast %[[arg0]] : !nvgpu.warpgroup.descriptor> to i64 // CHECK-DAG: %[[S1:.+]] = builtin.unrealized_conversion_cast %[[arg1]] : !nvgpu.warpgroup.descriptor> to i64 +// CHECK-DAG: %[[S2:.+]] = builtin.unrealized_conversion_cast %[[arg2]] : memref<128x128xf32, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[S3:.+]] = llvm.mlir.constant(0.000000e+00 : f32) : f32 // CHECK: %[[S4:.+]] = llvm.mlir.undef : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> // CHECK: %[[S5:.+]] = llvm.extractvalue %[[S4]][0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> @@ -1298,8 +1299,8 @@ func.func @warpgroup_matrix_multiply_m128n128k64( // CHECK: %[[S136:.+]] = llvm.insertvalue %[[S134]], %[[S135]][1] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> // CHECK: nvvm.wgmma.fence.aligned // CHECK: %[[S137:.+]] = llvm.mlir.undef : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> -// CHECK: %[[S138:.+]] = llvm.extractvalue %{{.*}}[0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> -// CHECK: %[[S139:.+]] = nvvm.wgmma.mma_async %[[S0]], %[[S1]], %[[S138]], , D[, , ], A[, , ], B[, , ] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> +// CHECK: %[[S138:.+]] = llvm.extractvalue %136[0] : !llvm.struct<(struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>, struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>)> +// CHECK: %[[S139:.+]] = nvvm.wgmma.mma_async %[[S0]], %1, %[[S138]], , D[, , ], A[, , ], B[, , ] : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)> -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, 
f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32)>
 // CHECK: nvvm.wgmma.mma_async
 // CHECK: nvvm.wgmma.mma_async
 // CHECK: %[[S154:.+]] = nvvm.wgmma.mma_async
diff --git a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
index ab18ce05e355d3..a192434c5accf8 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
@@ -80,7 +80,6 @@ func.func @no_layout_to_dyn_layout_cast(%m: memref<?xf32>) -> memref<?xf32, strided<[?], offset: ?>>
   // expected-error @+1 {{failed to legalize unresolved materialization from ('memref<?xf32>') to 'memref<?xf32, strided<[?], offset: ?>>' that remained live after conversion}}
   %1 = bufferization.to_memref %0 : memref<?xf32, strided<[?], offset: ?>>
-  // expected-note @below{{see existing live user here}}
   return %1 : memref<?xf32, strided<[?], offset: ?>>
 }
 
diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir
index 54d299a3efcecd..8b42e952bb1b7c 100644
--- a/mlir/test/Dialect/Tosa/invalid.mlir
+++ b/mlir/test/Dialect/Tosa/invalid.mlir
@@ -538,6 +538,24 @@ func.func @test_tile_invalid_multiples() {
 
 // -----
 
+func.func @test_tile_invalid_multiples_value() {
+  %0 = tensor.empty() : tensor<4x31xf32>
+  // expected-error@+1 {{'tosa.tile' op expect element of 'multiples' to be positive integer or -1.}}
+  %1 = tosa.tile %0 {multiples = array<i64: 2, -2>} : (tensor<4x31xf32>) -> tensor<4x31xf32>
+  return
+}
+
+// -----
+
+func.func @test_tile_io_rank_mismatch() {
+  %0 = tensor.empty() : tensor<4x31xf32>
+  // expected-error@+1 {{'tosa.tile' op expect same input and output tensor rank.}}
+  %1 = tosa.tile %0 {multiples = array<i64: 2, 2>} : (tensor<4x31xf32>) -> tensor<4x31x31xf32>
+  return
+}
+
+// -----
+
 // CHECK-LABEL: @test_invalid_constant_permutation
 func.func @test_invalid_constant_permutation() {
   // expected-error@+3 {{permutation must be within input bounds}}
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/outerproduct-f32.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/outerproduct-f32.mlir
index cc05940fb6d02c..0ee016627440f7 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/outerproduct-f32.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/outerproduct-f32.mlir
@@ -22,7 +22,7 @@ func.func @test_outerproduct_no_accumulator_4x4xf32() {
   %c0 = arith.constant 0 : index
 
-  %vector_i32 = llvm.intr.experimental.stepvector : vector<[4]xi32>
+  %vector_i32 = llvm.intr.stepvector : vector<[4]xi32>
   %vector = arith.sitofp %vector_i32 : vector<[4]xi32> to vector<[4]xf32>
   %tile = vector.outerproduct %vector, %vector : vector<[4]xf32>, vector<[4]xf32>
 
@@ -47,7 +47,7 @@ func.func @test_outerproduct_with_accumulator_4x4xf32() {
   %f10 = arith.constant 10.0 : f32
   %acc = vector.splat %f10 : vector<[4]x[4]xf32>
 
-  %vector_i32 = llvm.intr.experimental.stepvector : vector<[4]xi32>
+  %vector_i32 = llvm.intr.stepvector : vector<[4]xi32>
   %vector = arith.sitofp %vector_i32 : vector<[4]xi32> to vector<[4]xf32>
   %tile = vector.outerproduct %vector, %vector, %acc : vector<[4]xf32>, vector<[4]xf32>
 
@@ -71,7 +71,7 @@ func.func @test_masked_outerproduct_no_accumulator_4x4xf32() {
   %c0 = arith.constant 0 : index
   %ones = arith.constant dense<1> : vector<[4]xi32>
 
-  %step_vector = llvm.intr.experimental.stepvector : vector<[4]xi32>
+  %step_vector = llvm.intr.stepvector : vector<[4]xi32>
   %vector_i32 = arith.addi %step_vector, %ones : vector<[4]xi32>
   %vector = arith.sitofp %vector_i32 : vector<[4]xi32> to vector<[4]xf32>
 
@@ -104,7 +104,7 @@ func.func @test_masked_outerproduct_with_accumulator_4x4xf32() {
   %f10 = arith.constant 10.0 : f32
   %acc = vector.splat %f10 : vector<[4]x[4]xf32>
 
-  %step_vector = llvm.intr.experimental.stepvector : vector<[4]xi32>
+  %step_vector = llvm.intr.stepvector : vector<[4]xi32>
   %vector_i32 = arith.addi %step_vector, %ones : vector<[4]xi32>
   %vector = arith.sitofp %vector_i32 : vector<[4]xi32> to vector<[4]xf32>
 
diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
new file mode 100644
index 00000000000000..0e563808da970b
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir
@@ -0,0 +1,33 @@
+// RUN: mlir-translate -verify-diagnostics -split-input-file -mlir-to-llvmir %s
+
+// -----
+
+llvm.func @nvvm_fence_proxy_acquire(%addr : !llvm.ptr, %size : i32) {
+  // expected-error @below {{'nvvm.fence.proxy.acquire' op uni-directional proxies only support generic for from_proxy attribute}}
+  nvvm.fence.proxy.acquire #nvvm.mem_scope<cta> %addr, %size from_proxy=#nvvm.proxy_kind<tensormap> to_proxy=#nvvm.proxy_kind<tensormap>
+  llvm.return
+}
+
+// -----
+
+llvm.func @nvvm_fence_proxy_release() {
+  // expected-error @below {{'nvvm.fence.proxy.release' op uni-directional proxies only support generic for from_proxy attribute}}
+  nvvm.fence.proxy.release #nvvm.mem_scope<cta> from_proxy=#nvvm.proxy_kind<tensormap> to_proxy=#nvvm.proxy_kind<tensormap>
+  llvm.return
+}
+
+// -----
+
+llvm.func @nvvm_fence_proxy_acquire(%addr : !llvm.ptr, %size : i32) {
+  // expected-error @below {{'nvvm.fence.proxy.acquire' op uni-directional proxies only support tensormap for to_proxy attribute}}
+  nvvm.fence.proxy.acquire #nvvm.mem_scope<cta> %addr, %size from_proxy=#nvvm.proxy_kind<generic> to_proxy=#nvvm.proxy_kind<generic>
+  llvm.return
+}
+
+// -----
+
+llvm.func @nvvm_fence_proxy_release() {
+  // expected-error @below {{'nvvm.fence.proxy.release' op uni-directional proxies only support tensormap for to_proxy attribute}}
+  nvvm.fence.proxy.release #nvvm.mem_scope<cta> from_proxy=#nvvm.proxy_kind<generic> to_proxy=#nvvm.proxy_kind<generic>
+  llvm.return
+}
\ No newline at end of file
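The four negative tests above pin down the verifier contract for the uni-directional fence proxies: ``from_proxy`` must be ``generic`` and ``to_proxy`` must be ``tensormap``. A minimal sketch of the shared check implied by these diagnostics follows; the helper name and the accessor/enum spellings are assumptions for illustration, not code from this patch.

.. code-block:: c++

  // Sketch of the uni-directional proxy check implied by the diagnostics
  // above. The helper name and enum spellings are illustrative assumptions.
  static LogicalResult verifyUniDirectionalProxy(Operation *op,
                                                 NVVM::ProxyKind fromProxy,
                                                 NVVM::ProxyKind toProxy) {
    // Only the generic -> tensormap transition is modeled.
    if (fromProxy != NVVM::ProxyKind::GENERIC)
      return op->emitOpError("uni-directional proxies only support generic "
                             "for from_proxy attribute");
    if (toProxy != NVVM::ProxyKind::TENSORMAP)
      return op->emitOpError("uni-directional proxies only support tensormap "
                             "for to_proxy attribute");
    return success();
  }

Both ``nvvm.fence.proxy.acquire`` and ``nvvm.fence.proxy.release`` would funnel through a check of this shape, which is why the same two messages appear for both ops.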
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index a8ae4d97888c90..6e2787d121ae64 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -574,3 +574,40 @@ llvm.func @kernel_func(%arg0: !llvm.ptr {llvm.byval = i32, nvvm.grid_constant})
 llvm.func @kernel_func(%arg0: !llvm.ptr {llvm.byval = i32, nvvm.grid_constant}, %arg1: f32, %arg2: !llvm.ptr {llvm.byval = f32, nvvm.grid_constant}) attributes {nvvm.kernel} {
   llvm.return
 }
+
+
+// -----
+// CHECK-LABEL: @nvvm_fence_proxy_tensormap_generic_release
+llvm.func @nvvm_fence_proxy_tensormap_generic_release() {
+  %c128 = llvm.mlir.constant(128) : i32
+  // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cta()
+  nvvm.fence.proxy.release #nvvm.mem_scope<cta>
+
+  // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.cluster()
+  nvvm.fence.proxy.release #nvvm.mem_scope<cluster>
+
+  // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.gpu()
+  nvvm.fence.proxy.release #nvvm.mem_scope<gpu>
+
+  // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.release.sys()
+  nvvm.fence.proxy.release #nvvm.mem_scope<sys>
+  llvm.return
+}
+
+// -----
+// CHECK-LABEL: @nvvm_fence_proxy_tensormap_generic_acquire
+llvm.func @nvvm_fence_proxy_tensormap_generic_acquire(%addr : !llvm.ptr) {
+  %c128 = llvm.mlir.constant(128) : i32
+  // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cta(ptr {{%[0-9]+}}, i32 128)
+  nvvm.fence.proxy.acquire #nvvm.mem_scope<cta> %addr, %c128
+
+  // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.cluster(ptr {{%[0-9]+}}, i32 128)
+  nvvm.fence.proxy.acquire #nvvm.mem_scope<cluster> %addr, %c128
+
+  // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.gpu(ptr {{%[0-9]+}}, i32 128)
+  nvvm.fence.proxy.acquire #nvvm.mem_scope<gpu> %addr, %c128
+
+  // CHECK: call void @llvm.nvvm.fence.proxy.tensormap_generic.acquire.sys(ptr {{%[0-9]+}}, i32 128)
+  nvvm.fence.proxy.acquire #nvvm.mem_scope<sys> %addr, %c128
+  llvm.return
+}
\ No newline at end of file
diff --git a/mlir/test/Transforms/test-legalize-type-conversion.mlir b/mlir/test/Transforms/test-legalize-type-conversion.mlir
index f130adff42f8cd..cf2c9f6a8ec441 100644
--- a/mlir/test/Transforms/test-legalize-type-conversion.mlir
+++ b/mlir/test/Transforms/test-legalize-type-conversion.mlir
@@ -4,7 +4,6 @@ func.func @test_invalid_arg_materialization(
   // expected-error@below {{failed to legalize unresolved materialization from () to 'i16' that remained live after conversion}}
   %arg0: i16) {
-  // expected-note@below{{see existing live user here}}
   "foo.return"(%arg0) : (i16) -> ()
 }
 
@@ -23,7 +22,6 @@ func.func @test_valid_arg_materialization(%arg0: i64) {
 func.func @test_invalid_result_materialization() {
   // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}}
   %result = "test.type_producer"() : () -> f16
-  // expected-note@below{{see existing live user here}}
   "foo.return"(%result) : (f16) -> ()
 }
 
@@ -32,7 +30,6 @@ func.func @test_invalid_result_materialization() {
   // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}}
   %result = "test.type_producer"() : () -> f16
-  // expected-note@below{{see existing live user here}}
   "foo.return"(%result) : (f16) -> ()
 }
 
@@ -52,7 +49,6 @@ func.func @test_transitive_use_materialization() {
 func.func @test_transitive_use_invalid_materialization() {
   // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}}
   %result = "test.another_type_producer"() : () -> f16
-  // expected-note@below{{see existing live user here}}
   "foo.return"(%result) : (f16) -> ()
 }
 
@@ -103,9 +99,9 @@ func.func @test_block_argument_not_converted() {
 func.func @test_signature_conversion_no_converter() {
   "test.signature_conversion_no_converter"() ({
     // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f32' that remained live after conversion}}
+    // expected-note@below {{see existing live user here}}
   ^bb0(%arg0: f32):
     "test.type_consumer"(%arg0) : (f32) -> ()
-    // expected-note@below{{see existing live user here}}
     "test.return"(%arg0) : (f32) -> ()
   }) : () -> ()
   return
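For orientation, the "unresolved materialization" failures exercised above come from the pending ``builtin.unrealized_conversion_cast`` ops that the conversion driver inserts and later tries to resolve through the ``TypeConverter`` callbacks dispatched in ``legalizeUnresolvedMaterialization`` earlier in this patch. A minimal sketch of registering such callbacks, assuming an illustrative f16-to-f32 conversion (the helper function and the chosen types are not part of this patch; only the registration API is real):

.. code-block:: c++

  #include "mlir/Dialect/Arith/IR/Arith.h"
  #include "mlir/Transforms/DialectConversion.h"

  using namespace mlir;

  // Illustrative converter: maps f16 to f32 and knows how to bridge values.
  TypeConverter buildF16ToF32Converter() {
    TypeConverter converter;
    // Later-registered conversions are tried first, so register the
    // identity fallback before the f16 -> f32 rule.
    converter.addConversion([](Type t) { return t; });
    converter.addConversion([](Float16Type t) -> Type {
      return Float32Type::get(t.getContext());
    });
    // Target materialization: bridges old f16 values to new f32 uses. If no
    // registered callback produces a value of the requested type, the
    // pending cast stays live and the driver emits "failed to legalize
    // unresolved materialization", exactly what the tests above expect.
    converter.addTargetMaterialization([](OpBuilder &b, Type resultType,
                                          ValueRange inputs,
                                          Location loc) -> Value {
      if (inputs.size() != 1 || !isa<Float32Type>(resultType))
        return Value();
      return b.create<arith::ExtFOp>(loc, resultType, inputs[0]);
    });
    return converter;
  }

A converter built this way would let the pending casts fold away in the passing cases, while the failing tests above deliberately provide no usable materialization and so trip the diagnostic.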
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index 98d0ddd9a2be11..f0d4f35ba3e229 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -18,12 +18,15 @@
 # name: The name of this test suite.
 config.name = "MLIR"
 
+# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites.
+# See https://github.com/llvm/llvm-project/issues/106636 for more details.
+#
 # We prefer the lit internal shell which provides a better user experience on failures
 # unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 env var.
 use_lit_shell = True
 lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL")
 if lit_shell_env:
-    use_lit_shell = not lit.util.pythonize_bool(lit_shell_env)
+    use_lit_shell = lit.util.pythonize_bool(lit_shell_env)
 
 config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell)
diff --git a/polly/test/UnitIsl/lit.cfg b/polly/test/UnitIsl/lit.cfg
index 0944d543572d86..4b68f1460c3d83 100644
--- a/polly/test/UnitIsl/lit.cfg
+++ b/polly/test/UnitIsl/lit.cfg
@@ -17,12 +17,15 @@ config.name = 'Polly - isl unit tests'
 # For now we require '&&' between commands, until they get globally killed and
 # the test runner updated.
 #
+# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites.
+# See https://github.com/llvm/llvm-project/issues/106636 for more details.
+#
 # We prefer the lit internal shell which provides a better user experience on failures
 # unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 env var.
 use_lit_shell = True
 lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL")
 if lit_shell_env:
-    use_lit_shell = not lit.util.pythonize_bool(lit_shell_env)
+    use_lit_shell = lit.util.pythonize_bool(lit_shell_env)
 
 config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell)
diff --git a/polly/test/lit.cfg b/polly/test/lit.cfg
index 156c1f97f5d3ae..075ebdacbdc946 100644
--- a/polly/test/lit.cfg
+++ b/polly/test/lit.cfg
@@ -20,12 +20,15 @@ config.name = 'Polly'
 # For now we require '&&' between commands, until they get globally killed and
 # the test runner updated.
 #
+# TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites.
+# See https://github.com/llvm/llvm-project/issues/106636 for more details.
+#
 # We prefer the lit internal shell which provides a better user experience on failures
 # unless the user explicitly disables it with LIT_USE_INTERNAL_SHELL=0 env var.
 use_lit_shell = True
 lit_shell_env = os.environ.get("LIT_USE_INTERNAL_SHELL")
 if lit_shell_env:
-    use_lit_shell = not lit.util.pythonize_bool(lit_shell_env)
+    use_lit_shell = lit.util.pythonize_bool(lit_shell_env)
 
 config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell)
diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index 1bf6cdbb447a4c..b2dcc696b0ad06 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
@@ -4160,6 +4160,7 @@ cc_library(
         ":Option",
         ":Support",
         ":WindowsManifest",
+        ":config",
     ],
 )
 
@@ -4601,6 +4602,7 @@ cc_binary(
         ":Target",
         ":TargetParser",
         ":TransformUtils",
+        ":config",
     ],
 )