0053-RISCV-WIP-Codegen-support-for-RV32F-fused-multiply-a.patch

From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb@lowrisc.org>
Subject: [RISCV][WIP] Codegen support for RV32F fused multiply-add operations

---
 lib/Target/RISCV/RISCVInstrInfoF.td | 10 +++++++
 test/CodeGen/RISCV/float-fma.ll     | 54 +++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 test/CodeGen/RISCV/float-fma.ll

diff --git a/lib/Target/RISCV/RISCVInstrInfoF.td b/lib/Target/RISCV/RISCVInstrInfoF.td
index fa64775af00..8c2a8ace895 100644
--- a/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -239,6 +239,16 @@ def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>;
 def : PatFpr32Fpr32<fminnum, FMIN_S>;
 def : PatFpr32Fpr32<fmaxnum, FMAX_S>;
 
+/// Fused multiply-add operations
+
+def : Pat<(fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3),
+          (FMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+// fmsub: rs1*rs2-rs3, matched as an fma whose addend is wrapped in fneg
+def : Pat<(fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)),
+          (FMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
+// TODO: other FMA patterns
+
 /// Setcc
 
 def : PatFpr32Fpr32<seteq, FEQ_S>;
diff --git a/test/CodeGen/RISCV/float-fma.ll b/test/CodeGen/RISCV/float-fma.ll
new file mode 100644
index 00000000000..ca6baba1530
--- /dev/null
+++ b/test/CodeGen/RISCV/float-fma.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32IF %s
+
+declare float @llvm.fma.f32(float, float, float)
+
+define float @fmadd_s_fma_intrinsic(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fmadd_s_fma_intrinsic:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fmv.w.x ft0, a2
+; RV32IF-NEXT:    fmv.w.x ft1, a1
+; RV32IF-NEXT:    fmv.w.x ft2, a0
+; RV32IF-NEXT:    fmadd.s ft0, ft2, ft1, ft0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    jalr zero, ra, 0
+  %1 = call float @llvm.fma.f32(float %a, float %b, float %c) ; -> fmadd.s
+  ret float %1
+}
+
+declare float @llvm.fmuladd.f32(float, float, float)
+
+define float @fmadd_s_fmuladd_intrinsic(float %a, float %b, float %c) nounwind {
+; Use of fmadd depends on TargetLowering::isFMAFasterThanFMulAndFAdd
+; RV32IF-LABEL: fmadd_s_fmuladd_intrinsic:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    fmv.w.x ft0, a1
+; RV32IF-NEXT:    fmv.w.x ft1, a0
+; RV32IF-NEXT:    fmul.s ft0, ft1, ft0
+; RV32IF-NEXT:    fmv.w.x ft1, a2
+; RV32IF-NEXT:    fadd.s ft0, ft0, ft1
+; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    jalr zero, ra, 0
+  %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
+  ret float %1
+}
+
+define float @fmsub_s_fma_intrinsic(float %a, float %b, float %c) nounwind {
+; TODO: the DAG combiner converts the fneg of a bitcasted value to an xor,
+;       so the fmsub pattern fails to match and fmadd.s is emitted instead
+; RV32IF-LABEL: fmsub_s_fma_intrinsic:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    lui a3, 524288
+; RV32IF-NEXT:    addi a3, a3, 0
+; RV32IF-NEXT:    xor a2, a2, a3
+; RV32IF-NEXT:    fmv.w.x ft0, a2
+; RV32IF-NEXT:    fmv.w.x ft1, a1
+; RV32IF-NEXT:    fmv.w.x ft2, a0
+; RV32IF-NEXT:    fmadd.s ft0, ft2, ft1, ft0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
+; RV32IF-NEXT:    jalr zero, ra, 0
+  %1 = fsub float -0.00, %c
+  %2 = call float @llvm.fma.f32(float %a, float %b, float %1)
+  ret float %2
+}
-- 
2.16.2