-
Notifications
You must be signed in to change notification settings - Fork 53
/
0053-RISCV-WIP-Codegen-support-for-RV32F-fused-multiply-a.patch
94 lines (89 loc) · 3.33 KB
/
0053-RISCV-WIP-Codegen-support-for-RV32F-fused-multiply-a.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <[email protected]>
Subject: [RISCV][WIP] Codegen support for RV32F fused multiply-add operations
---
lib/Target/RISCV/RISCVInstrInfoF.td | 10 +++++++
test/CodeGen/RISCV/float-fma.ll | 54 +++++++++++++++++++++++++++++++++++++
2 files changed, 64 insertions(+)
create mode 100644 test/CodeGen/RISCV/float-fma.ll
diff --git a/lib/Target/RISCV/RISCVInstrInfoF.td b/lib/Target/RISCV/RISCVInstrInfoF.td
index fa64775af00..8c2a8ace895 100644
--- a/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -239,6 +239,16 @@ def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>;
def : PatFpr32Fpr32<fminnum, FMIN_S>;
def : PatFpr32Fpr32<fmaxnum, FMAX_S>;
+/// Fused multiply-add operations (trailing 0b111 operand = dynamic rounding mode)
+
+def : Pat<(fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3),
+ (FMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+// fmsub: rs1*rs2-rs3
+def : Pat<(fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)),
+ (FMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+
+// TODO: other FMA patterns (fnmsub, fnmadd)
+
/// Setcc
def : PatFpr32Fpr32<seteq, FEQ_S>;
diff --git a/test/CodeGen/RISCV/float-fma.ll b/test/CodeGen/RISCV/float-fma.ll
new file mode 100644
index 00000000000..ca6baba1530
--- /dev/null
+++ b/test/CodeGen/RISCV/float-fma.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32IF %s
+
+declare float @llvm.fma.f32(float, float, float)
+
+define float @fmadd_s_fma_intrinsic(float %a, float %b, float %c) nounwind {
+; RV32IF-LABEL: fmadd_s_fma_intrinsic:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fmv.w.x ft0, a2
+; RV32IF-NEXT: fmv.w.x ft1, a1
+; RV32IF-NEXT: fmv.w.x ft2, a0
+; RV32IF-NEXT: fmadd.s ft0, ft2, ft1, ft0
+; RV32IF-NEXT: fmv.x.w a0, ft0
+; RV32IF-NEXT: jalr zero, ra, 0
+ %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
+ ret float %1
+}
+
+declare float @llvm.fmuladd.f32(float, float, float)
+
+define float @fmadd_s_fmuladd_intrinsic(float %a, float %b, float %c) nounwind {
+; Use of fmadd depends on TargetLowering::isFMAFasterThanFMulAndFAdd
+; RV32IF-LABEL: fmadd_s_fmuladd_intrinsic:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: fmv.w.x ft0, a1
+; RV32IF-NEXT: fmv.w.x ft1, a0
+; RV32IF-NEXT: fmul.s ft0, ft1, ft0
+; RV32IF-NEXT: fmv.w.x ft1, a2
+; RV32IF-NEXT: fadd.s ft0, ft0, ft1
+; RV32IF-NEXT: fmv.x.w a0, ft0
+; RV32IF-NEXT: jalr zero, ra, 0
+ %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
+ ret float %1
+}
+
+define float @fmsub_s_fma_intrinsic(float %a, float %b, float %c) nounwind {
+; TODO: the DAG combiner converts the fneg of a bitcasted value to an xor,
+; meaning the fmsub pattern fails to match and fmadd.s is selected instead
+; RV32IF-LABEL: fmsub_s_fma_intrinsic:
+; RV32IF: # %bb.0:
+; RV32IF-NEXT: lui a3, 524288
+; RV32IF-NEXT: addi a3, a3, 0
+; RV32IF-NEXT: xor a2, a2, a3
+; RV32IF-NEXT: fmv.w.x ft0, a2
+; RV32IF-NEXT: fmv.w.x ft1, a1
+; RV32IF-NEXT: fmv.w.x ft2, a0
+; RV32IF-NEXT: fmadd.s ft0, ft2, ft1, ft0
+; RV32IF-NEXT: fmv.x.w a0, ft0
+; RV32IF-NEXT: jalr zero, ra, 0
+ %1 = fsub float -0.00, %c
+ %2 = call float @llvm.fma.f32(float %a, float %b, float %1)
+ ret float %2
+}
--
2.16.2