// File: cordic16p8.v (repository forked from testaco/DCP6)
//
// cordic16p8.v - 16-bit Input/8-bit Output Pipelined CORDIC Processing Engine
//
// (C) Copyright 2010 John B. Stephensen
//
// This Verilog source file and all its derivatives are licensed only for
// personal non-profit educational use in the Amateur Radio Service and
// the license is not transferrable. The information is provided as-is for
// experimental purposes and the author does not warranty its freedom
// from defects or its suitability for any specific application.
//
// This module implements the CORDIC algorithm with 16-bit X/Y and 12-bit Z internal
// resolution. The X and Y inputs are rotated into the first and fourth quadrants before
// microrotations. Side effects during rotation multiply the accumulated X and Y values
// by a factor of 1.64676, which is corrected with a multiplier on the X output (the Y
// result is not output). The accumulated Z value is also corrected by an offset of
// +/-Pi/2 radians using a 2-bit adder on the output.
//
// The CORDIC engine is operated in vector mode for rectangular to polar conversion.
// Vector mode rotates X and Y until Y is zero. 8 rotations are used for 7-bit accuracy.
// Accuracy will decrease when both the X and Y inputs are less than 1/64 of full scale.
// Rotation and correction take 10 clock cycles.
//
// Truncate angle: 409 LUTs, 357 registers and 1 DSP48A1 are used. 199 MHz maximum clock rate.
// Round angle: 418 LUTs, 358 registers and 1 DSP48A1 are used. 199 MHz maximum clock rate.
//
// History:
// 12-27-10 created from cordic16 (serial)
//
module cordic16p8(
    input [15:0] xi,yi,     // 16-bit X and Y inputs
    input iv,               // load input data
    output [7:0] xo,zo,     // 8-bit X and Z outputs
    output ov,              // output data ready
    input clk               // master clock
    );
    // Pipelined vectoring-mode CORDIC: converts the rectangular input (xi,yi) into an
    // 8-bit magnitude (xo) and an 8-bit phase angle (zo).
    //
    // Pipeline (one result per clock):
    //   clock 1      coarse rotation register (x0,y0)
    //   clocks 2-9   eight add/subtract micro-rotation stages (x1..x8, y1..y7, z1..z8)
    //   clock 10     gain-correction multiplier output register and phase correction
    // ov is iv delayed by 10 clocks through the shift register v.
    // NOTE(review): the stage count above assumes addsub16s/addsub12s each have exactly
    // one clock of latency (they take CLK/CE/RST); confirm against the core definitions.

    // Arctangent table for the Z accumulator. Angles are 12 bits wide with
    // 4096 counts per 360 degrees, so 12'h200 is 45 degrees = atan(2^0).
    localparam [11:0] ATAN_S0 = 12'h200;    // atan(2^-0)
    localparam [11:0] ATAN_S1 = 12'h12E;    // atan(2^-1)
    localparam [11:0] ATAN_S2 = 12'h0A0;    // atan(2^-2)
    localparam [11:0] ATAN_S3 = 12'h051;    // atan(2^-3)
    localparam [11:0] ATAN_S4 = 12'h029;    // atan(2^-4)
    localparam [11:0] ATAN_S5 = 12'h014;    // atan(2^-5)
    localparam [11:0] ATAN_S6 = 12'h00A;    // atan(2^-6)
    localparam [11:0] ATAN_S7 = 12'h005;    // atan(2^-7)

    // internal signals
    wire cmpx,cmpy,swap;                    // coarse rotation controls
    wire [15:0] xx,yy;                      // input coarse rotation complementors
    reg [15:0] x0,y0;                       // coarse rotation output
    reg [8:0] xs,ys;                        // input sign bits, delayed to line up with z8
    wire [15:0] x1,x2,x3,x4,x5,x6,x7,x8;    // adder/subtractor I/O
    wire [15:0] y1,y2,y3,y4,y5,y6,y7;       // no y8: the last Y result is never used
    wire [11:0] z1,z2,z3,z4,z5,z6,z7,z8;
    reg [10:1] v;                           // data valid delay
    wire [35:0] xc;                         // corrected X output
    reg [7:0] zc;                           // corrected phase output

    // vector mode: exchange and invert X and Y inputs to move inputs into 1st and 4th quadrants
    //  Q   X   Y   X0  Y0  Cor.
    //  1   +   +    X   Y    0
    //  2   -   +    Y  -X  +90
    //  3   -   -   -Y   X  -90
    //  4   +   -    X   Y    0
    // true/complement logic inverts inputs when necessary
    // NOTE(review): tc16a is defined elsewhere; presumably it negates D when CMP is high.
    assign cmpx = xi[15] & ~yi[15];         // quadrant 2: invert X
    assign cmpy = xi[15] & yi[15];          // quadrant 3: invert Y
    tc16a tcix (
        .D(xi),
        .CMP(cmpx),     // vector rotated by -90
        .Y(xx)
        );
    tc16a tciy (
        .D(yi),
        .CMP(cmpy),     // vector rotated by +90
        .Y(yy)
        );

    // save sign bits for output correction
    // swap X and Y axis when necessary (whenever X is negative)
    // Z input is zero
    assign swap = xi[15];
    always @ (posedge clk)
        begin
        xs <= {xs[7:0],xi[15]};     // save signs
        ys <= {ys[7:0],yi[15]};
        x0 <= swap ? yy : xx;       // 0 or +/-90 rotation
        y0 <= swap ? xx : yy;
        end

    // vector mode drives Y towards zero by watching sign bit.
    //  inp add/sub mode
    //   Y   X   Y   Z
    //   +  ADD SUB ADD
    //   -  SUB ADD SUB
    // Stage n (n = 1..8) shifts the cross term right by i = n-1 bits with sign extension:
    //   X = X +/- Y/2^i
    //   Y = Y -/+ X/2^i
    //   Z = Z +/- atan(2^-i)

    // stage 1: i = 0 (no shift), Z starts from zero
    addsub16s accx1 (.A(x0), .B(y0), .Q(x1), .SUB(y0[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub16s accy1 (.A(y0), .B(x0), .Q(y1), .SUB(~y0[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub12s accz1 (.A(12'h000), .B(ATAN_S0), .Q(z1), .SUB(y0[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    // stage 2: i = 1
    addsub16s accx2 (.A(x1), .B({y1[15],y1[15:1]}), .Q(x2), .SUB(y1[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub16s accy2 (.A(y1), .B({x1[15],x1[15:1]}), .Q(y2), .SUB(~y1[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub12s accz2 (.A(z1), .B(ATAN_S1), .Q(z2), .SUB(y1[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    // stage 3: i = 2
    addsub16s accx3 (.A(x2), .B({{2{y2[15]}},y2[15:2]}), .Q(x3), .SUB(y2[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub16s accy3 (.A(y2), .B({{2{x2[15]}},x2[15:2]}), .Q(y3), .SUB(~y2[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub12s accz3 (.A(z2), .B(ATAN_S2), .Q(z3), .SUB(y2[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    // stage 4: i = 3
    addsub16s accx4 (.A(x3), .B({{3{y3[15]}},y3[15:3]}), .Q(x4), .SUB(y3[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub16s accy4 (.A(y3), .B({{3{x3[15]}},x3[15:3]}), .Q(y4), .SUB(~y3[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub12s accz4 (.A(z3), .B(ATAN_S3), .Q(z4), .SUB(y3[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    // stage 5: i = 4
    addsub16s accx5 (.A(x4), .B({{4{y4[15]}},y4[15:4]}), .Q(x5), .SUB(y4[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub16s accy5 (.A(y4), .B({{4{x4[15]}},x4[15:4]}), .Q(y5), .SUB(~y4[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub12s accz5 (.A(z4), .B(ATAN_S4), .Q(z5), .SUB(y4[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    // stage 6: i = 5
    addsub16s accx6 (.A(x5), .B({{5{y5[15]}},y5[15:5]}), .Q(x6), .SUB(y5[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub16s accy6 (.A(y5), .B({{5{x5[15]}},x5[15:5]}), .Q(y6), .SUB(~y5[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub12s accz6 (.A(z5), .B(ATAN_S5), .Q(z6), .SUB(y5[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    // stage 7: i = 6
    addsub16s accx7 (.A(x6), .B({{6{y6[15]}},y6[15:6]}), .Q(x7), .SUB(y6[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub16s accy7 (.A(y6), .B({{6{x6[15]}},x6[15:6]}), .Q(y7), .SUB(~y6[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub12s accz7 (.A(z6), .B(ATAN_S6), .Q(z7), .SUB(y6[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    // stage 8: i = 7; only X and Z are updated because the final Y residue is never read
    addsub16s accx8 (.A(x7), .B({{7{y7[15]}},y7[15:7]}), .Q(x8), .SUB(y7[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));
    addsub12s accz8 (.A(z7), .B(ATAN_S7), .Q(z8), .SUB(y7[15]), .CE(1'b1), .CLK(clk), .RST(1'b0));

    // correct magnitude growth in CORDIC algorithm
    // multiply by 0.6072530 = 79593.86/131072 = 0x136E9.D/0x20000
    // NOTE(review): the coded constant is 18'h136E7 (79591/131072, about 0.60723), two
    // counts below the value computed above; presumably intentional - confirm.
    MULT18X18SIO #(
        .AREG(0),           // only the product register is enabled: 1 clock delay
        .BREG(0),
        .PREG(1)
        ) xadj (
        .A({x8,2'b00}),     // 18-bit signed input
        .B(18'h136E7),      // must be less than 1
        .BCIN(),
        .BCOUT(),
        .P(xc),             // 36-bit output
        .CEA(1'b1),
        .CEB(1'b1),
        .CEP(1'b1),
        .CLK(clk),
        .RSTA(1'b0),
        .RSTB(1'b0),
        .RSTP(1'b0)
        );

    // correct phase output with +/-Pi/2 adjustment added to the two MSBs of Z
    //  Q   X   Y   X0  Y0  Cor.
    //  1   +   +    X   Y  00
    //  2   -   +    Y  -X  01 (+90)
    //  3   -   -   -Y   X  11 (-90)
    //  4   +   -    X   Y  00
    // then trim to 8 bits and round up (z8[3] is the first discarded bit)
    always @ (posedge clk)
        begin
        zc <= {(z8[11:10] + {(xs[8]&ys[8]),xs[8]}),z8[9:4]} + z8[3];
        v <= {v[9:1],iv};   // shift the valid flag one stage per clock
        end

    // connect outputs - 8-bit X-axis/magnitude and phase angle outputs
    // Corrected X output drops one bit due to multiplication by 0.6072530 and
    // maximum value of 1.4142 * 1.6468 = 2.3289 and one bit due to replicated sign on
    // multiplier output. A third bit is dropped by assuming the output is positive.
    // Two more bits are dropped by assuming that all subcarriers are less than -12 dBFS.
    // The output saturates at 255 if the subcarrier is greater than -12 dBFS
    // (any of xc[35:32] set forces 8'hFF).
    assign xo = (|xc[35:32]) ? 8'hFF : xc[31:24];
    assign zo = zc;
    assign ov = v[10];      // output ready 10 clock cycles after iv
endmodule