Attention
This documentation is a work in progress. Expect to see errors and unfinished things.
biquad Source File
// Biquad IIR filter
//
// y(t) = u(t)*b0 + u(t-1)*b1 + u(t-2)*b2 + y(t-1)*(-a1) + y(t-2)*(-a2)
//
// Minimizes latency by computing all but u(t)*b0 at end of previous sample.
//
// Minimizes resource consumption by using internal
// DSP registers for between-sample storage.
//
// Writing any coefficient (addresses 0-4) holds the filter in reset until
// address 7 is written, so a complete, consistent set of coefficients is
// in place before filtering resumes.
//
module biquad #(
    parameter DATA_WIDTH        = 28,      // Bits per sample in each lane
    parameter DATA_COUNT        = 1,       // Number of lanes packed side by side in TDATA
    parameter COEFFICIENT_WIDTH = 25,      // Bits per coefficient
    parameter DEBUG             = "false"  // Value given to the mark_debug attributes
    ) (
    // Coefficient write port (sysClk domain).
    // Addresses 0-4 store a coefficient and put the filter into reset;
    // address 7 releases the reset.  Other addresses are ignored.
    input                         sysClk,
    input                         sysCoefficientStrobe,
    input                   [2:0] sysCoefficientAddress,
    input [COEFFICIENT_WIDTH-1:0] sysCoefficientValue,

    // Sample input (S_*) and output (M_*) streams with valid/ready
    // handshakes (dataClk domain).  All lanes share one handshake and
    // one set of coefficients.
    input dataClk,
    (*mark_debug=DEBUG*) input      [(DATA_COUNT*DATA_WIDTH)-1:0] S_TDATA,
    (*mark_debug=DEBUG*) input                                    S_TVALID,
    (*mark_debug=DEBUG*) output reg                               S_TREADY,
    (*mark_debug=DEBUG*) output reg [(DATA_COUNT*DATA_WIDTH)-1:0] M_TDATA,
    (*mark_debug=DEBUG*) output reg                               M_TVALID,
    (*mark_debug=DEBUG*) input                                    M_TREADY
    );

// Accumulator is MAC_WIDEN bits wider than a full product.
localparam MAC_WIDEN = 4;
localparam MAC_WIDTH = DATA_WIDTH + COEFFICIENT_WIDTH + MAC_WIDEN;

// Coefficient dual-port RAM 0:b0, 1:b1, 2:b2, 3:-a2, 4:-a1
// Coefficient range [-2,2) -- i.e. two bits to the left of the binary point
reg [COEFFICIENT_WIDTH-1:0] coefficientRAM [0:4], coefficientRAMq;
reg sysReset = 1;   // Filter starts out in reset until address 7 is written
always @(posedge sysClk) begin
    if (sysCoefficientStrobe) begin
        if (sysCoefficientAddress <= 4) begin
            coefficientRAM[sysCoefficientAddress] <= sysCoefficientValue;
            sysReset <= 1;
        end else if (sysCoefficientAddress == 7) begin
            sysReset <= 0;
        end
    end
end

// I/O history
(*mark_debug=DEBUG*) reg [(DATA_COUNT*DATA_WIDTH)-1:0] u, uOld = 0, yOld = 0;

// MAC parameter input multiplexer
// The coefficient read below is registered, so the value selected here in
// state N is multiplied by coefficientRAM[N-1].
reg [2:0] state = 0;
wire [(DATA_COUNT*DATA_WIDTH)-1:0] parameterMux = (state == 1) ? u :
                                                  (state == 2) ? u :
                                                  (state == 3) ? uOld :
                                                  (state == 4) ? yOld : M_TDATA;

// Move sysReset to our clock domain
wire reset;
reg_tech_cdc reset_cdc(.I(sysReset), .C(dataClk), .O(reset));

// Computation state machine
reg enMAC = 0, ldMAC = 0;

// Registered coefficient read, addressed by the current state.
always @(posedge dataClk) begin
    coefficientRAMq <= coefficientRAM[state];
end

always @(posedge dataClk) begin
    if (reset) begin
        state <= 0;
        u <= 0;
        uOld <= 0;
        yOld <= 0;
        S_TREADY <= 0;
        M_TVALID <= 0;
        // Also return the MAC controls to their idle values (as the
        // 'default' branch below does) so that a reset arriving in the
        // middle of a computation cannot leave them asserted afterwards.
        enMAC <= 0;
        ldMAC <= 0;
    end
    else begin
        case (state)
        0: begin    // Idle: wait for an input sample
            if (S_TVALID && S_TREADY) begin
                u <= S_TDATA;
                S_TREADY <= 0;
                enMAC <= 1;
                state <= 1;
            end
            else begin
                S_TREADY <= 1;
            end
        end
        1: begin    // MAC inputs: u(t), b0
                    // Multiplier inputs: y(t-1), -a1
                    // Accumulator input: y(t-2)*-a2
                    // Clip input: u(t-2)*b2 + u(t-1)*b1
                    // M_TDATA: u(t-1)*b1
            state <= 2;
        end
        2: begin    // MAC inputs: u(t-1), b1 for next cycle
                    // Multiplier inputs: u(t), b0
                    // Accumulator input: y(t-1)*-a1
                    // Clip input: y(t-2)*-a2 + u(t-2)*b2 + u(t-1)*b1
                    // M_TDATA: u(t-2)*b2 + u(t-1)*b1
            ldMAC <= 1;
            state <= 3;
        end
        3: begin    // MAC inputs: u(t-2), b2 for next cycle
                    // Multiplier inputs: u(t-1), b1 for next cycle
                    // Accumulator input: u(t)*b0
                    // Clip input: y(t-1)*-a1 + y(t-2)*-a2 + u(t-2)*b2 + u(t-1)*b1
                    // M_TDATA: y(t-2)*-a2 + u(t-2)*b2 + u(t-1)*b1
                    // sload = 1
            ldMAC <= 0;
            uOld <= u;
            state <= 4;
        end
        4: begin    // MAC inputs: y(t-2), -a2 for next cycle
                    // Multiplier inputs: u(t-2), b2 for next cycle
                    // Accumulator input: u(t-1)*b1 for next cycle
                    // Clip input: u(t)*b0+y(t-1)*-a1+y(t-2)*-a2+u(t-2)*b2+u(t-1)*b1
                    // M_TDATA: y(t-1)*-a1+y(t-2)*-a2+u(t-2)*b2+u(t-1)*b1
                    // sload_reg = 1
            M_TVALID <= 1;
            state <= 5;
        end
        5: begin    // MAC inputs: y(t-1), -a1 for next cycle
                    // Multiplier inputs: y(t-2), -a2 for next cycle
                    // Accumulator input: u(t-2)*b2 for next cycle
                    // Clip input: u(t-1)*b1 for next cycle
                    // M_TDATA: u(t)*b0+y(t-1)*-a1+y(t-2)*-a2+u(t-2)*b2+u(t-1)*b1
                    // M_TVALID = 1
            // The MAC runs for the first cycle of this state only; after
            // that its registers keep the partial sums for the next sample.
            enMAC <= 0;
            // M_TDATA is held while the output beat is stalled (see the
            // per-lane output register), so this records y(t) no matter
            // how long M_TREADY takes to arrive.
            yOld <= M_TDATA;
            if (M_TREADY) begin
                M_TVALID <= 0;
                S_TREADY <= 1;
                state <= 0;
            end
        end
        default: begin  // Unused states: recover to idle
            enMAC <= 0;
            ldMAC <= 0;
            S_TREADY <= 0;
            M_TVALID <= 0;
            state <= 0;
        end
        endcase
    end
end

///////////////////////////////////////////////////////////////////////////////
// Per-lane computation
genvar i;
generate
for (i = 0 ; i < DATA_COUNT ; i = i + 1) begin
    // Instantiate multiply-accumulate module
    // Module doesn't provide a reset port so fake one by enabling
    // the module in 'load' mode with a zero data input.
    wire [MAC_WIDTH-1:0] accum_out;
    macc # (.SIZEA(DATA_WIDTH),
            .SIZEB(COEFFICIENT_WIDTH),
            .SIZEOUT(MAC_WIDTH))
      macc_i (
        .clk(dataClk),
        .ce(reset || enMAC),
        .sload(reset || ldMAC),
        .a(reset ? {DATA_WIDTH{1'b0}} : parameterMux[i*DATA_WIDTH+:DATA_WIDTH]),
        .b(coefficientRAMq),
        .accum_out(accum_out));

    // Clip accumulated result
    // The '-2' on the input width and input bit selection accounts
    // for the fact that the coefficient range is [-2,2).
    wire [DATA_WIDTH-1:0] accum_out_clipped;
    reduceWidth #(.IWIDTH(MAC_WIDTH-(COEFFICIENT_WIDTH-2)),
                  .OWIDTH(DATA_WIDTH))
      clipMAC (.I(accum_out[MAC_WIDTH-1:COEFFICIENT_WIDTH-2]),
               .O(accum_out_clipped));

    // Output register.  By the time M_TVALID is asserted the accumulator
    // already holds a partial sum for the next sample, so the register must
    // not follow it while an output beat is waiting for M_TREADY; otherwise
    // the stalled beat would change under the consumer and yOld would
    // capture the wrong value.
    always @(posedge dataClk) begin
        if (reset || !M_TVALID || M_TREADY) begin
            M_TDATA[i*DATA_WIDTH+:DATA_WIDTH] <= accum_out_clipped;
        end
    end
end
endgenerate
endmodule
187
///////////////////////////////////////////////////////////////////////////////
// Multiply-accumulate unit
// Template from Vivado
module macc #(
    parameter SIZEA   = 25,     // Width of operand 'a'
              SIZEB   = 28,     // Width of operand 'b'
              SIZEOUT = 55      // Width of the accumulator
    ) (
    input                       clk,
    input                       ce,         // Clock enable for every register in the unit
    input                       sload,      // Restart the sum (registered before use)
    input  signed [SIZEA-1:0]   a,
    input  signed [SIZEB-1:0]   b,
    output signed [SIZEOUT-1:0] accum_out   // Registered accumulator value
    );

// Declare registers for intermediate values
reg signed [SIZEA-1:0]       a_reg;
reg signed [SIZEB-1:0]       b_reg;
reg                          sload_reg;
reg signed [SIZEA+SIZEB-1:0] mult_reg;
reg signed [SIZEOUT-1:0]     adder_out, old_result;

// Accumulator feedback selection (combinational).
// While sload_reg is set the feedback is forced to zero, so the next
// product begins a new sum; otherwise the running sum is fed back.
// Blocking assignments and an implicit sensitivity list are used because
// this block describes combinational logic.
always @* begin
    if (sload_reg)
        old_result = 0;
    else
        old_result = adder_out;
end

// Pipeline: operand registers -> product register -> accumulator.
// Nothing advances unless 'ce' is asserted.
always @(posedge clk) begin
    if (ce) begin
        a_reg     <= a;
        b_reg     <= b;
        mult_reg  <= a_reg * b_reg;
        sload_reg <= sload;
        // Store accumulation result into a register
        adder_out <= old_result + mult_reg;
    end
end

// Output accumulation result
assign accum_out = adder_out;

endmodule