Attention

This documentation is a work in progress. Expect to see errors and unfinished things.

biquad Source File

  1// Biquad IIR filter
  2//
  3// y(t) = u(t)*b0 + u(t-1)*b1 + u(t-2)*b2 + y(t-1)*(-a1) + y(t-2)*(-a2)
  4//
  5// Minimizes latency by computing all but u(t)*b0 at end of previous sample.
  6//
  7// Minimizes resource consumption by using internal
  8// DSP registers for between-sample storage.
  9//
 10// Writing a coefficient holds the filter in reset until address 7 is written.
 11// This allows usage of a consistent set of coefficients.
 12//
 module biquad #(
    parameter DATA_WIDTH        = 28,      // Bits per sample lane
    parameter DATA_COUNT        = 1,       // Number of lanes packed into S_TDATA/M_TDATA
    parameter COEFFICIENT_WIDTH = 25,      // Bits per coefficient
    parameter DEBUG             = "false"  // Value applied to the mark_debug attributes
) (
    // Coefficient write port (sysClk domain)
    input                         sysClk,
    input                         sysCoefficientStrobe,
    input                   [2:0] sysCoefficientAddress,
    input [COEFFICIENT_WIDTH-1:0] sysCoefficientValue,

    // Sample stream in (S_*) and out (M_*), valid/ready handshake (dataClk domain)
    input                                                         dataClk,
    (*mark_debug=DEBUG*) input      [(DATA_COUNT*DATA_WIDTH)-1:0] S_TDATA,
    (*mark_debug=DEBUG*) input                                    S_TVALID,
    (*mark_debug=DEBUG*) output reg                               S_TREADY,
    (*mark_debug=DEBUG*) output reg [(DATA_COUNT*DATA_WIDTH)-1:0] M_TDATA,
    (*mark_debug=DEBUG*) output reg                               M_TVALID,
    (*mark_debug=DEBUG*) input                                    M_TREADY
);

// Accumulator width: full product width plus MAC_WIDEN extra bits.
localparam MAC_WIDEN = 4;
localparam MAC_WIDTH = DATA_WIDTH + COEFFICIENT_WIDTH + MAC_WIDEN;

///////////////////////////////////////////////////////////////////////////////
// Coefficient dual-port RAM 0:b0, 1:b1, 2:b2, 3:-a2, 4:-a1
// Coefficient range [-2,2) -- i.e. two bits to the left of the binary point
//
// Writing any coefficient (address 0..4) asserts sysReset; writing address 7
// releases it.  Addresses 5 and 6 are ignored.
reg [COEFFICIENT_WIDTH-1:0] coefficientRAM [0:4], coefficientRAMq;
reg sysReset = 1;
always @(posedge sysClk) begin
    if (sysCoefficientStrobe) begin
        if (sysCoefficientAddress <= 4) begin
            coefficientRAM[sysCoefficientAddress] <= sysCoefficientValue;
            sysReset <= 1;
        end else if (sysCoefficientAddress == 7) begin
            sysReset <= 0;
        end
    end
end

// I/O history: current input, previous input, previous output
(*mark_debug=DEBUG*) reg [(DATA_COUNT*DATA_WIDTH)-1:0] u, uOld = 0, yOld = 0;

// MAC data operand multiplexer, selected by the current state.
// States 0 and 5..7 select M_TDATA.
reg [2:0] state = 0;
wire [(DATA_COUNT*DATA_WIDTH)-1:0] parameterMux = (state == 1) ? u :
                                                  (state == 2) ? u :
                                                  (state == 3) ? uOld :
                                                  (state == 4) ? yOld : M_TDATA;

// Move sysReset to our clock domain
// NOTE(review): reg_tech_cdc is defined outside this file; presumably a
// synchronizer -- confirm.
wire reset;
reg_tech_cdc reset_cdc(.I(sysReset), .C(dataClk), .O(reset));

// MAC clock-enable and accumulator-load controls
reg enMAC = 0, ldMAC = 0;

// Coefficient read port.  The read is registered, so the coefficient
// addressed by 'state' reaches the MAC one clock later, i.e. while the
// state machine is in state+1.
always @(posedge dataClk) begin
    coefficientRAMq <= coefficientRAM[state];
end

///////////////////////////////////////////////////////////////////////////////
// Computation state machine
//
// Annotations on each state describe the MAC pipeline during that state:
//   MAC inputs        : values on the macc a/b ports
//   Multiplier inputs : macc a_reg/b_reg
//   Accumulator input : macc mult_reg
//   Clip input        : macc adder_out
// "next sample" marks a term that belongs to the following output sample.
always @(posedge dataClk) begin
    if (reset) begin
        state <= 0;
        u <= 0;
        uOld <= 0;
        yOld <= 0;
        S_TREADY <= 0;
        M_TVALID <= 0;
        // Clear the MAC controls too, so that behaviour after reset does
        // not depend on which state the reset happened to interrupt.
        enMAC <= 0;
        ldMAC <= 0;
    end
    else begin
        case (state)
        0: begin // Idle: wait for an input sample
            if (S_TVALID && S_TREADY) begin
                u <= S_TDATA;
                S_TREADY <= 0;
                enMAC <= 1;
                state <= 1;
            end
            else begin
                S_TREADY <= 1;
            end
        end
        1: begin // MAC inputs: u(t), b0
                 // Multiplier inputs: y(t-1), -a1
                 // Accumulator input: y(t-2)*-a2
                 // Clip input: u(t-2)*b2 + u(t-1)*b1
            state <= 2;
        end
        2: begin // MAC inputs: u(t-1), b1 for next sample
                 // Multiplier inputs: u(t), b0
                 // Accumulator input: y(t-1)*-a1
                 // Clip input: y(t-2)*-a2 + u(t-2)*b2 + u(t-1)*b1
            ldMAC <= 1;
            state <= 3;
        end
        3: begin // MAC inputs: u(t-2), b2 for next sample
                 // Multiplier inputs: u(t-1), b1 for next sample
                 // Accumulator input: u(t)*b0
                 // Clip input: y(t-1)*-a1 + y(t-2)*-a2 + u(t-2)*b2 + u(t-1)*b1
                 // sload = 1
            ldMAC <= 0;
            uOld <= u;
            state <= 4;
        end
        4: begin // MAC inputs: y(t-2), -a2 for next sample
                 // Multiplier inputs: u(t-2), b2 for next sample
                 // Accumulator input: u(t-1)*b1 for next sample
                 // Clip input: u(t)*b0+y(t-1)*-a1+y(t-2)*-a2+u(t-2)*b2+u(t-1)*b1
                 // sload_reg = 1
            M_TVALID <= 1;
            state <= 5;
        end
        5: begin // MAC inputs: y(t-1), -a1 for next sample
                 // Multiplier inputs: y(t-2), -a2 for next sample
                 // Accumulator input: u(t-2)*b2 for next sample
                 // Clip input: u(t-1)*b1 for next sample
                 // M_TDATA: u(t)*b0+y(t-1)*-a1+y(t-2)*-a2+u(t-2)*b2+u(t-1)*b1
                 // M_TVALID = 1
                 //
                 // The MAC is enabled for the first clock of this state only.
                 // M_TDATA is held while M_TVALID is high, so waiting here
                 // for M_TREADY keeps both M_TDATA and yOld at y(t).
            enMAC <= 0;
            yOld <= M_TDATA;
            if (M_TREADY) begin
                M_TVALID <= 0;
                S_TREADY <= 1;
                state <= 0;
            end
        end
        default: begin // Unused states 6 and 7: recover to idle
            enMAC <= 0;
            ldMAC <= 0;
            S_TREADY <= 0;
            M_TVALID <= 0;
            state <= 0;
        end
        endcase
    end
end

///////////////////////////////////////////////////////////////////////////////
// Per-lane computation
genvar i;
generate
for (i = 0 ; i < DATA_COUNT ; i = i + 1) begin
    // Instantiate multiply-accumulate module
    // Module doesn't provide a reset port so fake one by enabling
    // the module in 'load' mode with the data operand forced to 0.
    wire [MAC_WIDTH-1:0] accum_out;
    macc # (.SIZEA(DATA_WIDTH),
            .SIZEB(COEFFICIENT_WIDTH),
            .SIZEOUT(MAC_WIDTH))
      macc_i (
        .clk(dataClk),
        .ce(reset || enMAC),
        .sload(reset || ldMAC),
        .a(reset ? {DATA_WIDTH{1'b0}} : parameterMux[i*DATA_WIDTH+:DATA_WIDTH]),
        .b(coefficientRAMq),
        .accum_out(accum_out));

    // Clip accumulated result
    // The '-2' on the input width and input bit selection accounts
    // for the fact that the coefficient range is [-2,2).
    // NOTE(review): reduceWidth is defined outside this file; presumably it
    // saturates I to OWIDTH bits -- confirm.
    wire [DATA_WIDTH-1:0] accum_out_clipped;
    reduceWidth #(.IWIDTH(MAC_WIDTH-(COEFFICIENT_WIDTH-2)),
                  .OWIDTH(DATA_WIDTH))
      clipMAC (.I(accum_out[MAC_WIDTH-1:COEFFICIENT_WIDTH-2]),
               .O(accum_out_clipped));

    // Output register.  It follows the clipped accumulator until the state
    // machine raises M_TVALID, then holds.  Without the hold the register
    // would pick up the next sample's partial sum one clock after M_TVALID
    // rises, so a sink that delays M_TREADY would receive the wrong value
    // and yOld would be overwritten with it.
    always @(posedge dataClk) begin
        if (!M_TVALID) begin
            M_TDATA[i*DATA_WIDTH+:DATA_WIDTH] <= accum_out_clipped;
        end
    end
end
endgenerate
endmodule
187
188///////////////////////////////////////////////////////////////////////////////
189// Multiply-accumulate unit
190// Template from Vivado
module macc #(
    parameter SIZEA   = 25,   // Width of operand a
              SIZEB   = 28,   // Width of operand b
              SIZEOUT = 55    // Width of the accumulator and of accum_out
) (
    input clk,
    input ce,                 // Clock enable for every register in the unit
    input sload,              // Restart accumulation (registered with a/b)
    input signed    [SIZEA-1:0] a,
    input signed    [SIZEB-1:0] b,
    output signed [SIZEOUT-1:0] accum_out
);

// Pipeline, all stages advancing only when ce is high:
//   stage 1: a_reg, b_reg, sload_reg  <- a, b, sload
//   stage 2: mult_reg                 <- a_reg * b_reg
//   stage 3: adder_out                <- old_result + mult_reg
// sload_reg and mult_reg are used in the same cycle, and mult_reg is one
// stage behind sload_reg.  An sload presented together with operand pair N
// therefore restarts the sum at the product of operand pair N-1.

// Declare registers for intermediate values
reg signed       [SIZEA-1:0] a_reg;
reg signed       [SIZEB-1:0] b_reg;
reg                          sload_reg;
reg signed [SIZEA+SIZEB-1:0] mult_reg;
reg signed     [SIZEOUT-1:0] adder_out, old_result;

// Accumulator feedback (combinational).
// When sload_reg is set the feedback path is opened: the adder sees 0, so
// the next multiplier output replaces the running sum instead of being
// added to it.  Otherwise the running sum is fed back.
always @*
begin
    if (sload_reg)
        old_result = 0;
    else
        old_result = adder_out;
end

always @(posedge clk)
    if (ce)
    begin
        a_reg     <= a;
        b_reg     <= b;
        mult_reg  <= a_reg * b_reg;
        sload_reg <= sload;
        // Store accumulation result into a register
        adder_out <= old_result + mult_reg;
    end

// Output accumulation result
assign accum_out = adder_out;

endmodule