.. _complex_mul_source:

complex_mul Source File
=======================

.. code-block:: verilog
   :linenos:

   `timescale 1ns / 1ns

   // Complex number multiplication, as in
   //   (a+ib)*(c+id) = (a*c-b*d)+i(a*d+b*c).
   // All the complex numbers are IQ-serialized, such that port x
   // carries a+ib, port y carries c+id, and port z carries the result.
   // It produces up to one answer every two clock cycles.
   // The 18-bit inputs and output are assumed scaled to [-1,1).
   // This module uses two 18-bit signed hardware multipliers,
   // and can clock at over 100 MHz in Spartan-6.

   // It's pretty easy to ask for results that would overflow the representable
   // numbers; an extreme case is (1+i)*(1-i) = 2.  All such results get
   // saturated to the maximum representable positive or negative number.

   // A second copy of the result with no rounding error is also provided
   // in z_all.  Using both outputs will consume more FPGA resources than
   // using either one alone.

   // Output results are delayed four cycles from the input.
   // The gate_out port is nothing more or less than the gate_in
   // port, delayed four cycles.  Only the iq control is used to control
   // the data paths inside this module.

   module complex_mul #(
       parameter dw = 18
   ) (
       input clk,  // Rising edge clock input; all logic is synchronous in this domain
       input gate_in,  // Flag marking input data valid
       input signed [dw-1:0] x,  // Multiplicand, signed, time-interleaved real and imaginary
       input signed [dw-1:0] y,  // Multiplicand, signed, time-interleaved real and imaginary
       input iq,  // Flag marking the real (I) part of the complex pair
       output signed [dw-1:0] z,  // Result
       output signed [(2*dw)-1:0] z_all,  // Result
       output gate_out  // Delayed version of gate_in
   );

   // Flow-through vector multiplier
   // x, y, and z are interleaved I-Q complex numbers
   // iq set high for I, low for Q at input, a pair is I followed by Q.
   // Assumes there is some guarantee that you will never multiply two
   // full-scale negative values together.

   // Delay line for the iq flag.  Tap 1 steers the operand mux of the
   // second multiplier, tap 3 steers the output mux.
   // Exactly three old bits are shifted in next to the new one, so the
   // concatenation matches the 4-bit register width with no truncation
   // (same form as gate_sr below).
   reg [3:0] iq_sr=0;
   always @(posedge clk) iq_sr <= {iq_sr[2:0],iq};

   // Keep one guard bit through the addition step.  That, and the
   // strange-looking "+1" below, reduces the average error offset
   // to -1/4 result bit.
   reg signed [dw-1:0] x1=0, x2=0, y1=0;
   reg signed [(2*dw)-1:0] prod1=0, prod2=0;
   reg signed [(2*dw)-1:0] prod1_d=0, prod2_d=0;
   reg signed [(2*dw)-1:0] sumi=0, sumq=0;
   // Second multiplier operand: x delayed by two cycles when iq_sr[1]
   // is set, otherwise the live x input.  It is multiplied by y delayed
   // by one cycle, which forms the cross terms of the complex product.
   wire signed [dw-1:0] m2mux = iq_sr[1] ? x2 : x;
   always @(posedge clk) begin
       x1 <= x;
       x2 <= x1;
       y1 <= y;
       prod1 <= x*y;          // same-phase products: a*c, then b*d
       prod2 <= m2mux * y1;   // cross products: b*c, then a*d
       prod1_d <= prod1;
       prod2_d <= prod2;
       sumi <= prod1_d - prod1;      // previous product minus current product
       // NOTE: the "+1" is applied at full width, so it is also visible
       // in the z_all value that is output for this sum.
       sumq <= prod2_d + prod2 + 1;
   end

   // SAT(x,old,new): narrow x from bits [old:0] down to bits [new:0].
   // When bits x[old:new] are all equal (all 0 or all 1) the low bits
   // x[new:0] pass through unchanged; otherwise the result is the sign
   // bit x[old] followed by `new` copies of its complement, i.e. the
   // most positive or most negative value of the narrower width.
   `define SAT(x,old,new) ((~|x[old:new] | &x[old:new]) ? x[new:0] : {x[old],{new{~x[old]}}})
   // Output selection: sumq when iq_sr[3] is set, sumi otherwise.
   wire iqx = iq_sr[3];
   wire signed [(2*dw)-1:0] mux = iqx ? sumq : sumi;
   reg signed [dw:0] zr=0;
   reg signed [(2*dw)-1:0] mux_r=0;
   // Top dw+2 bits of the selected sum; SAT removes one bit from the top,
   // leaving dw+1 bits in zr.
   wire signed [(dw+1):0] zsel=mux[(2*dw)-1:(dw-2)];
   always @(posedge clk) begin
       zr <= `SAT(zsel, dw+1, dw);
       mux_r <= mux;   // registered full-width copy, same latency as zr
   end
   assign z = zr[dw:1];   // drop the lowest bit of zr
   assign z_all = mux_r;
   `undef SAT

   // This gate input isn't really used, but describes the length of this
   // pipeline to let users keep track of the data flow.
   reg [3:0] gate_sr=0;
   always @(posedge clk) gate_sr <= {gate_sr[2:0],gate_in};
   assign gate_out = gate_sr[3];

   endmodule