Attention

This documentation is a work in progress. Expect to see errors and unfinished things.

xy_pi_clip Source File

  1`timescale 1ns / 1ns
  2
  3// Proportional-Integral gain for multiplexed X-Y data stream,
  4// with programmable clip levels.  Timing plan shown below.
  5
  6// Spartan-6: 159 LUTs, 1 DSP48A1
  7//  (not counting ~36 LUTs needed to generate coeff and lim)
  8// XXX this is the critical timing path, streamline and/or pipeline some more
  9// XXX proportional and integral gain terms need very different scaling?
 10
 11// Serious pipelining internally.
 12// At any one point, the data flow sequence is:
 13//   X integral     high-side clip (new data from multiplier summed with previous X integral term)
 14//   Y integral     high-side clip (new data from multiplier summed with previous Y integral term)
 15//   X proportional high-side clip (new data from multiplier summed with previous X integral term)
 16//   Y proportional high-side clip (new data from multiplier summed with previous Y integral term)
 17//   X integral      low-side clip (recirculated data from high-side clip)
 18//   Y integral      low-side clip (recirculated data from high-side clip)
 19//   X proportional  low-side clip (recirculated data from high-side clip)
 20//   Y proportional  low-side clip (recirculated data from high-side clip)
 21
 22//        in_xy  coeff   lim
 23//  sync  xerr
 24//  .     yerr   x_int
 25//  .     .      y_int
 26//  .     .      x_prop
 27//  .     .      y_prop
 28//  .     .      .       x_hi
 29//  .     .      .       y_hi
 30//  .     .      .       x_hi
 31//  sync  .      .       y_hi
 32//  .     .      .       x_lo
 33//  .     .      .       y_lo
 34//  .     .      .       x_lo
 35//  .     .      .       y_lo
 36//  .     .      .       .      o_sync  out_x
 37//  .     .      .       .      .       out_y
 38module xy_pi_clip #(  // PI gain stage with programmable clipping for a time-multiplexed X/Y stream
 39     parameter ff_dshift = 0 // Deferred ff_ddrive downshift (bits); folded into the fixed 12-bit upshift below
 40) (
 41     input clk,  // timespec 6.8 ns
 42     input sync,  // high for the first of the xy pair
 43     input signed [17:0] in_xy,  // multiplexed input samples
 44     output signed [17:0] out_xy,  // multiplexed result; driven only while stb[4] or stb[5] is set, zero otherwise
 45     output o_sync,  // copy of stb[4], i.e. the first of the two cycles on which out_xy is driven
 46     // 8-way muxed configuration
 47     input signed [17:0] coeff,  // multiplier coefficient applied to the selected input sample
 48     input signed [17:0] lim,  // clip level, compared against the upper bits of the accumulator
 49     // feed-forward inputs
 50     input ffd_en,  // when low, ff_ddrive is replaced by zero
 51     input signed [17:0] ff_ddrive, // FF drive (derivative) to be accumulated in I term
 52     input signed [17:0] ff_dphase, // FF phase (derivative); currently unused
 53     input ffp_en,  // when low, ff_drive and ff_phase are replaced by zero
 54     input signed [17:0] ff_drive, // FF drive added to P term
 55     input signed [17:0] ff_phase, // FF phase
 56     // Output clipped, four bits are vs. {x_hi, y_hi, x_lo, y_lo}
 57     output [3:0] clipped
 58);
 59
 60// sync comes in one out of every eight cycles
 61// build a one-hot encoding of the various phases out of a simple shift register
   // stb[k] is sync delayed by k+1 clock cycles (sync is shifted in at bit 0).
 62reg [14:0] stb=0;
 63always @(posedge clk) stb <= {stb[13:0],sync};
 64
   // SAT(x,old,new): signed saturation of x (sign bit at index `old`) to new+1 bits.
   // If bits x[old:new] are all equal, the value already fits and x[new:0] is passed through;
   // otherwise the result is the sign bit followed by `new` copies of its complement,
   // i.e. the most positive or most negative (new+1)-bit value.
 65`define SAT(x,old,new) ((~|x[old:new] | &x[old:new]) ? x[new:0] : {x[old],{new{~x[old]}}})
 66
   // Delayed copy of in_xy, selected by the val mux further down.
   // NOTE(review): reg_delay is defined outside this file; .len(2) presumably means a
   // two-clock delay -- confirm against its definition.
 67wire signed [17:0] in_xy1;
 68reg_delay #(.dw(18), .len(2))
 69     pi_match(.clk(clk), .reset(1'b0), .gate(1'b1), .din(in_xy), .dout(in_xy1));
 70
 71// FF ddrive to be sampled during 'integral' cycles only. Phase tie-in still incomplete
 72// Pipelined to ease timing
   // mr_scale: scaled multiplier product; mr_ff: product plus feed-forward term (one extra bit for the sum);
   // ff_mp: feed-forward term selected for the current phase.
 73reg signed [41:0] mr_scale=0;
 74reg signed [42:0] mr_ff=0;
 75reg signed [18+12-1:0] ff_mp=0;
 76wire signed [18+12-1:0] ff_ddrive_l, ff_drive_l, ff_phase_l; // Worst-case bit-sizing
 77
 78// Perform deferred down-shifting of ff_ddrive here by combining with up-shifting by 12
 79// required by the accumulator construction below.
   // Net effect: ff_ddrive scaled by 2^(12-ff_dshift); the generate picks a right or left
   // shift so that the shift amount is never negative.
 80generate if (ff_dshift > 12) begin: g_dshift
 81     assign ff_ddrive_l = ff_ddrive >>> (ff_dshift-12);
 82end else begin: g_ushift
 83     assign ff_ddrive_l = ff_ddrive <<< (12-ff_dshift);
 84end endgenerate
   // The other feed-forward inputs get the plain 12-bit upshift.
 85assign ff_drive_l = ff_drive <<< 12;
 86assign ff_phase_l = ff_phase <<< 12;
 87
   // Select the feed-forward term for the current phase and pre-add it to the scaled product.
 88always @(posedge clk) begin
   // Default value; every arm of the case below (including default) also assigns ff_mp.
 89     ff_mp <= 0;
   // stb[4:1] is one-hot: 4'b0001 means stb[1] is set, 4'b1000 means stb[4] is set.
 90     case(stb[4:1])
 91             4'b0001: ff_mp <= ffd_en ? ff_ddrive_l : 0; // ddrive I
 92             4'b0010: ff_mp <= 0;           // dphase I (ff_dphase is not connected)
 93             4'b0100: ff_mp <= ffp_en ? ff_drive_l : 0;  // drive P
 94             4'b1000: ff_mp <= ffp_en ? ff_phase_l : 0;  // phase P
 95             default: ff_mp <= 0;
 96     endcase
 97     // Avoid 3-way add by pre-computing mr_scale + ff_mp
 98     mr_ff <= mr_scale + ff_mp;  // outputs on stb 3, 4, 5, 6
 99end
100
   // Saturate the 43-bit sum down to 30 signed bits before it enters the accumulator.
101wire signed [29:0] mr_sat = `SAT(mr_ff,42,29);
102`undef SAT
103
   // mr: raw 18x18 product.  lim1: lim aligned to the accumulator format.
   // accum1..accum6: accumulate / clip / recirculation pipeline (31 bits, low 12 bits below the output LSB).
   // val: input sample fed to the multiplier.
104reg signed [35:0] mr=0;
105reg signed [30:0] lim1=0;
106reg signed [30:0] accum1=0, accum2=0, accum3=0, accum4=0, accum5=0, accum6=0;
107reg signed [17:0] val=0;
108reg clip_recirc=0, p_term=0, p_term1=0, p_term2=0, lim_hi=0, cmp=0;
   // cmp is "accumulator < lim".  With lim_hi set, sat1 is asserted when the accumulator is
   // NOT below lim (high-side clip); with lim_hi clear, when it IS below lim (low-side clip).
109wire sat1 = cmp ^ lim_hi;
   // Upper 19 bits of accum1, i.e. the accumulator with its 12 low-order bits dropped.
110wire signed [18:0] accum1_upper = accum1[30:12];
   // Main datapath: multiply, scale, accumulate, then clip against lim.
111always @(posedge clk) begin
   // Registered phase flags decoded from the one-hot stb shift register.
   // clip_recirc: feed accum4 straight back into accum1 instead of adding new data.
112     clip_recirc <= stb[6]|stb[7]|stb[0]|stb[1];
   // p_term: selects the <<<6 scaling of the product below; p_term2 (two clocks later)
   // selects accum6 rather than accum4 as the addend for accum1.
113     p_term <= stb[2]|stb[3]|stb[6]|stb[7];
114     p_term1 <= p_term;
115     p_term2 <= p_term1;
   // lim_hi: selects the polarity of the clip test (see sat1).
116     lim_hi <= stb[6]|stb[7]|stb[8]|stb[9];
   // Use the live input while sync or stb[0] is set, otherwise the delayed copy in_xy1.
117     val <= (sync|stb[0]) ? in_xy : in_xy1;  // outputs on stb 0, 1, 2, 3
118     mr <= coeff * val;  // outputs on stb 1, 2, 3, 4
   // Products are scaled up by 2^6 when p_term is set.
119     mr_scale <= p_term ? (mr <<< 6) : mr;  // this step determines K_P vs. K_I scaling
   // Either recirculate accum4 (clip_recirc) or add the saturated new term to a stored value.
120     accum1 <= clip_recirc ? accum4 : (mr_sat + (p_term2 ? accum6 : accum4));
   // accum2, cmp and lim1 are all registered from the same cycle's accum1 / lim,
   // so they are time-aligned when accum3 is computed on the next clock.
121     accum2 <= accum1;
122     cmp <= accum1_upper < lim;
   // lim sign-extended by one bit and padded with 12 zero LSBs to match the 31-bit accumulators.
123     lim1 <= {lim[17],lim,12'b0};
   // Replace the value with the limit when the clip test fires, otherwise pass it through.
124     accum3 <= sat1 ? lim1 : accum2;
   // Delay line holding results for recirculation (accum4) and later summation (accum6).
125     accum4 <= accum3;
126     accum5 <= accum4;
127     accum6 <= accum5;
128end
129
   // Output is bits [29:12] of accum3: the 12 low-order bits and the top bit are dropped.
   // NOTE(review): dropping bit 30 presumably relies on the clip keeping the value within
   // 18 bits -- confirm for all lim settings.
130wire signed [17:0] out_show = accum3[29:12];
131wire signed [18:0] acc_show = accum1[30:12];  // debug only, match cmp expression
   // out_xy is driven for two consecutive cycles (stb[4], stb[5]) and is zero otherwise.
132assign out_xy = (stb[4]|stb[5]) ? out_show : 0;
133assign o_sync = stb[4];
   // Each clipped bit is sat1 gated by two specific stb phases; outside those phases it reads 0.
134assign clipped = {4{sat1}} & {stb[14]|stb[12], stb[13]|stb[11], stb[10]|stb[8], stb[9]|stb[7]};
135
136endmodule