Attention

This documentation is a work in progress. Expect to see errors and unfinished things.

xy_pi_clip Source File

  1`timescale 1ns / 1ns
  2
  3// Proportional-Integral gain for multiplexed X-Y data stream,
  4// with programmable clip levels.  Timing plan shown below.
  5
  6// Spartan-6: 159 LUTs, 1 DSP48A1
  7//  (not counting ~36 LUTs needed to generate coeff and lim)
  8// XXX this is the critical timing path, streamline and/or pipeline some more
  9// XXX proportional and integral gain terms need very different scaling?
 10
 11// Serious pipelining internally.
 12// At any one point, the data flow sequence is:
 13//   X integral     high-side clip (new data from multiplier summed with previous X integral term)
 14//   Y integral     high-side clip (new data from multiplier summed with previous Y integral term)
 15//   X proportional high-side clip (new data from multiplier summed with previous X integral term)
 16//   Y proportional high-side clip (new data from multiplier summed with previous Y integral term)
 17//   X integral      low-side clip (recirculated data from high-side clip)
 18//   Y integral      low-side clip (recirculated data from high-side clip)
 19//   X proportional  low-side clip (recirculated data from high-side clip)
 20//   Y proportional  low-side clip (recirculated data from high-side clip)
 21
 22//        in_xy  coeff   lim
 23//  sync  xerr
 24//  .     yerr   x_int
 25//  .     .      y_int
 26//  .     .      x_prop
 27//  .     .      y_prop
 28//  .     .      .       x_hi
 29//  .     .      .       y_hi
 30//  .     .      .       x_hi
 31//  sync  .      .       y_hi
 32//  .     .      .       x_lo
 33//  .     .      .       y_lo
 34//  .     .      .       x_lo
 35//  .     .      .       y_lo
 36//  .     .      .       .      o_sync  out_x
 37//  .     .      .       .      .       out_y
 38module xy_pi_clip #(  // PI gain with clipping for a time-multiplexed X/Y stream; one multiplier and one clip comparator are shared across phases
 39     parameter ff_dshift = 0 // Deferred ff_ddrive downshift; combined below with the fixed up-shift by 12
 40) (
 41     input clk,  // timespec 6.8 ns
 42     input sync,  // high for the first of the xy pair
 43     input signed [17:0] in_xy,  // multiplexed input; taken directly on the sync and stb[0] cycles, via in_xy1 otherwise
 44     output signed [17:0] out_xy,  // accum3[29:12] while stb[4] or stb[5] is high, zero otherwise
 45     output o_sync,  // copy of stb[4]
 46     // 8-way muxed configuration
 47     input signed [17:0] coeff,  // multiplied by val every clock
 48     input signed [17:0] lim,  // compared against accum1[30:12] every clock
 49     // adaptive feed-forward at the drive
 50     input ad_ffd_en,  // not referenced anywhere in this module body
 51     input signed [17:0] tri_out_xy,  // not referenced anywhere in this module body
 52     // feed-forward inputs
 53     input ffd_en,  // gates ff_ddrive into ff_mp
 54     input signed [17:0] ff_ddrive, // FF drive (derivative) to be accumulated in I term
 55     input signed [17:0] ff_dphase, // FF phase (derivative); currently unused
 56     input ffp_en,  // gates both ff_drive and ff_phase into ff_mp
 57     input signed [17:0] ff_drive, // FF drive added to P term
 58     input signed [17:0] ff_phase, // FF phase
 59     // Output clipped, four bits are vs. {x_hi, y_hi, x_lo, y_lo}
 60     output [3:0] clipped  // NOTE(review): confirm the bit order above against the stb indices used in the assign at the end of the module
 61);
 62
 63// sync comes in one out of every eight cycles
 64// build a one-hot encoding of the various phases out of a simple shift register
 65reg [14:0] stb=0;  // stb[k] is sync delayed by k+1 clocks
 66always @(posedge clk) stb <= {stb[13:0],sync};  // with period-8 sync, stb[k] and stb[k+8] are high in the same cycle
 67// SAT(x,old,new): if x[old:new] is all zeros or all ones, pass x[new:0]; otherwise saturate to {x[old], new copies of ~x[old]}
 68`define SAT(x,old,new) ((~|x[old:new] | &x[old:new]) ? x[new:0] : {x[old],{new{~x[old]}}})
 69
 70wire signed [17:0] in_xy1;  // in_xy after the reg_delay instance below
 71reg_delay #(.dw(18), .len(2))  // reg_delay is defined elsewhere; presumably a 2-clock delay line (len=2) -- confirm
 72     pi_match(.clk(clk), .reset(1'b0), .gate(1'b1), .din(in_xy), .dout(in_xy1));
 73
 74// FF ddrive to be sampled during 'integral' cycles only. Phase tie-in still incomplete
 75// Pipelined to ease timing
 76reg signed [41:0] mr_scale=0;  // multiplier result after the p_term scaling mux
 77reg signed [42:0] mr_ff=0;  // mr_scale + ff_mp, one bit wider than mr_scale
 78reg signed [18+12-1:0] ff_mp=0;  // feed-forward term selected for the current phase
 79wire signed [18+12-1:0] ff_ddrive_l, ff_drive_l, ff_phase_l; // Worst-case bit-sizing
 80
 81// Perform deferred down-shifting of ff_ddrive here by combining with up-shifting by 12
 82// required by the accumulator construction below.
 83generate if (ff_dshift > 12) begin: g_dshift
 84     assign ff_ddrive_l = ff_ddrive >>> (ff_dshift-12);  // net arithmetic down-shift
 85end else begin: g_ushift
 86     assign ff_ddrive_l = ff_ddrive <<< (12-ff_dshift);  // net up-shift (zero shift when ff_dshift == 12)
 87end endgenerate
 88assign ff_drive_l = ff_drive <<< 12;  // fixed up-shift by 12, no deferred down-shift
 89assign ff_phase_l = ff_phase <<< 12;  // fixed up-shift by 12, no deferred down-shift
 90
 91always @(posedge clk) begin  // select the feed-forward term by phase and pre-add it to the scaled product
 92     ff_mp <= 0;  // default; every arm of the case below also assigns ff_mp
 93     case(stb[4:1])  // decodes which of stb[1]..stb[4] is high
 94             4'b0001: ff_mp <= ffd_en ? ff_ddrive_l : 0; // ddrive I
 95             4'b0010: ff_mp <= 0;           // dphase I
 96             4'b0100: ff_mp <= ffp_en ? ff_drive_l : 0;  // drive P
 97             4'b1000: ff_mp <= ffp_en ? ff_phase_l : 0;  // phase P
 98             default: ff_mp <= 0;  // no feed-forward on any other phase
 99     endcase
100     // Avoid 3-way add by pre-computing mr_scale + ff_mp
101     mr_ff <= mr_scale + ff_mp;  // outputs on stb 3, 4, 5, 6
102end
103
104wire signed [29:0] mr_sat = `SAT(mr_ff,42,29);  // 43-bit mr_ff saturated to 30 bits
105`undef SAT
106
107reg signed [35:0] mr=0;  // coeff * val product
108reg signed [30:0] lim1=0;  // lim sign-extended by one bit and shifted up 12, registered
109reg signed [30:0] accum1=0, accum2=0, accum3=0, accum4=0, accum5=0, accum6=0;  // accum1..accum6 form a register chain; the clip is applied between accum2 and accum3
110reg signed [17:0] val=0;  // multiplier input selected from in_xy / in_xy1
111reg clip_recirc=0, p_term=0, p_term1=0, p_term2=0, lim_hi=0, cmp=0;  // registered phase flags and the registered compare result
112wire sat1 = cmp ^ lim_hi;  // lim_hi=1: set when accum1[30:12] was not below lim; lim_hi=0: set when it was below lim
113wire signed [18:0] accum1_upper = accum1[30:12];  // accumulator with the low 12 bits dropped, for comparison against lim
114always @(posedge clk) begin  // main pipeline: multiply, scale, accumulate or recirculate, compare, clip
115     clip_recirc <= stb[6]|stb[7]|stb[0]|stb[1];  // when set, accum1 reloads accum4 instead of adding new multiplier data
116     p_term <= stb[2]|stb[3]|stb[6]|stb[7];  // selects the <<<6 scaling of mr
117     p_term1 <= p_term;
118     p_term2 <= p_term1;  // p_term delayed two clocks; selects accum6 instead of accum4 as the addend
119     lim_hi <= stb[6]|stb[7]|stb[8]|stb[9];  // NOTE(review): same phases as clip_recirc in steady state; confirm against the header timing plan and the external lim mux
120     val <= (sync|stb[0]) ? in_xy : in_xy1;  // outputs on stb 0, 1, 2, 3
121     mr <= coeff * val;  // outputs on stb 1, 2, 3, 4
122     mr_scale <= p_term ? (mr <<< 6) : mr;  // this step determines K_P vs. K_I scaling
123     accum1 <= clip_recirc ? accum4 : (mr_sat + (p_term2 ? accum6 : accum4));  // recirculate accum4, or add the saturated product to accum6/accum4
124     accum2 <= accum1;  // delay accum1 one clock so it lines up with cmp and lim1
125     cmp <= accum1_upper < lim;  // signed compare; both operands are declared signed
126     lim1 <= {lim[17],lim,12'b0};  // lim at accumulator scale, registered alongside cmp
127     accum3 <= sat1 ? lim1 : accum2;  // replace the value with the limit when sat1 is set
128     accum4 <= accum3;
129     accum5 <= accum4;
130     accum6 <= accum5;  // accum3 delayed three clocks; the p_term2 addend in accum1
131end
132
133wire signed [17:0] out_show = accum3[29:12];  // 18-bit output slice of the clipped accumulator
134wire signed [18:0] acc_show = accum1[30:12];  // debug only, match cmp expression
135assign out_xy = (stb[4]|stb[5]) ? out_show : 0;  // output is forced to zero outside these two phases
136assign o_sync = stb[4];  // marks the first of the two output cycles
137assign clipped = {4{sat1}} & {stb[14]|stb[12], stb[13]|stb[11], stb[10]|stb[8], stb[9]|stb[7]};  // sat1 routed to one of four flag bits by phase; each flag covers two phases
138
139endmodule