Attention
This documentation is a work in progress. Expect to see errors and unfinished things.
xy_pi_clip Source File
`timescale 1ns / 1ns

// Proportional-Integral gain for multiplexed X-Y data stream,
// with programmable clip levels.  Timing plan shown below.

// Spartan-6: 159 LUTs, 1 DSP48A1
// (not counting ~36 LUTs needed to generate coeff and lim)
// XXX this is the critical timing path, streamline and/or pipeline some more
// XXX proportional and integral gain terms need very different scaling?

// Serious pipelining internally.
// At any one point, the data flow sequence is:
//   X integral     high-side clip (new data from multiplier summed with previous X integral term)
//   Y integral     high-side clip (new data from multiplier summed with previous Y integral term)
//   X proportional high-side clip (new data from multiplier summed with previous X integral term)
//   Y proportional high-side clip (new data from multiplier summed with previous Y integral term)
//   X integral     low-side clip  (recirculated data from high-side clip)
//   Y integral     low-side clip  (recirculated data from high-side clip)
//   X proportional low-side clip  (recirculated data from high-side clip)
//   Y proportional low-side clip  (recirculated data from high-side clip)

// Timing plan, one row per clk cycle (sync repeats every eight rows).
// The leftmost column is the sync input; the two rightmost columns are
// o_sync and out_xy.  A "." means no entry for that column in that cycle.
//
//          in_xy   coeff    lim
//  sync    xerr
//   .      yerr    x_int
//   .       .      y_int
//   .       .      x_prop
//   .       .      y_prop
//   .       .       .       x_hi
//   .       .       .       y_hi
//   .       .       .       x_hi
//  sync     .       .       y_hi
//   .       .       .       x_lo
//   .       .       .       y_lo
//   .       .       .       x_lo
//   .       .       .       y_lo
//   .       .       .        .      o_sync  out_x
//   .       .       .        .        .     out_y
module xy_pi_clip #(
    parameter ff_dshift = 0  // Deferred ff_ddrive downshift
) (
    input clk,  // timespec 6.8 ns
    input sync,  // high for the first of the xy pair
    input signed [17:0] in_xy,
    output signed [17:0] out_xy,
    output o_sync,
    // 8-way muxed configuration
    input signed [17:0] coeff,
    input signed [17:0] lim,
    // feed-forward inputs
    input ffd_en,
    input signed [17:0] ff_ddrive,  // FF drive (derivative) to be accumulated in I term
    input signed [17:0] ff_dphase,  // FF phase (derivative); currently unused
    input ffp_en,
    input signed [17:0] ff_drive,  // FF drive added to P term
    input signed [17:0] ff_phase,  // FF phase
    // Output clipped, four bits are vs. {x_hi, y_hi, x_lo, y_lo}
    output [3:0] clipped
);

// sync comes in one out of every eight cycles
// build a one-hot encoding of the various phases out of a simple shift register
// stb[k] is sync delayed by k+1 clock cycles.
reg [14:0] stb=0;
always @(posedge clk) stb <= {stb[13:0],sync};

// Signed saturation of x (sign bit at index `old`) down to new+1 bits.
// If bits x[old:new] are all equal the value already fits and the low
// new+1 bits pass through; otherwise the result is the sign bit followed
// by `new` copies of its complement (most positive / most negative value).
`define SAT(x,old,new) ((~|x[old:new] | &x[old:new]) ? x[new:0] : {x[old],{new{~x[old]}}})

// Delayed copy of in_xy, used below to present each input sample to the
// multiplier a second time.  reg_delay is defined outside this file;
// presumably len=2 gives a two-cycle delay -- TODO confirm.
wire signed [17:0] in_xy1;
reg_delay #(.dw(18), .len(2))
    pi_match(.clk(clk), .reset(1'b0), .gate(1'b1), .din(in_xy), .dout(in_xy1));

// FF ddrive to be sampled during 'integral' cycles only. Phase tie-in still incomplete
// Pipelined to ease timing
reg signed [41:0] mr_scale=0;  // multiplier product after P/I scaling
reg signed [42:0] mr_ff=0;     // mr_scale plus the selected feed-forward term
reg signed [18+12-1:0] ff_mp=0;  // feed-forward term selected for the current slot
wire signed [18+12-1:0] ff_ddrive_l, ff_drive_l, ff_phase_l;  // Worst-case bit-sizing

// Perform deferred down-shifting of ff_ddrive here by combining with up-shifting by 12
// required by the accumulator construction below.
// Net effect is a shift of (12 - ff_dshift) bits; the generate picks the
// shift direction so the shift amount is never negative.
generate if (ff_dshift > 12) begin: g_dshift
    assign ff_ddrive_l = ff_ddrive >>> (ff_dshift-12);
end else begin: g_ushift
    assign ff_ddrive_l = ff_ddrive <<< (12-ff_dshift);
end endgenerate
assign ff_drive_l = ff_drive <<< 12;
assign ff_phase_l = ff_phase <<< 12;

always @(posedge clk) begin
    // Select the feed-forward contribution for the current slot.
    // Every path through the case assigns ff_mp (default covers the rest),
    // so no separate default assignment is needed ahead of it.
    case(stb[4:1])
        4'b0001: ff_mp <= ffd_en ? ff_ddrive_l : 0;  // ddrive I
        4'b0010: ff_mp <= 0;                         // dphase I
        4'b0100: ff_mp <= ffp_en ? ff_drive_l : 0;   // drive P
        4'b1000: ff_mp <= ffp_en ? ff_phase_l : 0;   // phase P
        default: ff_mp <= 0;
    endcase
    // Avoid 3-way add by pre-computing mr_scale + ff_mp
    mr_ff <= mr_scale + ff_mp;  // outputs on stb 3, 4, 5, 6
end

// Saturate the 43-bit sum to 30 bits before it enters the accumulator.
wire signed [29:0] mr_sat = `SAT(mr_ff,42,29);
`undef SAT

reg signed [35:0] mr=0;     // 18x18 signed product coeff * val
reg signed [30:0] lim1=0;   // lim sign-extended by one bit and scaled up by 2^12
// Register chain accum1 -> accum2 -> ... -> accum6; the clip substitution
// is applied between accum2 and accum3.
reg signed [30:0] accum1=0, accum2=0, accum3=0, accum4=0, accum5=0, accum6=0;
reg signed [17:0] val=0;    // multiplier data input
reg clip_recirc=0, p_term=0, p_term1=0, p_term2=0, lim_hi=0, cmp=0;
// Clip decision: cmp holds the registered (accum1_upper < lim).
// With lim_hi set the clip fires when that compare was false (value at or
// above lim); with lim_hi clear it fires when the compare was true.
wire sat1 = cmp ^ lim_hi;
wire signed [18:0] accum1_upper = accum1[30:12];
always @(posedge clk) begin
    // Slot-select flags, each decoded from the stb phase register.
    clip_recirc <= stb[6]|stb[7]|stb[0]|stb[1];
    p_term <= stb[2]|stb[3]|stb[6]|stb[7];
    p_term1 <= p_term;
    p_term2 <= p_term1;
    lim_hi <= stb[6]|stb[7]|stb[8]|stb[9];
    // Take in_xy directly for the first two slots, then the delayed copy.
    val <= (sync|stb[0]) ? in_xy : in_xy1;  // outputs on stb 0, 1, 2, 3
    mr <= coeff * val;  // outputs on stb 1, 2, 3, 4
    mr_scale <= p_term ? (mr <<< 6) : mr;  // this step determines K_P vs. K_I scaling
    // Either recirculate accum4 unchanged (clip_recirc), or add the new
    // saturated product to accum6 (when p_term2 is set) or accum4.
    accum1 <= clip_recirc ? accum4 : (mr_sat + (p_term2 ? accum6 : accum4));
    accum2 <= accum1;
    cmp <= accum1_upper < lim;
    // Registered alongside cmp, so the substituted limit is the same lim
    // value that was used in the comparison.
    lim1 <= {lim[17],lim,12'b0};
    accum3 <= sat1 ? lim1 : accum2;
    accum4 <= accum3;
    accum5 <= accum4;
    accum6 <= accum5;
end

wire signed [17:0] out_show = accum3[29:12];
wire signed [18:0] acc_show = accum1[30:12];  // debug only, match cmp expression
// out_xy carries accum3[29:12] while stb[4] or stb[5] is set and is zero
// otherwise; o_sync marks the first of those two cycles.
assign out_xy = (stb[4]|stb[5]) ? out_show : 0;
assign o_sync = stb[4];
// sat1 is steered to one of the four clipped bits according to stb phase.
assign clipped = {4{sat1}} & {stb[14]|stb[12], stb[13]|stb[11], stb[10]|stb[8], stb[9]|stb[7]};

endmodule