Skip to content
Snippets Groups Projects
Commit 0d2a549c authored by Felix Jentzsch's avatar Felix Jentzsch
Browse files

Add RAM buffer component

parent 0f866f1e
No related branches found
No related tags found
No related merge requests found
`timescale 1 ns / 1 ps
`timescale 1 ns / 1 ps
module $TOP_MODULE_NAME$_controller
(
......@@ -14,7 +14,7 @@ output cmd_read;
output cmd_write;
////code generation part:
//mapping of R/W command values to each state (START, MAIN_1, MAIN_2, INTER_1, INTER_2, END_1, END_2)
//mapping of R/W command values to each state (START, MAIN_1, MAIN_2, INTER_1, INTER_2, END_1, END_2)
localparam [0:6] READ_CMD_MAP = $READ_CMD_MAP$;
localparam [0:6] WRITE_CMD_MAP = $WRITE_CMD_MAP$;
......@@ -37,7 +37,7 @@ integer counter_loop_main;
integer counter_loop_inter;
assign cmd_read = READ_CMD_MAP[state_next]; //read command indicates read in *upcoming* cycle, due to how schedule is constructed
assign cmd_write = WRITE_CMD_MAP[state];
assign cmd_write = WRITE_CMD_MAP[state];
reg cycle_last;
wire cycle_advance;
......@@ -66,7 +66,7 @@ always @ (state, counter_current, counter_loop_main, counter_loop_inter) begin
//there might not be an end sequence -> restart immediately
if (LOOP_END_1_COUNTER != 0)
state_next = STATE_END_1;
else
else
state_next = STATE_START;
end
end
......@@ -77,7 +77,7 @@ always @ (state, counter_current, counter_loop_main, counter_loop_inter) begin
if (counter_current == LOOP_INTER_1_COUNTER-1) begin
if (LOOP_INTER_2_COUNTER != 0)
state_next = STATE_LOOP_INTER_2;
else
else
state_next = STATE_LOOP_MAIN_1;
end
end
......@@ -141,6 +141,113 @@ always @ (posedge CLK) begin
end
endmodule //controller
// Register-based shift buffer with parallel read access.
//
// Holds DEPTH elements of WIDTH bits each. On every rising CLK edge with
// shift_enable high, all elements move up by one position and shift_in is
// stored at position 0. There is no reset; contents are undefined until
// DEPTH enabled shifts have occurred.
//
// Parameters:
//   WIDTH - bit width of one buffer element
//   DEPTH - number of elements (shift stages)
// Ports:
//   CLK          - clock, rising-edge active
//   shift_enable - active-high clock enable for the shift
//   shift_in     - element written to position 0 on a shift
//   shift_out    - element at position DEPTH-1 (the oldest one)
//   data_out     - all elements flattened; element e occupies bits
//                  [e*WIDTH +: WIDTH], so position 0 sits in the LSBs
module $TOP_MODULE_NAME$_reg_buffer
#(
parameter WIDTH = 1,
parameter DEPTH = 1
)
(
CLK,
shift_enable,
shift_in,
shift_out,
data_out
);
input CLK, shift_enable;
input [WIDTH-1:0] shift_in;
output [WIDTH-1:0] shift_out;
output [WIDTH*DEPTH-1:0] data_out;
// The shift logic below is modelled on the following vendor template, which
// is kept here (commented out) for reference.
// NOTE(review): data_out exposes every stage in parallel; whether the tool
// can still map the stages to SRL primitives presumably depends on which
// taps the parent actually uses - confirm in the synthesis report.
//UG901 template for SRL inference:
// 32-bit Shift Register
// Rising edge clock
// Active high clock enable
// For-loop based template
// File: shift_registers_1.v
//
//module shift_registers_1 (clk, clken, SI, SO);
//parameter WIDTH = 32;
//input clk, clken, SI;
//output SO;
//reg [WIDTH-1:0] shreg;
//
//integer i;
//always @(posedge clk)
//begin
// if (clken)
// begin
// for (i = 0; i < WIDTH-1; i = i+1)
// shreg[i+1] <= shreg[i];
// shreg[0] <= SI;
// end
//end
//assign SO = shreg[WIDTH-1];
//endmodule
// Storage: data[0] is the newest element, data[DEPTH-1] the oldest.
reg [WIDTH-1:0] data [DEPTH-1:0];
// Serial output taps the last stage.
assign shift_out = data[DEPTH-1];
// Flatten the array onto the parallel output bus, one WIDTH-bit slice per stage.
for (genvar e=0; e<DEPTH; e=e+1)
assign data_out[e*WIDTH +: WIDTH] = data[e];
always @ (posedge CLK) begin
if (shift_enable) begin
// Move every element one position up (loop body is empty for DEPTH == 1) ...
for (integer i=DEPTH-1; i>0; i=i-1)
data[i] <= data[i-1];
// ... and take in the new element at position 0.
data[0] <= shift_in;
end
end
endmodule //reg_buffer
// RAM-based shift buffer (serial in, serial out only).
//
// Behaves like a DEPTH-stage shift register: shift_out presents the element
// that was applied to shift_in DEPTH enabled cycles earlier. For DEPTH > 1 the
// elements are kept in a circular buffer (requested as block RAM) that is
// written at addr_w and read, one slot ahead, at addr_r; the registered read
// output supplies the final stage of delay.
//
// Parameters:
//   WIDTH - bit width of one buffer element
//   DEPTH - number of shift stages; values <= 1 are implemented as a single
//           register stage (see below)
// Ports:
//   CLK          - clock, rising-edge active
//   RST          - synchronous, active-low reset of the address pointers;
//                  buffer contents and shift_out are NOT cleared, and no
//                  shifting takes place while RST is low
//   shift_enable - active-high enable; the buffer only advances when high
//   shift_in     - element written on an enabled cycle
//   shift_out    - registered output element
module $TOP_MODULE_NAME$_ram_buffer
#(
    parameter WIDTH = 1,
    parameter DEPTH = 1
)
(
    CLK,
    RST,
    shift_enable,
    shift_in,
    shift_out
);

input CLK, RST, shift_enable;
input [WIDTH-1:0] shift_in;
output [WIDTH-1:0] shift_out;

// Output register: last pipeline stage of the buffer.
reg [WIDTH-1:0] out_reg;
assign shift_out = out_reg;

generate
if (DEPTH > 1) begin : gen_ram
    //todo: minimize width (as reg), make r addr depend on w
    integer addr_w, addr_r;
    (* ram_style = "block" *) reg [WIDTH-1:0] ram [DEPTH-1:0];

    always @(posedge CLK) begin
        if (RST == 1'b0) begin
            // Read pointer starts one slot ahead of the write pointer, so it
            // always addresses the oldest stored element (written DEPTH-1
            // enabled cycles ago).
            addr_w <= 0;
            addr_r <= 1;
        end else begin
            if (shift_enable) begin
                // Non-blocking assignments: the read returns the value stored
                // before this cycle's write.
                ram[addr_w] <= shift_in;
                out_reg <= ram[addr_r];

                // Advance both pointers with wrap-around at DEPTH-1.
                if (addr_w == DEPTH-1)
                    addr_w <= 0;
                else
                    addr_w <= addr_w + 1;

                if (addr_r == DEPTH-1)
                    addr_r <= 0;
                else
                    addr_r <= addr_r + 1;
            end
        end
    end
end else begin : gen_reg
    // DEPTH <= 1: a circular buffer cannot be used here. The reset value
    // addr_r = 1 would lie outside ram[0:0] and never wrap, and reading slot 0
    // while writing it would add a second cycle of delay. A single register
    // gives the required one-stage delay instead.
    always @(posedge CLK) begin
        if (RST == 1'b0) begin
            // Nothing to reset; mirrors the RAM variant, which also holds its
            // contents while RST is low.
        end else if (shift_enable) begin
            out_reg <= shift_in;
        end
    end
end
endgenerate

endmodule //ram_buffer
module $TOP_MODULE_NAME$_wb
#(
parameter IN_WIDTH = 1, //bit-width*C*MMV_in
......@@ -150,12 +257,13 @@ module $TOP_MODULE_NAME$_wb
)
(
CLK,
RST,
data_in,
shift_enable,
data_out
);
input CLK;
input CLK, RST;
input [IN_WIDTH-1:0] data_in;
input shift_enable;
output [OUT_WIDTH-1:0] data_out;
......@@ -163,24 +271,20 @@ output [OUT_WIDTH-1:0] data_out;
//Input REG to enable simultaneous R/W
reg [IN_WIDTH-1:0] reg_input;
//REG FIFOs
$GENERATE_REG_FIFOS$
//BRAM FIFOs
//todo: generate real BRAM shift buffers if these get too large
$GENERATE_BRAM_FIFOS$
//Fixed interconnect between linear buffers
$GENERATE_BUFFER_CONNECTION$
//Fixed REG FIFO <-> output mapping
$GENERATE_OUTPUT_MAPPING$
//main process
//input register logic
integer i;
always @ (posedge CLK) begin
if (shift_enable) begin
//shift logic
$GENERATE_SHIFT_LOGIC$
//shift in new data
reg_input <= data_in;
end
end
......@@ -234,6 +338,7 @@ $TOP_MODULE_NAME$_wb
window_buffer_inst
(
.CLK(ap_clk),
.RST(ap_rst_n),
.data_in(window_buffer_in),
.shift_enable(window_buffer_shift_enable),
.data_out(window_buffer_out)
......@@ -291,9 +396,9 @@ always @ (posedge ap_clk) begin
//count cycle (completed R or W or both (depending on current cycle))
if (cycle == CYCLES_TOTAL-1)
cycle <= 0;
else
cycle <= cycle+1;
else
cycle <= cycle+1;
end else if (write_ok) // successful W in this cycle, but R still outstanding
write_done <= 1'b1; //write can happen even if read is blocked, but only for the current cycle!
end
......
......@@ -505,7 +505,7 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
# example:
# 0: only consecutive access patterns will be implemented in regs, rest in BRAM line buffers
# 2: [0, 3, 6] access pattern is still allowed and will be implemented with 1 7-position shift reg
REG_BRAM_THRESHOLD = 9999
REG_BRAM_THRESHOLD = 8
#--------------------
in_shape = (n,c,h,w) #NCHW
......@@ -932,11 +932,13 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
bram_fifos_depth.append(math.ceil((distance-1)/M)) # really ceil?
# start with new REG FIFO
reg_fifos.append(current)
reg_fifos_depth.append(math.ceil((max(current)+1)/M))
#reg_fifos_depth.append(math.ceil((max(current)+1)/M)) ToDo: fix for M again
reg_fifos_depth.append(len(current))
current = []
current.append(access_idx)
reg_fifos.append(current)
reg_fifos_depth.append(math.ceil((max(current)+1)/M))
#reg_fifos_depth.append(math.ceil((max(current)+1)/M)) ToDo fix for M again
reg_fifos_depth.append(len(current))
f_debug.write("\n"+"Buffer partitioning using REG_BRAM_THRESHOLD=%d" % REG_BRAM_THRESHOLD)
f_debug.write("\n"+"%d REG FIFOs (parallel read access):" % len(reg_fifos))
......@@ -947,17 +949,43 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
code_gen_dict["$GENERATE_REG_FIFOS$"] = []
for i in range(len(reg_fifos)):
code_gen_dict["$GENERATE_REG_FIFOS$"].append(
"""parameter reg_fifo_{id}_len = {len};
reg [IN_WIDTH-1:0] reg_fifo_{id} [reg_fifo_{id}_len-1:0];
""".format(id=i, len=reg_fifos_depth[i]))
#todo: generate actual bram shift buffers instead of regs
"""
wire [IN_WIDTH-1:0] reg_fifo_{id}_in;
wire [IN_WIDTH-1:0] reg_fifo_{id}_out;
wire [IN_WIDTH*{len}-1:0] reg_fifo_{id};
{name}_reg_buffer
#(
.WIDTH(IN_WIDTH),
.DEPTH({len})
)
reg_buffer_inst_{id}
(
.CLK(CLK),
.shift_enable(shift_enable),
.shift_in(reg_fifo_{id}_in),
.shift_out(reg_fifo_{id}_out),
.data_out(reg_fifo_{id})
);""".format(name=self.get_verilog_top_module_name(), id=i, len=reg_fifos_depth[i]))
code_gen_dict["$GENERATE_BRAM_FIFOS$"] = []
for i in range(len(bram_fifos)):
code_gen_dict["$GENERATE_BRAM_FIFOS$"].append(
"""parameter bram_fifo_{id}_len = {len};
reg [IN_WIDTH-1:0] bram_fifo_{id} [bram_fifo_{id}_len-1:0];
""".format(id=i, len=bram_fifos_depth[i]))
"""
wire [IN_WIDTH-1:0] bram_fifo_{id}_in;
wire [IN_WIDTH-1:0] bram_fifo_{id}_out;
{name}_ram_buffer
#(
.WIDTH(IN_WIDTH),
.DEPTH({len})
)
ram_buffer_inst_{id}
(
.CLK(CLK),
.RST(RST),
.shift_enable(shift_enable),
.shift_in(bram_fifo_{id}_in),
.shift_out(bram_fifo_{id}_out)
);""".format(name=self.get_verilog_top_module_name(), id=i, len=bram_fifos_depth[i]))
code_gen_dict["$GENERATE_OUTPUT_MAPPING$"] = []
out_idx = mmv_out-1
......@@ -970,46 +998,32 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
# )
#)
code_gen_dict["$GENERATE_OUTPUT_MAPPING$"].append(
"assign data_out[OUT_ELEM_WIDTH*{out_idx}+:OUT_ELEM_WIDTH] = reg_fifo_{fifo_id}[{access_idx}][OUT_ELEM_WIDTH*{mmv_idx}+:OUT_ELEM_WIDTH];".format(
"assign data_out[OUT_ELEM_WIDTH*{out_idx}+:OUT_ELEM_WIDTH] = reg_fifo_{fifo_id}[{access_idx}*{mmv}*OUT_ELEM_WIDTH+OUT_ELEM_WIDTH*{mmv_idx}+:OUT_ELEM_WIDTH];".format(
out_idx=out_idx, fifo_id=fifo_id,
access_idx=reg_fifos_depth[fifo_id]-1-int((max(reg_fifo)-access_idx)/M),
mmv_idx=(max(reg_fifo)-access_idx)%M
mmv_idx=(max(reg_fifo)-access_idx)%M,
mmv = M
)
)
# reversal: out_idx=0 -> oldest buffer element -> highest access_idx
out_idx = out_idx-1
assert out_idx==-1, "ERROR: Not all output vector elements connected"
code_gen_dict["$GENERATE_SHIFT_LOGIC$"] = []
code_gen_dict["$GENERATE_BUFFER_CONNECTION$"] = []
for i in range(len(reg_fifos)):
if i == 0:
# first FIFO containing newest elements -> input comes from input reg
code_gen_dict["$GENERATE_SHIFT_LOGIC$"].append(
"""for (i=reg_fifo_{fifo_id}_len-1; i>0; i=i-1)
reg_fifo_{fifo_id}[i] <= reg_fifo_{fifo_id}[i-1];
reg_fifo_{fifo_id}[0] <= reg_input;""".format(
fifo_id=i,
)
)
code_gen_dict["$GENERATE_BUFFER_CONNECTION$"].append(
"""assign reg_fifo_{fifo_id}_in = reg_input;""".format(fifo_id=i,))
else:
# other REG FIFOs -> input comes from connected BRAM FIFO (line buffer)
input_fifo_id = i-1
code_gen_dict["$GENERATE_SHIFT_LOGIC$"].append(
"""for (i=reg_fifo_{fifo_id}_len-1; i>0; i=i-1)
reg_fifo_{fifo_id}[i] <= reg_fifo_{fifo_id}[i-1];
reg_fifo_{fifo_id}[0] <= bram_fifo_{input_fifo_id} [bram_fifo_{input_fifo_id}_len-1];""".format(
fifo_id=i, input_fifo_id=input_fifo_id
)
)
code_gen_dict["$GENERATE_BUFFER_CONNECTION$"].append(
"""assign reg_fifo_{fifo_id}_in = bram_fifo_{input_fifo_id}_out;""".format(fifo_id=i, input_fifo_id=input_fifo_id))
for i in range(len(bram_fifos)):
input_fifo_id = i
code_gen_dict["$GENERATE_SHIFT_LOGIC$"].append(
"""for (i=bram_fifo_{fifo_id}_len-1; i>0; i=i-1)
bram_fifo_{fifo_id}[i] <= bram_fifo_{fifo_id}[i-1];
bram_fifo_{fifo_id}[0] <= reg_fifo_{input_fifo_id} [reg_fifo_{input_fifo_id}_len-1];""".format(
fifo_id=i, input_fifo_id=input_fifo_id
)
)
code_gen_dict["$GENERATE_BUFFER_CONNECTION$"].append(
"""assign bram_fifo_{fifo_id}_in = reg_fifo_{input_fifo_id}_out;""".format(fifo_id=i, input_fifo_id=input_fifo_id))
# Generate read schedule (when data is read from input, written to buffer)
# code_gen_dict["$GENERATE_READ_SCHEDULE$"] = []
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment