From 0f866f1e7123a625f13692ac7cc59c11b530da21 Mon Sep 17 00:00:00 2001
From: Felix Jentzsch <felix.jentzsch@upb.de>
Date: Tue, 22 Mar 2022 11:58:14 +0100
Subject: [PATCH] Replace microprogramming with controller

---
 finn-rtllib/swg/swg_hdl_template.v            | 206 ++++++++---
 .../convolutioninputgenerator_rtl.py          | 334 ++++++++++++++++--
 2 files changed, 458 insertions(+), 82 deletions(-)

diff --git a/finn-rtllib/swg/swg_hdl_template.v b/finn-rtllib/swg/swg_hdl_template.v
index 44fd41aba..88ef58531 100755
--- a/finn-rtllib/swg/swg_hdl_template.v
+++ b/finn-rtllib/swg/swg_hdl_template.v
@@ -1,11 +1,146 @@
-// ==============================================================
-// RTL generated by Vivado(TM) HLS - High-Level Synthesis from C, C++ and OpenCL
-// Version: 2020.1
-// Copyright (C) 1986-2020 Xilinx, Inc. All Rights Reserved.
-// 
-// ===========================================================
-
 `timescale 1 ns / 1 ps 
+
+module $TOP_MODULE_NAME$_controller
+(
+    CLK,
+    cycle,
+    cmd_read,
+    cmd_write
+);
+//schedule controller: FSM over START/MAIN/INTER/END states; the state selects the read/write command bits
+input CLK;
+input [31:0] cycle; //todo: minimize width or switch to single bit flag/advance wire
+output cmd_read;
+output cmd_write;
+
+////code generation part:
+//mapping of R/W command values to each state (START, MAIN_1, MAIN_2, INTER_1, INTER_2, END_1, END_2)
+localparam [0:6] READ_CMD_MAP = $READ_CMD_MAP$;
+localparam [0:6] WRITE_CMD_MAP = $WRITE_CMD_MAP$;
+
+localparam START_COUNTER = $START_COUNTER$;
+localparam LOOP_MAIN_COUNTER = $LOOP_MAIN_COUNTER$;
+localparam LOOP_MAIN_1_COUNTER = $LOOP_MAIN_1_COUNTER$;
+localparam LOOP_MAIN_2_COUNTER = $LOOP_MAIN_2_COUNTER$;
+localparam LOOP_INTER_COUNTER = $LOOP_INTER_COUNTER$;
+localparam LOOP_INTER_1_COUNTER = $LOOP_INTER_1_COUNTER$;
+localparam LOOP_INTER_2_COUNTER = $LOOP_INTER_2_COUNTER$;
+localparam LOOP_END_1_COUNTER = $LOOP_END_1_COUNTER$;
+localparam LOOP_END_2_COUNTER = $LOOP_END_2_COUNTER$;
+////
+
+//state and counters
+reg [2:0] state, state_next;
+parameter STATE_START = 0, STATE_LOOP_MAIN_1 = 1, STATE_LOOP_MAIN_2 = 2, STATE_LOOP_INTER_1 = 3, STATE_LOOP_INTER_2 = 4, STATE_END_1 = 5, STATE_END_2 = 6;
+integer counter_current; //todo: minimize width
+integer counter_loop_main; //index of the current MAIN loop iteration (wraps at LOOP_MAIN_COUNTER-1)
+integer counter_loop_inter; //number of INTER sequences entered so far (wraps at LOOP_INTER_COUNTER)
+
+assign cmd_read = READ_CMD_MAP[state_next]; //read command indicates read in *upcoming* cycle, due to how schedule is constructed
+assign cmd_write = WRITE_CMD_MAP[state];
+
+reg [31:0] cycle_last; //same width as cycle: a 1-bit reg would keep only cycle[0] and make cycle_advance stick high
+wire cycle_advance;
+assign cycle_advance = !(cycle == cycle_last); //cycle differs from the value sampled at the previous clock edge
+
+//combinational next state logic
+always @ (state, counter_current, counter_loop_main, counter_loop_inter) begin
+    state_next = state; //default
+    case (state)
+        STATE_START:
+            if (counter_current == START_COUNTER-1)
+                state_next = STATE_LOOP_MAIN_1;
+
+        STATE_LOOP_MAIN_1:
+            if (counter_current == LOOP_MAIN_1_COUNTER-1)
+                state_next = STATE_LOOP_MAIN_2;
+
+        STATE_LOOP_MAIN_2: begin
+            if (counter_current == LOOP_MAIN_2_COUNTER-1) begin
+                state_next = STATE_LOOP_MAIN_1;
+                if (counter_loop_main == LOOP_MAIN_COUNTER-1) begin
+                    //no -1 because this counter marks the currently active iteration, not finished iterations
+                    if ((LOOP_INTER_COUNTER != 0) && (counter_loop_inter != LOOP_INTER_COUNTER))
+                        state_next = STATE_LOOP_INTER_1;
+                    else begin
+                        //there might not be an end sequence -> restart immediately
+                        if (LOOP_END_1_COUNTER != 0)
+                            state_next = STATE_END_1;
+                        else
+                            state_next = STATE_START;
+                    end
+                end
+            end
+        end
+
+        STATE_LOOP_INTER_1: begin
+            if (counter_current == LOOP_INTER_1_COUNTER-1) begin
+                if (LOOP_INTER_2_COUNTER != 0)
+                    state_next = STATE_LOOP_INTER_2;
+                else
+                    state_next = STATE_LOOP_MAIN_1;
+            end
+        end
+
+        STATE_LOOP_INTER_2:
+            if (counter_current == LOOP_INTER_2_COUNTER-1)
+                state_next = STATE_LOOP_MAIN_1;
+
+        STATE_END_1: begin
+            if (counter_current == LOOP_END_1_COUNTER-1) begin
+                if (LOOP_END_2_COUNTER != 0)
+                    state_next = STATE_END_2;
+                else
+                    state_next = STATE_START;
+            end
+        end
+
+        STATE_END_2:
+            if (counter_current == LOOP_END_2_COUNTER-1)
+                state_next = STATE_START;
+    endcase
+end
+
+//sequential logic
+always @ (posedge CLK) begin
+    if (cycle == 0) begin //cycle == 0 doubles as synchronous reset of the controller
+        counter_current <= 0;
+        counter_loop_main <= 0;
+        counter_loop_inter <= 0;
+        cycle_last <= 0;
+        state <= STATE_START;
+    end else begin
+        cycle_last <= cycle;
+        state <= state_next;
+
+        if (cycle_advance) begin
+            counter_current <= counter_current+1;
+        end
+
+        if (state != state_next) begin //state change: restart the per-state counter (overrides the increment above)
+            counter_current <= 0;
+
+            //count up main loop upon re-entering this loop (not on first enter from start)
+            if ((state_next == STATE_LOOP_MAIN_1) && (state != STATE_START)) begin
+                if (counter_loop_main == LOOP_MAIN_COUNTER-1) begin
+                    counter_loop_main <= 0;
+                end else begin
+                    counter_loop_main <= counter_loop_main+1;
+                end
+            end
+
+            if (state_next == STATE_LOOP_INTER_1) begin
+                if (counter_loop_inter == LOOP_INTER_COUNTER) begin //no -1 because this counter marks the currently active iteration, not finished iterations
+                    counter_loop_inter <= 0;
+                end else begin
+                    counter_loop_inter <= counter_loop_inter+1;
+                end
+            end
+        end
+    end
+end
+endmodule //controller
+
 module $TOP_MODULE_NAME$_wb
 #(
     parameter IN_WIDTH = 1, //bit-width*C*MMV_in
@@ -63,7 +198,6 @@ module $TOP_MODULE_NAME$ (
         out_V_V_TREADY
 );
 
-//parameters
 parameter BIT_WIDTH = $BIT_WIDTH$;
 parameter SIMD = $SIMD$; //assuming SIMD=C for now
 parameter MMV_IN = $MMV_IN$; //assuming MMV_IN=1*M for now
@@ -71,7 +205,6 @@ parameter MMV_OUT = $MMV_OUT$; //assuming MMV_OUT=K*M for now
 parameter BUF_IN_WIDTH = BIT_WIDTH * SIMD * MMV_IN; //bit-width*C*MMV_in
 parameter BUF_OUT_ELEM_WIDTH = BIT_WIDTH * SIMD; //bit-width*C
 parameter BUF_OUT_WIDTH = BIT_WIDTH * SIMD * MMV_OUT; //bit-width*C*MMV_out
-
 parameter CYCLES_TOTAL = $CYCLES_TOTAL$;
 parameter BUF_ELEM_TOTAL = $BUF_ELEM_TOTAL$;
 
@@ -106,76 +239,63 @@ window_buffer_inst
     .data_out(window_buffer_out)
 );
 
-//FSM state
-//reg [1:0] state;
-//parameter STATE_RESET = 0, STATE_OPERATE = 1, S2 = 2;
-
-//main cycle counter (where either read/write/both happen, resets for each image)
-integer cycle;
-integer cycle_last;
-
-//read/write loop state
-wire read_state;
-wire write_state;
+integer cycle; //main cycle counter (where either read/write/both happen, resets for each image)
+wire read_cmd;
+wire write_cmd;
 reg write_done; //keep track if W of current cycle was already completed, but we still wait on a R in the same cycle
 
+$TOP_MODULE_NAME$_controller
+controller_inst
+(
+    .CLK(ap_clk),
+    .cycle(cycle),
+    .cmd_read(read_cmd),
+    .cmd_write(write_cmd)
+);
+
 wire write_blocked;
-assign write_blocked = write_state && !out_V_V_TREADY && !write_done;
+assign write_blocked = write_cmd && !out_V_V_TREADY && !write_done;
 
 wire read_ok;
 // with transition to next cycle:
 //              want to read      can read       source is ready (waiting on VALID allowed)
-assign read_ok = read_state && !write_blocked && in0_V_V_TVALID;
+assign read_ok = read_cmd && !write_blocked && in0_V_V_TVALID;
 
 wire write_ok;
 // with transition to next cycle:
 //              output is VALID   sink is ready  sink has already read (we are waiting on source)
-assign write_ok = write_state && (out_V_V_TREADY || write_done);
+assign write_ok = write_cmd && (out_V_V_TREADY || write_done);
 
 wire advance;
-//            includes waiting on W    if W-only cycle: wait only on W
-assign advance =      read_ok        ||   (!read_state && write_ok);
+//            includes waiting on W    if W-only cycle: wait only on W     no R/W to wait for
+assign advance =      read_ok        ||   (!read_cmd && write_ok)    || (!read_cmd && !write_cmd);
 
 //assign buffer control
-//todo: if mmv_out < k: might not shift and/or write for multiple read_state cycles
+//todo: if mmv_out < k: might not shift and/or write for multiple read_cmd cycles
 assign window_buffer_shift_enable = advance;
 
 //assign I/O ports
 assign window_buffer_in = in0_V_V_TDATA;
 assign out_V_V_TDATA = window_buffer_out;
 assign in0_V_V_TREADY = ap_rst_n && read_ok; //only asserted if data is available and we can store it (allowed)
-assign out_V_V_TVALID = ap_rst_n && write_state && !write_done; //only asserted if we have data available and it has not been read yet (don't wait for READY from sink)
-
-//read schedule
-//todo: generate differently
-$GENERATE_READ_SCHEDULE$
-
-//write schedule
-//todo: generate differently
-$GENERATE_WRITE_SCHEDULE$
+assign out_V_V_TVALID = ap_rst_n && write_cmd && !write_done; //only asserted if we have data available and it has not been read yet (don't wait for READY from sink)
 
 //main process for advancing cycle count
 always @ (posedge ap_clk) begin
     if (ap_rst_n == 1'b0) begin
         cycle <= 0;
-        cycle_last <= 0;
     end else begin
         if (advance) begin
             write_done <= 1'b0; //reset flag
 
             //count cycle (completed R or W or both (depending on current cycle))
-            cycle_last <= cycle; //cycle last is used to generate write_state (due to how schedule is constructed)
             if (cycle == CYCLES_TOTAL-1)
                 cycle <= 0;
             else 
                 cycle <= cycle+1; 
         
-        end else begin
-            if (write_ok) begin
-                // successful W in this cycle, but R still outstanding
-                write_done <= 1'b1; //write can happen even if read is blocked, but only for the current cycle!
-            end
-        end
+        end else if (write_ok) // successful W in this cycle, but R still outstanding
+            write_done <= 1'b1; //write can happen even if read is blocked, but only for the current cycle!
     end
 end
 
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py
index e0285cd47..a54dea916 100755
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py
@@ -505,7 +505,7 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
         # example:
         # 0: only consecutive access patterns will be implemented in regs, rest in BRAM line buffers
         # 2: [0, 3, 6] access pattern is still allowed and will be implemented with 1 7-position shift reg
-        REG_BRAM_THRESHOLD = 1
+        REG_BRAM_THRESHOLD = 9999
         #--------------------
 
         in_shape = (n,c,h,w) #NCHW
@@ -595,6 +595,10 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
         # buffer schedule (write from input, read to output)
         schedule_write = []
         schedule_read = []
+
+        schedule = []
+        schedule_prev = ''
+
         next_in_px = 0
 
         idx_px_relative = idx_px.copy()
@@ -611,6 +615,12 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
                         next_in_px += 1
                     schedule_write.append(1)
                     schedule_read.append(0)
+                    if schedule_prev == 'w':
+                        count, cmd = schedule[-1]
+                        schedule[-1] = (count+1, cmd)
+                    else:
+                        schedule.append((1, 'w'))
+                        schedule_prev = 'w'
             
             # discard unused buffer elements (assumes in-order access)
             oldest_px = min(idx_px_relative[:,x])
@@ -635,12 +645,24 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
             # simultaneously load next pixel(s) into buffer if there are any left
             if next_in_px > (h_padded*w_padded-1):
                 schedule_write.append(0)
+                if schedule_prev == 'r':
+                    count, cmd = schedule[-1]
+                    schedule[-1] = (count+1, cmd)
+                else:
+                    schedule.append((1, 'r'))
+                    schedule_prev = 'r'
             else:
                 # load M inputs at once
                 for m in range(M):
                     buffer.append(next_in_px)
                     next_in_px += 1
                 schedule_write.append(1)
+                if schedule_prev == 'wr':
+                    count, cmd = schedule[-1]
+                    schedule[-1] = (count+1, cmd)
+                else:
+                    schedule.append((1, 'wr'))
+                    schedule_prev = 'wr'
 
 
         # find buffer access patterns
@@ -649,7 +671,198 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
             if idx_px_relative[:,x].tolist() not in buffer_access_patterns:
                 buffer_access_patterns.append(idx_px_relative[:,x].tolist())
                 
+        # from itertools import groupby
+        # schedule_write_compressed = ''.join('(' + str(k) + ',' + str(sum(1 for x in g)) + '),' for k, g in groupby(schedule_write))
+        # schedule_read_compressed = ''.join('(' + str(k) + ',' + str(sum(1 for x in g)) + '),' for k, g in groupby(schedule_read))
+
+        # analyse schedule
+        # class sched_gen:
+        #     start_counter = 0
+        #     start_val = 0
+
+        #     end_last_sequence_counter = 0
+        #     end_sequence = []
+
+        #     outer_counter = 0
+        #     outer_sequence_counter = 0
+        #     outer_sequence_val = 0
+
+        #     inner_counter = 0
+        #     inner_sequence = []
+
+        #     def __str__(self):
+        #         return "\nstart: %d x %d\n %d x\n   %d x %s + %d x %d\nend: %d x %s + %s\n" % (
+        #             self.start_counter,
+        #             self.start_val,
+        #             self.outer_counter,
+        #             self.inner_counter,
+        #             str(self.inner_sequence),
+        #             self.outer_sequence_counter,
+        #             self.outer_sequence_val,
+        #             self.end_last_sequence_counter,
+        #             str(self.inner_sequence),
+        #             self.end_sequence
+        #         )
+
+        
+        # def analyse_schedule(schedule):
+        #     generator = sched_gen()
+            
+        #     #determine start sequence
+        #     for i, v in enumerate(schedule):
+        #         if i > 0 and v != schedule[i-1]:
+        #             generator.start_counter = i
+        #             generator.start_val = schedule[i-1]
+        #             break
+
+        #     #determine inner loop/sequence
+        #     sequence_MAX = 10
+        #     schedule = schedule[generator.start_counter:] # cut off processed entries
+        #     sequence = []
+        #     repititions = 0
+        #     i = 0
+        #     while i < len(schedule):
+        #         if not sequence:
+        #             sequence.append(schedule[i])
+        #             i = i+1
+        #         else:
+        #             # is this a beginning of a repitition of the current sequence?
+        #             if i + len(sequence) < len(schedule) and all([schedule[i+offset] == sequence[offset] for offset in range(len(sequence))]):  
+        #                 repititions = repititions + 1
+        #                 i = i+len(sequence)
+        #             else:
+        #                 # did we already count repitions of the sequence?
+        #                 sequence_candidate = sequence + sequence * repititions
+        #                 sequence_candidate.append(schedule[i])
+        #                 if len(sequence_candidate) < sequence_MAX:
+        #                     sequence = sequence_candidate.copy()
+        #                     repititions = 0
+        #                     i = i+1
+        #                 else:
+        #                     schedule = schedule[i:] # cut off processed entries
+        #                     break
+        #     generator.inner_counter = repititions + 1
+        #     generator.inner_sequence = sequence
+            
+        #     #determine outer sequence
+        #     for i, v in enumerate(schedule):
+        #         if i > 0 and v != schedule[i-1]:
+        #             generator.outer_sequence_counter = i
+        #             generator.outer_sequence_val = schedule[i-1]
+        #             break
+
+        #     schedule = schedule[generator.outer_sequence_counter:] # cut off processed entries
+
+        #     sequence_to_compare = generator.inner_sequence * generator.inner_counter + [generator.outer_sequence_val] * generator.outer_sequence_counter
+
+        #     generator.outer_counter = 1
+        #     i = 0
+        #     while i < len(schedule):
+        #         # is this a beginning of a repitition of the current sequence?
+        #         if i + len(sequence_to_compare) < len(schedule) and all([schedule[i+offset] == sequence_to_compare[offset] for offset in range(len(sequence_to_compare))]):
+        #             generator.outer_counter = generator.outer_counter + 1
+        #             i = i+len(sequence_to_compare)
+        #         else:
+        #             schedule = schedule[i:] # cut off processed entries
+        #             break
+
+        #     #determine end sequence
+        #     #for i, v in enumerate(schedule):
+        #     #    if i > 0 and v != schedule[i-1]:
+        #     #        generator.end_counter = i
+        #     #        generator.end_val = schedule[i-1]
+        #     #        break
         
+        #     sequence = generator.inner_sequence
+        #     repititions = 0
+        #     i = 0
+        #     while i < len(schedule):
+        #         # is this a beginning of a repitition of the current sequence?
+        #         if i + len(sequence) < len(schedule) and all([schedule[i+offset] == sequence[offset] for offset in range(len(sequence))]):  
+        #             repititions = repititions + 1
+        #             i = i+len(sequence)
+        #         else:
+        #             schedule = schedule[i:] # cut off processed entries
+        #             break
+        #     generator.end_last_sequence_counter = repititions
+
+        #     #remainder
+        #     generator.end_sequence = schedule
+
+        #     return generator
+
+        def compact_schedule(schedule):
+            """Split a run-length schedule [(count, cmd), ...] into start / loop / end sequences (returned as a 6-tuple)."""
+            # leave first sequence (pre-load) as is
+            start_sequence = schedule[0]
+            # first loop pattern: schedule[1], extended by schedule[2] if present (tuple concatenation -> flat 4-tuple)
+            loop_sequence_1_counter = 1
+            loop_sequence_1 = schedule[1]
+
+            loop_counter = 0
+            loop_sequence_2 = None
+            end_sequence = None
+
+            i = 2
+            if i < len(schedule):
+                loop_sequence_1 += schedule[i]
+                i += 1
+            # count back-to-back repetitions of the two-entry loop pattern
+            while i+1 < len(schedule):
+                candidate = schedule[i] + schedule[i+1]
+                if candidate == loop_sequence_1:
+                    loop_sequence_1_counter += 1
+                    i += 2
+                else:
+                    break
+            # the next entry (if any) starts the second pattern
+            if i < len(schedule):
+                loop_sequence_2 = schedule[i]
+                i += 1
+            # at least two more entries: look for repetitions of (loop_sequence_2 + repeated loop_sequence_1)
+            if i+1 < len(schedule):
+                candidate = schedule[i] + schedule[i+1]
+                if candidate != loop_sequence_1:
+                    loop_sequence_2 += schedule[i]
+
+                i -= 1  # step back to the first entry of loop_sequence_2
+                loop_sequence_total_len = (int(len(loop_sequence_2)/2)) + loop_sequence_1_counter*(int(len(loop_sequence_1)/2))  # length in schedule entries (2 tuple elements per entry)
+                loop_sequence_total = loop_sequence_2 + loop_sequence_1_counter*loop_sequence_1
+                while i+loop_sequence_total_len < len(schedule):
+                    candidate = schedule[i] 
+                    for x in range (i+1, i+loop_sequence_total_len):
+                        candidate += schedule[x]
+
+                    if candidate == loop_sequence_total:
+                        loop_counter += 1
+                        i += loop_sequence_total_len
+                    else:
+                        break
+
+            else:
+                if i < len(schedule):
+                    end_sequence = loop_sequence_2 + schedule[i]
+                    i += 1
+                    loop_sequence_2 = None
+                else:
+                    end_sequence = loop_sequence_2
+                    loop_sequence_2 = None
+            # a single leftover entry replaces the end sequence
+            if i < len(schedule):
+                end_sequence = schedule[i]
+                i += 1
+
+            assert i == len(schedule), "ERROR: schedule could not be compacted %d / %d" %(i, len(schedule))
+
+            return (
+                   start_sequence,
+                   loop_counter,
+                   loop_sequence_1_counter,
+                   loop_sequence_1,
+                   loop_sequence_2,
+                   end_sequence
+                )
+
         f_debug.write("\n"+"max buffer size observed: %d" %(buffer_max_size))
         f_debug.write("\n"+"output vector elements: relative buffer indices")
         f_debug.write("\n"+str(idx_px_relative))
@@ -659,9 +872,21 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
         f_debug.write("\n"+"buffer write schedule (%d cycles)" % len(schedule_write))
         f_debug.write("\n"+str(schedule_write))
         f_debug.write("\n"+"writing buffer in %d cycles" % schedule_write.count(1))
+        #f_debug.write("\n"+"buffer write schedule COMPRESSED")
+        #f_debug.write("\n"+str(schedule_write_compressed))
+        #f_debug.write("\n"+"buffer write schedule ANALYZED")
+        #f_debug.write("\n"+str(analyse_schedule(schedule_write)))
         f_debug.write("\n"+"buffer read schedule (%d cycles)" % len(schedule_read))
         f_debug.write("\n"+str(schedule_read))
         f_debug.write("\n"+"reading buffer in %d cycles" % schedule_read.count(1))
+        #f_debug.write("\n"+"buffer read schedule COMPRESSED")
+        #f_debug.write("\n"+str(schedule_read_compressed))
+        #f_debug.write("\n"+"buffer read schedule ANALYZED")
+        #f_debug.write("\n"+str(analyse_schedule(schedule_read)))
+        f_debug.write("\n"+"buffer rw schedule NEW")
+        f_debug.write("\n"+str(schedule))
+        f_debug.write("\n"+"buffer rw schedule NEW compacted")
+        f_debug.write("\n"+"\nstart_sequence: %s\nloop_counter: %s\nloop_sequence_1_counter: %s\nloop_sequence_1: %s\nloop_sequence_2: %s\nend_sequence: %s\n" % compact_schedule(schedule))
 
         assert len(schedule_write) == len(schedule_read), "ERROR: Schedules have different lenghts"
         cycles_total = len(schedule_write)
@@ -704,7 +929,7 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
                 else:
                     # assign skipped accesses to new BRAM FIFO
                     bram_fifos.append([-1]*(distance-1))
-                    bram_fifos_depth.append((distance-1)/M)
+                    bram_fifos_depth.append(math.ceil((distance-1)/M)) # really ceil?
                     # start with new REG FIFO
                     reg_fifos.append(current)
                     reg_fifos_depth.append(math.ceil((max(current)+1)/M))
@@ -787,38 +1012,76 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
             )
 
         # Generate read schedule (when data is read from input, written to buffer)
-        code_gen_dict["$GENERATE_READ_SCHEDULE$"] = []
-        schedule_as_string = ""
-        #todo: change naming to swap write/read
-        for i in schedule_write:
-            if i == 1:
-                schedule_as_string += "1'b1,"
+        # code_gen_dict["$GENERATE_READ_SCHEDULE$"] = []
+        # schedule_as_string = ""
+        # #todo: change naming to swap write/read
+        # for i in schedule_write:
+        #     if i == 1:
+        #         schedule_as_string += "1'b1,"
+        #     else:
+        #         schedule_as_string += "1'b0,"
+        # schedule_as_string = schedule_as_string[:-1] # remove trailing ','
+        # code_gen_dict["$GENERATE_READ_SCHEDULE$"].append(
+        #     "localparam [0:{len}-1] READ_SCHEDULE = {{{str}}};".format(len=cycles_total, str=schedule_as_string)
+        # )
+        # code_gen_dict["$GENERATE_READ_SCHEDULE$"].append(
+        #     "assign read_state = READ_SCHEDULE[cycle];"
+        # )
+
+        # # Generate write schedule (when data is written to output, read from buffer)
+        # code_gen_dict["$GENERATE_WRITE_SCHEDULE$"] = []
+        # schedule_as_string = ""
+        # #todo: change naming to swap write/read
+        # for i in schedule_read:
+        #     if i == 1:
+        #         schedule_as_string += "1'b1,"
+        #     else:
+        #         schedule_as_string += "1'b0,"
+        # schedule_as_string = schedule_as_string[:-1] # remove trailing ','
+        # code_gen_dict["$GENERATE_WRITE_SCHEDULE$"].append(
+        #     "localparam [0:{len}-1] WRITE_SCHEDULE = {{{str}}};".format(len=cycles_total, str=schedule_as_string)
+        # )
+        # code_gen_dict["$GENERATE_WRITE_SCHEDULE$"].append(
+        #     "assign write_state = WRITE_SCHEDULE[cycle_last];"
+        # )
+
+        def convert_tuple(seq):  # expand a 2-/4-element sequence (or None) to (count, bits, count, bits)
+            mapping = {'w': ("1'b1", "1'b0"),  # cmd -> (READ_CMD_MAP bit, WRITE_CMD_MAP bit) as Verilog literals
+                        'r': ("1'b0", "1'b1"),
+                        'wr':("1'b1", "1'b1"),
+                        'n': ("1'b0", "1'b0")}
+            if seq:
+                if len(seq) == 2:
+                    return (seq[0], mapping[seq[1]], 0, mapping['n'])  # single entry: second slot is a zero-length no-op
+                if len(seq) == 4:  # NOTE(review): any other non-empty length falls through and returns None
+                    return (seq[0], mapping[seq[1]], seq[2], mapping[seq[3]])
             else:
-                schedule_as_string += "1'b0,"
-        schedule_as_string = schedule_as_string[:-1] # remove trailing ','
-        code_gen_dict["$GENERATE_READ_SCHEDULE$"].append(
-            "localparam [0:{len}-1] READ_SCHEDULE = {{{str}}};".format(len=cycles_total, str=schedule_as_string)
-        )
-        code_gen_dict["$GENERATE_READ_SCHEDULE$"].append(
-            "assign read_state = READ_SCHEDULE[cycle];"
-        )
+                return (0, mapping['n'], 0, mapping['n'])  # empty/None sequence: two zero-length no-ops
 
-        # Generate write schedule (when data is written to output, read from buffer)
-        code_gen_dict["$GENERATE_WRITE_SCHEDULE$"] = []
-        schedule_as_string = ""
-        #todo: change naming to swap write/read
-        for i in schedule_read:
-            if i == 1:
-                schedule_as_string += "1'b1,"
-            else:
-                schedule_as_string += "1'b0,"
-        schedule_as_string = schedule_as_string[:-1] # remove trailing ','
-        code_gen_dict["$GENERATE_WRITE_SCHEDULE$"].append(
-            "localparam [0:{len}-1] WRITE_SCHEDULE = {{{str}}};".format(len=cycles_total, str=schedule_as_string)
-        )
-        code_gen_dict["$GENERATE_WRITE_SCHEDULE$"].append(
-            "assign write_state = WRITE_SCHEDULE[cycle_last];"
-        )
+        start_sequence,loop_counter,loop_sequence_1_counter,loop_sequence_1,loop_sequence_2,end_sequence = compact_schedule(schedule)
+
+        start_sequence = convert_tuple(start_sequence)
+        loop_sequence_1 = convert_tuple(loop_sequence_1)
+        loop_sequence_2 = convert_tuple(loop_sequence_2)
+        end_sequence = convert_tuple(end_sequence)
+
+        code_gen_dict["$START_COUNTER$"]=[str(start_sequence[0])]
+        code_gen_dict["$LOOP_MAIN_COUNTER$"]=[str(loop_sequence_1_counter)]
+        code_gen_dict["$LOOP_INTER_COUNTER$"]=[str(loop_counter)]
+
+        code_gen_dict["$LOOP_MAIN_1_COUNTER$"]=[str(loop_sequence_1[0])]
+        code_gen_dict["$LOOP_MAIN_2_COUNTER$"]=[str(loop_sequence_1[2])]
+
+        code_gen_dict["$LOOP_INTER_1_COUNTER$"]=[str(loop_sequence_2[0])]
+        code_gen_dict["$LOOP_INTER_2_COUNTER$"]=[str(loop_sequence_2[2])]
+
+        code_gen_dict["$LOOP_END_1_COUNTER$"]=[str(end_sequence[0])]
+        code_gen_dict["$LOOP_END_2_COUNTER$"]=[str(end_sequence[2])]
+
+        code_gen_dict["$READ_CMD_MAP$"]=["{{ {}, {}, {}, {}, {}, {}, {} }}".format(
+            start_sequence[1][0],loop_sequence_1[1][0],loop_sequence_1[3][0],loop_sequence_2[1][0],loop_sequence_2[3][0],end_sequence[1][0],end_sequence[3][0])]
+        code_gen_dict["$WRITE_CMD_MAP$"]=["{{ {}, {}, {}, {}, {}, {}, {} }}".format(
+            start_sequence[1][1],loop_sequence_1[1][1],loop_sequence_1[3][1],loop_sequence_2[1][1],loop_sequence_2[3][1],end_sequence[1][1],end_sequence[3][1])]
 
         with open("/workspace/finn/finn-rtllib/swg/swg_hdl_template.v", "r") as f:
             template = f.read()
@@ -871,17 +1134,10 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
         """Constructs and returns the TCL for node instantiation in Vivado IPI."""
         vlnv = self.get_nodeattr("ip_vlnv")
         code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
-        
-        #cmd = ["create_bd_cell -type ip -vlnv %s %s" % (vlnv, self.onnx_node.name)]
 
         cmd = ["add_files -norecurse %s" % (os.path.join(code_gen_dir, self.get_nodeattr("gen_top_module") + "_hdl_gen.v")),
             "create_bd_cell -type module -reference %s %s" % (self.get_nodeattr("gen_top_module"), self.onnx_node.name)]
 
-        #update_compile_order -fileset sources_1
-        #add_files -norecurse C:/Users/felix/Downloads/swg_hdl_generated.v
-        #update_compile_order -fileset sources_1
-        #create_bd_cell -type module -reference ConvolutionInputGenerator_rtl_0_ConvolutionInputGenerator_rtl_0 ConvolutionInputGene_0
-
         return cmd
 
     def code_generation_ipgen(self, model, fpgapart, clk):
-- 
GitLab