diff --git a/finn-rtllib/swg/swg_hdl_template.v b/finn-rtllib/swg/swg_hdl_template.v index b0e00ea4d23395a446cc3522e3d2d61b158dc5e3..195075724565081d11c055351f6199153d37746f 100755 --- a/finn-rtllib/swg/swg_hdl_template.v +++ b/finn-rtllib/swg/swg_hdl_template.v @@ -6,7 +6,7 @@ // =========================================================== `timescale 1 ns / 1 ps -module window_buffer +module $TOP_MODULE_NAME$_wb #( parameter IN_WIDTH = 1, //c*bit-width parameter OUT_WIDTH = 1, //c*bit-width*MMV_out @@ -76,9 +76,11 @@ parameter BUF_ELEM_TOTAL = $BUF_ELEM_TOTAL$; //IO ports input ap_clk; input ap_rst_n; +(* X_INTERFACE_PARAMETER = "FREQ_HZ 250000000.000000" *) input [BUF_IN_WIDTH-1:0] in0_V_V_TDATA; input in0_V_V_TVALID; output in0_V_V_TREADY; +(* X_INTERFACE_PARAMETER = "FREQ_HZ 250000000.000000" *) output [BUF_OUT_WIDTH-1:0] out_V_V_TDATA; output out_V_V_TVALID; input out_V_V_TREADY; @@ -87,7 +89,7 @@ input out_V_V_TREADY; wire [BUF_IN_WIDTH-1:0] window_buffer_in; wire [BUF_OUT_WIDTH-1:0] window_buffer_out; wire window_buffer_shift_enable; -window_buffer +$TOP_MODULE_NAME$_wb #( .IN_WIDTH(BUF_IN_WIDTH), .OUT_WIDTH(BUF_OUT_WIDTH), @@ -102,28 +104,44 @@ window_buffer_inst ); //FSM state -reg [1:0] state; -parameter STATE_RESET = 0, STATE_OPERATE = 1, S2 = 2; +//reg [1:0] state; +//parameter STATE_RESET = 0, STATE_OPERATE = 1, S2 = 2; //main cycle counter (where either read/write/both happen, resets for each image) integer cycle; +integer cycle_last; //read/write loop state wire read_state; wire write_state; +reg write_done; //keep track if W of current cycle was already completed, but we still wait on a R in the same cycle -//output registers -reg out_V_V_TVALID_reg; +wire write_blocked; +assign write_blocked = write_state && !out_V_V_TREADY && !write_done; + +wire read_ok; +// with transition to next cycle: +// want to read can read source is ready (waiting on VALID allowed) +assign read_ok = read_state && !write_blocked && in0_V_V_TVALID; + +wire write_ok; +// with transition to next cycle: +// output is VALID sink is ready sink has already read (we are waiting on source) +assign write_ok = write_state && (out_V_V_TREADY || write_done); + +wire advance; +// includes waiting on W if W-only cycle: wait only on W +assign advance = read_ok || (!read_state && write_ok); //assign buffer control //todo: if mmv_out < k: might not shift and/or write for multiple read_state cycles -assign window_buffer_shift_enable = (read_state && in0_V_V_TVALID) || write_state; +assign window_buffer_shift_enable = advance; //assign I/O ports assign window_buffer_in = in0_V_V_TDATA; -assign in0_V_V_TREADY = read_state; //accept data whenever read loop wants to read -assign out_V_V_TDATA = window_buffer_out; //out_V_V_TDATA_reg; -assign out_V_V_TVALID = out_V_V_TVALID_reg; +assign out_V_V_TDATA = window_buffer_out; +assign in0_V_V_TREADY = ap_rst_n && read_ok; //only asserted if data is available and we can store it (allowed) +assign out_V_V_TVALID = ap_rst_n && write_state && !write_done; //only asserted if we have data available and it has not been read yet (don't wait for READY from sink) //read schedule //todo: generate differently @@ -133,52 +151,29 @@ $GENERATE_READ_SCHEDULE$ //todo: generate differently $GENERATE_WRITE_SCHEDULE$ -//read process (writing to buffer) +//main process for advancing cycle count always @ (posedge ap_clk) begin if (ap_rst_n == 1'b0) begin - state <= STATE_RESET; + cycle <= 0; + cycle_last <= 0; end else begin - case (state) - STATE_RESET: begin - state <= STATE_OPERATE; + if 
(advance) begin + write_done <= 1'b0; //reset flag + + //count cycle (completed R or W or both (depending on current cycle)) + cycle_last <= cycle; //cycle last is used to generate write_state (due to how schedule is constructed) + if (cycle == CYCLES_TOTAL-1) cycle <= 0; + else + cycle <= cycle+1; + + end else begin + if (write_ok) begin + // successful W in this cycle, but R still outstanding + write_done <= 1'b1; //write can happen even if read is blocked, but only for the current cycle! end - STATE_OPERATE: begin - if (read_state && in0_V_V_TVALID) begin - //read into buffer - //done in concurrent assignment - //count cycle (R) - cycle <= cycle+1; - if (cycle == CYCLES_TOTAL-1) - state <= STATE_RESET; - end else if (write_state && out_V_V_TREADY) begin - cycle <= cycle+1; //count cycle (or W) - if (cycle == CYCLES_TOTAL-1) - state <= STATE_RESET; - end - end - endcase + end end end -//write process (reading from buffer) -always @ (posedge ap_clk) begin - if (ap_rst_n == 1'b0) begin - end else begin - case (state) - STATE_RESET: begin - end - STATE_OPERATE: begin - if (write_state && out_V_V_TREADY) begin - //write from buffer - //todo: VALID seems to be deasserted 1 cycle too late?! - out_V_V_TVALID_reg <= 1'b1; - end else begin - out_V_V_TVALID_reg <= 1'b0; - end - end - endcase - end -end - endmodule //ConvolutionInputGenerator1D_0_ConvolutionInputGenerator1D_0 diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py index 9908bbb30d2dd6669ecdc44d3568dcadc3ac17ad..2e8e8ec75e95440f2d5131fcaff4724792711219 100755 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator_rtl.py @@ -96,6 +96,7 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): "distributed", {"auto", "block", "distributed", "ultra"}, ), + "gen_top_module": ("s", False, ""), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -348,7 +349,12 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): # TODO ensure codegen dir exists if mode == "cppsim": - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + #code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + raise Exception( + """cppsim not possible for RTL SWG""".format( + mode + ) + ) elif mode == "rtlsim": code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") else: @@ -377,44 +383,27 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): reshaped_input = inp.copy() np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) - if mode == "cppsim": - # execute the precompiled model - super().exec_precompiled_singlenode_model() - # load output npy file - super().npy_to_dynamic_output(context) - assert ( - context[node.output[0]].shape == folded_oshape - ), "cppsim \ - did not produce expected ofolded utput shape" - context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) - elif mode == "rtlsim": - sim = self.get_rtlsim() - nbits = self.get_instream_width() - rtlsim_inp = npy_to_rtlsim_input( - "{}/input_0.npy".format(code_gen_dir), export_idt, nbits - ) - super().reset_rtlsim(sim) - super().toggle_clk(sim) - rtlsim_output = self.rtlsim(sim, rtlsim_inp) - odt = export_idt - target_bits = odt.bitwidth() - packed_bits = self.get_outstream_width() - out_npy_path = "{}/output.npy".format(code_gen_dir) - out_shape = self.get_folded_output_shape() - rtlsim_output_to_npy( - rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits - ) - # load and reshape output - 
output = np.load(out_npy_path) - output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) - context[node.output[0]] = output - else: - raise Exception( - """Invalid value for attribute exec_mode! Is currently set to: {} - has to be set to one of the following value ("cppsim", "rtlsim")""".format( - mode - ) - ) + sim = self.get_rtlsim() + nbits = self.get_instream_width() + rtlsim_inp = npy_to_rtlsim_input( + "{}/input_0.npy".format(code_gen_dir), export_idt, nbits + ) + super().reset_rtlsim(sim) + super().toggle_clk(sim) + rtlsim_output = self.rtlsim(sim, rtlsim_inp) + odt = export_idt + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + out_npy_path = "{}/output.npy".format(code_gen_dir) + out_shape = self.get_folded_output_shape() + rtlsim_output_to_npy( + rtlsim_output, out_npy_path, odt, out_shape, packed_bits, target_bits + ) + # load and reshape output + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshape) + context[node.output[0]] = output + # binary -> bipolar if needed if self.get_output_datatype() == DataType["BIPOLAR"]: out = context[node.output[0]] @@ -426,244 +415,37 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): shape doesn't match expected shape (1, ofm_dim_h, ofm_dim_w, k_h*k_w*ifm_ch).""" def global_includes(self): - self.code_gen_dict["$GLOBALS$"] = ['#include "slidingwindow.h"'] + pass def defines(self, var): - numReps = 1 - ( - ifm_ch, - ifm_dim, - ofm_dim, - k, - stride, - dilation, - ) = self.get_1d_conv_attrs_normalized() - simd = self.get_nodeattr("SIMD") - ifm_precision = self.get_input_datatype().bitwidth() - ifm_dim_y, ifm_dim_x = ifm_dim - ofm_dim_y, ofm_dim_x = ofm_dim - k_y, k_x = k - dilation_y, dilation_x = dilation - # For a 1d convolution with stride=[S,1] or [1,S], the finn-hlslib function - # of ConvInpGen must be created with [stride_y, stride_x] = [S, S]. 
- # TODO: changes in finn-hlslib (slidingwindow.h) - stride_y = np.prod(stride) - stride_x = np.prod(stride) - - if dilation_x > 1: - assert ( - dilation_y == 1 - ), "Dilation value greater than 1 along y-axis is not yet supported" - self.code_gen_dict["$DEFINES$"] = [ - """ - #define ConvKernelDim1_x {}\n - #define ConvKernelDim1_y {}\n - #define IFMChannels1 {}\n - #define Input_precision1 {}\n - #define IFMDim1_x {}\n - #define IFMDim1_y {}\n - #define OFMDim1_x {}\n - #define OFMDim1_y {}\n - #define SIMD1 {}\n - #define Stride1_x {}\n - #define Stride1_y {}\n - #define Dilation1_x {}\n - #define Dilation1_y {}\n - #define numReps {} - """.format( - k_x, - k_y, - ifm_ch, - ifm_precision, - ifm_dim_x, - ifm_dim_y, - ofm_dim_x, - ofm_dim_y, - simd, - stride_x, - stride_y, - dilation_x, - dilation_y, - numReps, - ) - ] - else: - ofm_dim = self.get_nodeattr("OFMDim") - self.code_gen_dict["$DEFINES$"] = [ - """ - #define ConvKernelDim1_x {}\n - #define ConvKernelDim1_y {}\n - #define IFMChannels1 {}\n - #define Input_precision1 {}\n - #define IFMDim1_x {}\n - #define IFMDim1_y {}\n - #define OFMDim1_x {}\n - #define OFMDim1_y {}\n - #define SIMD1 {}\n - #define Stride1_x {}\n - #define Stride1_y {}\n - #define numReps {} - """.format( - k_x, - k_y, - ifm_ch, - ifm_precision, - ifm_dim_x, - ifm_dim_y, - ofm_dim_x, - ofm_dim_y, - simd, - stride_x, - stride_y, - numReps, - ) - ] + pass def read_npy_data(self): - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - dtype = self.get_input_datatype() - if dtype == DataType["BIPOLAR"]: - # use binary for bipolar storage - dtype = DataType["BINARY"] - elem_bits = dtype.bitwidth() - packed_bits = self.get_instream_width() - packed_hls_type = "ap_uint<%d>" % packed_bits - elem_hls_type = dtype.get_hls_datatype_str() - npy_type = "float" - npy_in = "%s/input_0.npy" % code_gen_dir - self.code_gen_dict["$READNPYDATA$"] = [] - self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) - ) + pass def strm_decl(self): - self.code_gen_dict["$STREAMDECLARATIONS$"] = [] - self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) - ) - self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) - ) + pass def docompute(self): - ram_style = self.get_nodeattr("ram_style") - map_to_hls_ram_style = { - "auto": "ap_resource_dflt()", - "block": "ap_resource_bram()", - "distributed": "ap_resource_lutram()", - "ultra": "ap_resource_uram()", - } - hls_ram_style = map_to_hls_ram_style[ram_style] - - # check which ConvolutionInputGenerator is needed - if self.use_parallel_window_output(): - hls_call = "ConvolutionInputGenerator_1D_parallel" - self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, - IFMDim1_x, OFMDim1_x, SIMD1, Stride1_x> - (in0, out, numReps, {});""".format( - hls_call, hls_ram_style - ) - ] - else: - hls_call = "ConvolutionInputGenerator_NonSquare" - dilation_h, dilation_w = self.get_nodeattr("Dilation") - if dilation_h > 1 or dilation_w > 1: - hls_call += "_Dilated" - if self.get_nodeattr("depthwise") == 1: - hls_call += "_dws" - self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, - Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, - SIMD1, Stride1_x, Stride1_y, Dilation1_x, Dilation1_y> - (in0, out, numReps, 
{});""".format( - hls_call, hls_ram_style - ) - ] - elif self.get_nodeattr("depthwise") == 1: - hls_call += "_dws" - self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, - Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, - SIMD1, Stride1_x, Stride1_y> (in0, out, numReps, {});""".format( - hls_call, hls_ram_style - ) - ] - else: - self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<ConvKernelDim1_x, ConvKernelDim1_y, IFMChannels1, - Input_precision1, IFMDim1_x, IFMDim1_y, OFMDim1_x, OFMDim1_y, - SIMD1, Stride1_x, Stride1_y> (in0, out, numReps, {});""".format( - hls_call, hls_ram_style - ) - ] + pass def dataoutstrm(self): - code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") - dtype = self.get_output_datatype() - if dtype == DataType["BIPOLAR"]: - # use binary for bipolar storage - dtype = DataType["BINARY"] - elem_bits = dtype.bitwidth() - packed_bits = self.get_outstream_width() - packed_hls_type = "ap_uint<%d>" % packed_bits - elem_hls_type = dtype.get_hls_datatype_str() - npy_type = "float" - npy_out = "%s/output.npy" % code_gen_dir - oshape = self.get_folded_output_shape() - oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") - if self.use_parallel_window_output(): - # pass the number of pixels in the folded output to apintstream2npy, needed - # to unpack the ouput correctly and reverse only the inner SIMD dimension - k_h, k_w = self.get_nodeattr("ConvKernelDim") - multi_pixel_out = k_h * k_w - else: - multi_pixel_out = 1 - - self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", true, 1, %d);' - % ( - packed_hls_type, - elem_hls_type, - elem_bits, - npy_type, - oshape_cpp_str, - npy_out, - multi_pixel_out, - ) - ] + pass def save_as_npy(self): - self.code_gen_dict["$SAVEASCNPY$"] = [] + pass def blackboxfunction(self): - if self.use_parallel_window_output(): - self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0, - hls::stream<ap_uint<ConvKernelDim1_x*SIMD1*Input_precision1>> - &out)""".format( - self.onnx_node.name - ) - ] - else: - self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0, - hls::stream<ap_uint<SIMD1*Input_precision1>> &out)""".format( - self.onnx_node.name - ) - ] + pass def pragmas(self): - self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] - self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") - self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE ap_ctrl_none port=return" - ) + pass def generate_hdl(self): - #todo: generate into some code gen dict - f_debug = open(os.path.join("/workspace/finn/finn-rtllib/swg/", "swg_hdl_debuginfo.log"), "w") + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + f_debug = open(os.path.join(code_gen_dir, "swg_hdl_debuginfo.log"), "w") + #debug: + #f_debug = open(os.path.join("/workspace/finn/finn-rtllib/swg/", "swg_hdl_debuginfo.log"), "w") code_gen_dict = {} #-------------------- @@ -844,6 +626,8 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp): assert schedule_read.count(1) == self.get_number_output_values(), "ERROR: Reading buffer in fewer cycles than expected" code_gen_dict["$TOP_MODULE_NAME$"] = [self.get_verilog_top_module_name()] + #save top module name so we can refer to it even after this node has been renamed (e.g. 
by GiveUniqueNodeNames(prefix) during MakeZynqProject)
+        self.set_nodeattr("gen_top_module", self.get_verilog_top_module_name())
         code_gen_dict["$BIT_WIDTH$"] = [str(self.get_input_datatype().bitwidth())]
         code_gen_dict["$SIMD$"] = [str(simd)]
         code_gen_dict["$MMV_IN$"] = [str(mmv_in)]
@@ -976,8 +760,11 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
             "localparam [0:{len}-1] WRITE_SCHEDULE = {{{str}}};".format(len=cycles_total, str=schedule_as_string)
         )
         code_gen_dict["$GENERATE_WRITE_SCHEDULE$"].append(
-            "assign write_state = WRITE_SCHEDULE[cycle];"
+            "assign write_state = WRITE_SCHEDULE[cycle_last];"
         )
+        #code_gen_dict["$GENERATE_WRITE_SCHEDULE$"].append(
+        #    "assign write_state_next = WRITE_SCHEDULE[cycle_next];"
+        #)

         with open("/workspace/finn/finn-rtllib/swg/swg_hdl_template.v", "r") as f:
             template = f.read()
@@ -986,23 +773,33 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
             # transform list into long string separated by '\n'
             code_gen_line = "\n".join(code_gen_dict[key])
             template = template.replace(key, code_gen_line)
-        f = open(os.path.join("/workspace/finn/finn-rtllib/swg/", "swg_hdl_generated.v"), "w")
+
+        f = open(os.path.join(code_gen_dir, self.get_nodeattr("gen_top_module") + "_hdl_gen.v"), "w")
+        #debug:
+        #f = open(os.path.join("/workspace/finn/finn-rtllib/swg/", "swg_hdl_generated.v"), "w")
         f.write(template)
         f.close()
         f_debug.close()

+        #set ipgen_path and ip_path so that the HLSSynthIP and CreateStitchedIP transformations do not complain
+        self.set_nodeattr("ipgen_path", code_gen_dir)
+        self.set_nodeattr("ip_path", code_gen_dir)
+
     def prepare_rtlsim(self):
         """Creates a Verilator emulation library for the RTL code generated
         for this node, sets the rtlsim_so attribute to its path and returns
         a PyVerilator wrapper around it."""
         #modified to use generated verilog instead of HLS output products

-        self.generate_hdl()
-
         if PyVerilator is None:
             raise ImportError("Installation of PyVerilator is required.")
-        verilog_paths = ["/workspace/finn/finn-rtllib/swg/"]
-        verilog_files = ["swg_hdl_generated.v"]
+
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        verilog_paths = [code_gen_dir]
+        verilog_files = [self.get_nodeattr("gen_top_module") + "_hdl_gen.v"]
+        #debug:
+        #verilog_paths = ["/workspace/finn/finn-rtllib/swg/"]
+        #verilog_files = ["swg_hdl_generated.v"]
         # build the Verilator emu library
         sim = PyVerilator.build(
             verilog_files,
@@ -1014,3 +811,37 @@ class ConvolutionInputGenerator_rtl(HLSCustomOp):
         # save generated lib filename in attribute
         self.set_nodeattr("rtlsim_so", sim.lib._name)
         return sim
+
+
+    def code_generation_ipi(self):
+        """Constructs and returns the TCL for node instantiation in Vivado IPI."""
+        vlnv = self.get_nodeattr("ip_vlnv")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+
+        #cmd = ["create_bd_cell -type ip -vlnv %s %s" % (vlnv, self.onnx_node.name)]
+
+        cmd = ["add_files -norecurse %s" % (os.path.join(code_gen_dir, self.get_nodeattr("gen_top_module") + "_hdl_gen.v")),
+               "create_bd_cell -type module -reference %s %s" % (self.get_nodeattr("gen_top_module"), self.onnx_node.name)]
+
+        #update_compile_order -fileset sources_1
+        #add_files -norecurse C:/Users/felix/Downloads/swg_hdl_generated.v
+        #update_compile_order -fileset sources_1
+        #create_bd_cell -type module -reference ConvolutionInputGenerator_rtl_0_ConvolutionInputGenerator_rtl_0 ConvolutionInputGene_0
+
+        return cmd
+
+    def code_generation_ipgen(self, model, fpgapart, clk):
+        """Normally: Generates C++ code and a tcl script for IP generation. Here: generates the Verilog for the RTL SWG directly."""
+        self.generate_hdl()
+
+    def ipgen_singlenode_code(self):
+        """Normally: Builds the bash script for IP generation via CallHLS.
+        Here: nothing to do, since no HLS synthesis is involved."""
+        pass
+
+    def code_generation_cppsim(self, model):
+        """Normally: Generates C++ code for simulation (cppsim). Here: cppsim is not supported for the RTL SWG."""
+        pass
+
+    def compile_singlenode_code(self):
+        pass
\ No newline at end of file
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py
index f7a724133333156811d5e3f7721c9585dba94eca..0845dc2fcad42257336027e1e03bdee9c17a946f 100755
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl.py
@@ -151,15 +151,15 @@ def prepare_inputs(input_tensor):
 # input datatype
 @pytest.mark.parametrize("idt", [DataType["INT4"]])
 # kernel size
-@pytest.mark.parametrize("k", [[3, 3]])
+@pytest.mark.parametrize("k", [[3, 1]])
 # input dimension
-@pytest.mark.parametrize("ifm_dim", [[6, 11]])
+@pytest.mark.parametrize("ifm_dim", [[8, 1]])
 # input channels
 @pytest.mark.parametrize("ifm_ch", [2])
 # Stride
-@pytest.mark.parametrize("stride", [[1, 2]])
+@pytest.mark.parametrize("stride", [[1, 1]])
 # Dilation
-@pytest.mark.parametrize("dilation", [[1, 2]])
+@pytest.mark.parametrize("dilation", [[1, 1]])
 # execution mode
 @pytest.mark.parametrize("exec_mode", ["rtlsim"])
 # input channel parallelism ("SIMD")
@@ -210,17 +210,14 @@ def test_fpgadataflow_slidingwindow_rtl(
     )

     if exec_mode == "cppsim":
-        model = model.transform(SetExecMode("cppsim"))
-        model = model.transform(PrepareCppSim())
-        model = model.transform(CompileCppSim())
+        raise Exception("cppsim not supported in test_fpgadataflow_slidingwindow_rtl")
     elif exec_mode == "rtlsim":
         model = model.transform(SetExecMode("rtlsim"))
         model = model.transform(GiveUniqueNodeNames())
-        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
-        model = model.transform(HLSSynthIP())
+        model = model.transform(PrepareIP("xc7z020clg400-1", 4))
         model = model.transform(PrepareRTLSim())
     else:
-        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")
+        raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow_rtl")

     # prepare input data
     input_dict = prepare_inputs(x)
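
For context, below is a minimal usage sketch (not part of the patch) showing how a model containing a ConvolutionInputGenerator_rtl node is exercised after this change. It mirrors the transform sequence of the updated test: rtlsim is the only supported execution mode, PrepareIP now triggers generate_hdl() for this node, and PrepareRTLSim consumes the generated Verilog directly, so no HLSSynthIP() pass is needed. The model/input_dict setup and the exact import locations are assumptions carried over from the existing test file.

# Hypothetical sketch, assuming `model` and `input_dict` are prepared as in
# test_fpgadataflow_slidingwindow_rtl above. Depending on the FINN version,
# GiveUniqueNodeNames may live under qonnx.transformation.general instead.
import finn.core.onnx_exec as oxe
from finn.transformation.general import GiveUniqueNodeNames
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim

model = model.transform(SetExecMode("rtlsim"))  # cppsim is rejected by execute_node()
model = model.transform(GiveUniqueNodeNames())  # fix node names before HDL generation
model = model.transform(PrepareIP("xc7z020clg400-1", 4))  # calls generate_hdl() for the RTL SWG
model = model.transform(PrepareRTLSim())  # Verilator model built from <gen_top_module>_hdl_gen.v
# no HLSSynthIP() step: the generated Verilog is used directly for rtlsim and IPI
out_dict = oxe.execute_onnx(model, input_dict)
y = out_dict[model.graph.output[0].name]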