diff --git a/notebooks/9-FINN-EndToEndFlow.ipynb b/notebooks/9-FINN-EndToEndFlow.ipynb index 2478d276eb3885b98b0ec18eaa49ccb2ca81cd19..1796faab6166caefc880ab2ec4e29d6abab29dec 100644 --- a/notebooks/9-FINN-EndToEndFlow.ipynb +++ b/notebooks/9-FINN-EndToEndFlow.ipynb @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -117,13 +117,15 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "\n", + "Stopping http://0.0.0.0:8081\n", "Serving '/workspace/finn/tfc_w1_a1.onnx' at http://0.0.0.0:8081\n" ] } @@ -134,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -164,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -233,7 +235,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ @@ -262,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -282,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -315,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -367,7 +369,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 48, "metadata": {}, "outputs": [ { @@ -376,19 +378,19 @@ "text": [ "\n", "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1_a1.onnx' at http://0.0.0.0:8081\n" + "Serving '/workspace/finn/tfc_w1_a1_streamlined.onnx' at http://0.0.0.0:8081\n" ] } ], "source": [ "model = model.transform(Streamline())\n", - "model.save(build_dir+\"/tfc_w1_a1.onnx\")\n", - "netron.start(build_dir+\"/tfc_w1_a1.onnx\", port=8081, host=\"0.0.0.0\")" + "model.save(build_dir+\"/tfc_w1_a1_streamlined.onnx\")\n", + "netron.start(build_dir+\"/tfc_w1_a1_streamlined.onnx\", port=8081, host=\"0.0.0.0\")" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 49, "metadata": {}, "outputs": [ { @@ -421,7 +423,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ @@ -447,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 51, "metadata": {}, "outputs": [ { @@ -456,20 +458,20 @@ "text": [ "\n", "Stopping http://0.0.0.0:8081\n", - "Serving '/workspace/finn/tfc_w1_a1.onnx' at http://0.0.0.0:8081\n" + "Serving '/workspace/finn/tfc_w1_a1_hls_layers.onnx' at http://0.0.0.0:8081\n" ] } ], "source": [ "import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls\n", "model = model.transform(to_hls.InferBinaryStreamingFCLayer())\n", - "model.save(build_dir+\"/tfc_w1_a1.onnx\")\n", - "netron.start(build_dir+\"/tfc_w1_a1.onnx\", port=8081, host=\"0.0.0.0\")" + "model.save(build_dir+\"/tfc_w1_a1_hls_layers.onnx\")\n", + "netron.start(build_dir+\"/tfc_w1_a1_hls_layers.onnx\", port=8081, host=\"0.0.0.0\")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 52, "metadata": { "scrolled": true }, @@ -512,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 53, "metadata": {}, "outputs": [ { @@ -535,7 +537,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 54, "metadata": { "scrolled": false }, @@ -567,7 +569,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -576,7 +578,7 @@ "text": [ "\n", "Stopping http://0.0.0.0:8081\n", - "Serving '/tmp/finn_maltanar_22115/dataflow_partition_9vof1ltc/df_model.onnx' at http://0.0.0.0:8081\n" + "Serving '/tmp/finn_maltanar/dataflow_partition_n7ae7i0t/df_model.onnx' at http://0.0.0.0:8081\n" ] } ], @@ -589,7 +591,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 56, "metadata": {}, "outputs": [ { @@ -619,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -636,7 +638,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -670,7 +672,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -694,16 +696,18 @@ " 'outputDataType': ('s', True, ''),\n", " 'binaryXnorMode': ('i', False, 0),\n", " 'noActivation': ('i', False, 0),\n", + " 'inFIFODepth': ('i', False, 0),\n", + " 'outFIFODepth': ('i', False, 0),\n", " 'backend': ('s', True, 'fpgadataflow'),\n", " 'code_gen_dir_npysim': ('s', False, ''),\n", " 'code_gen_dir_ipgen': ('s', False, ''),\n", " 'executable_path': ('s', False, ''),\n", " 'ipgen_path': ('s', False, ''),\n", - " 'sim_mode': ('s', False, ''),\n", + " 'exec_mode': ('s', False, ''),\n", " 'sim_cycles': ('i', False, 0)}" ] }, - "execution_count": 26, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -721,24 +725,29 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# SIMD controls the folding over the input vector\n", "# PE controls the folding over the output vector\n", "\n", + "fc0w.set_nodeattr(\"inFIFODepth\", 50)\n", "fc0w.set_nodeattr(\"SIMD\", 16)\n", "fc0w.set_nodeattr(\"PE\", 16)\n", + "fc0w.set_nodeattr(\"outFIFODepth\", 4)\n", "\n", "fc1w.set_nodeattr(\"SIMD\", 16)\n", "fc1w.set_nodeattr(\"PE\", 16)\n", + "fc1w.set_nodeattr(\"outFIFODepth\", 4)\n", "\n", "fc2w.set_nodeattr(\"SIMD\", 16)\n", "fc2w.set_nodeattr(\"PE\", 16)\n", + "fc2w.set_nodeattr(\"outFIFODepth\", 4)\n", "\n", "fc3w.set_nodeattr(\"SIMD\", 16)\n", - "fc3w.set_nodeattr(\"PE\", 10)" + "fc3w.set_nodeattr(\"PE\", 10)\n", + "fc3w.set_nodeattr(\"outFIFODepth\", 50)" ] }, { @@ -750,7 +759,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -782,7 +791,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -812,7 +821,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -833,7 +842,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -851,7 +860,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -871,7 +880,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 29, "metadata": { "scrolled": true }, @@ -907,7 +916,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -936,7 +945,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -944,8 +953,8 @@ "output_type": "stream", "text": [ "#!/bin/bash \r\n", - "cd /tmp/finn_maltanar_22115/code_gen_ipgen_StreamingFCLayer_Batch_bwxffr0g\r\n", - "vivado_hls /tmp/finn_maltanar_22115/code_gen_ipgen_StreamingFCLayer_Batch_bwxffr0g/hls_syn_StreamingFCLayer_Batch_0.tcl\r\n", + "cd /tmp/finn_maltanar/code_gen_ipgen_StreamingFCLayer_Batch_y_fxb2eb\r\n", + "vivado_hls /tmp/finn_maltanar/code_gen_ipgen_StreamingFCLayer_Batch_y_fxb2eb/hls_syn_StreamingFCLayer_Batch_0.tcl\r\n", "cd /workspace/finn\r\n" ] } @@ -966,7 +975,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -976,7 +985,7 @@ "\r\n", "set config_proj_name project_StreamingFCLayer_Batch_0\r\n", "puts \"HLS project: $config_proj_name\"\r\n", - "set config_hwsrcdir \"/tmp/finn_maltanar_22115/code_gen_ipgen_StreamingFCLayer_Batch_bwxffr0g\"\r\n", + "set config_hwsrcdir \"/tmp/finn_maltanar/code_gen_ipgen_StreamingFCLayer_Batch_y_fxb2eb\"\r\n", "puts \"HW source dir: $config_hwsrcdir\"\r\n", "set config_proj_part \"xczu3eg-sbva484-1-e\"\r\n", "\r\n", @@ -993,6 +1002,7 @@ "set_part $config_proj_part\r\n", "\r\n", "config_interface -m_axi_addr64\r\n", + "config_rtl -auto_prefix\r\n", "\r\n", "create_clock -period $config_clkperiod -name default\r\n", "csynth_design\r\n", @@ -1015,33 +1025,25 @@ "Now that all IP blocks are in place, they can be stitched together to create an IP design that matches the ONNX model. This is covered in the next section." ] }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "# save model with other name for section \"Emulation using PyVerilator\"\"\n", - "model.save(build_dir+\"/tfc_w1_a1_after_hls_ip_per_layer.onnx\")" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ "### IP Stitching <a id='ip_stitching'></a>\n", "\n", - "We now have IP blocks for each of our layers, and will stitch them together into a larger IP that implements the whole network using the `CodeGen_ipstitch` transformation. Bear in mind that this transformation can only be applied on a graph that only contains HLS nodes that already have been through the `HLSSynth_IPGen` transformation, which is the last step we performed. **This invokes Vivado and may take a few minutes to run.**" + "We now have IP blocks for each of our layers, and will stitch them together into a larger IP that implements the whole network using the `CodeGen_ipstitch` transformation. Bear in mind that this transformation can only be applied on a graph that only contains HLS nodes that already have been through the `HLSSynth_IPGen` transformation, which is the last step we performed. Prior to calling IP stitching, we'll also use the `ReplaceVerilogRelPaths` transformation to convert any relative `$readmemh` paths in the generated IP blocks to absolute ones, which prevents errors later on. **This step invokes Vivado and may take a few minutes to run.**" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch\n", - "\n", + "from finn.transformation.fpgadataflow.replace_verilog_relpaths import ReplaceVerilogRelPaths\n", + "model = ModelWrapper(build_dir+\"/tfc_w1_a1_ipgen.onnx\")\n", + "model = model.transform(ReplaceVerilogRelPaths())\n", "model = model.transform(CodeGen_ipstitch(fpga_part))" ] }, @@ -1054,20 +1056,22 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[key: \"vivado_stitch_proj\"\n", - "value: \"/tmp/finn_maltanar_22115/vivado_stitch_proj_nfte0nh0\"\n", + "value: \"/tmp/finn_maltanar/vivado_stitch_proj_gvhcdxah\"\n", ", key: \"vivado_stitch_vlnv\"\n", "value: \"xilinx_finn:finn:finn_design:1.0\"\n", + ", key: \"wrapper_filename\"\n", + "value: \"/tmp/finn_maltanar/vivado_stitch_proj_gvhcdxah/finn_vivado_stitch_proj.srcs/sources_1/bd/finn_design/hdl/finn_design_wrapper.v\"\n", "]" ] }, - "execution_count": 39, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1078,16 +1082,16 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'/tmp/finn_maltanar_22115/vivado_stitch_proj_nfte0nh0'" + "'/tmp/finn_maltanar/vivado_stitch_proj_gvhcdxah'" ] }, - "execution_count": 40, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1110,6 +1114,15 @@ "" ] }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "model.save(build_dir+\"/tfc_w1_a1_ipstitch.onnx\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1121,45 +1134,46 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[key: \"vivado_stitch_proj\"\n", - "value: \"/tmp/finn_maltanar_22115/vivado_stitch_proj_nfte0nh0\"\n", + "value: \"/tmp/finn_maltanar/vivado_stitch_proj_ud9yxuzi\"\n", ", key: \"vivado_stitch_vlnv\"\n", "value: \"xilinx_finn:finn:finn_design:1.0\"\n", + ", key: \"wrapper_filename\"\n", + "value: \"/tmp/finn_maltanar/vivado_stitch_proj_ud9yxuzi/finn_vivado_stitch_proj.srcs/sources_1/bd/finn_design/hdl/finn_design_wrapper.v\"\n", ", key: \"vivado_pynq_proj\"\n", - "value: \"/tmp/finn_maltanar_22115/vivado_pynq_proj_bj_z4tm0\"\n", + "value: \"/tmp/finn_maltanar/vivado_pynq_proj_6rhrsy8m\"\n", "]" ] }, - "execution_count": 42, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject\n", - "\n", + "model = ModelWrapper(build_dir+\"/tfc_w1_a1_ipstitch.onnx\")\n", "model = model.transform(MakePYNQProject(pynq_board))\n", "model.model.metadata_props" ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "ip_config.tcl\t resizer.hw\t\tresizer.srcs\t vivado.jou\r\n", - "make_project.sh resizer.ip_user_files\tresizer.xpr\t vivado.log\r\n", - "resizer.cache\t resizer.sim\t\tsynth_project.sh vivado_pid24853.str\r\n" + "ip_config.tcl\t resizer.cache\tresizer.ip_user_files resizer.xpr\r\n", + "make_project.sh resizer.hw\tresizer.srcs\t synth_project.sh\r\n" ] } ], @@ -1175,6 +1189,15 @@ "" ] }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "model.save(build_dir + \"/tfc_w1_a1_pynq_project.onnx\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1191,33 +1214,33 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[key: \"vivado_stitch_proj\"\n", - "value: \"/tmp/finn_maltanar_22115/vivado_stitch_proj_nfte0nh0\"\n", + "value: \"/tmp/finn_maltanar/vivado_stitch_proj_ud9yxuzi\"\n", ", key: \"vivado_stitch_vlnv\"\n", "value: \"xilinx_finn:finn:finn_design:1.0\"\n", + ", key: \"wrapper_filename\"\n", + "value: \"/tmp/finn_maltanar/vivado_stitch_proj_ud9yxuzi/finn_vivado_stitch_proj.srcs/sources_1/bd/finn_design/hdl/finn_design_wrapper.v\"\n", ", key: \"vivado_pynq_proj\"\n", - "value: \"/tmp/finn_maltanar_22115/vivado_pynq_proj_bj_z4tm0\"\n", + "value: \"/tmp/finn_maltanar/vivado_pynq_proj_6rhrsy8m\"\n", ", key: \"vivado_pynq_bitfile\"\n", - "value: \"/tmp/finn_maltanar_22115/vivado_pynq_proj_bj_z4tm0/resizer.bit\"\n", + "value: \"/tmp/finn_maltanar/vivado_pynq_proj_6rhrsy8m/resizer.bit\"\n", "]" ] }, - "execution_count": 44, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "model.save(build_dir + \"/tfc_w1_a1_pre_synthesis.onnx\")\n", - "\n", "from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject\n", - "\n", + "model = ModelWrapper(build_dir + \"/tfc_w1_a1_pynq_project.onnx\")\n", "model = model.transform(SynthPYNQProject())\n", "model.model.metadata_props" ] diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py index 0102d236c3a9a6eafe87f59d01e23e0d1307b6f4..5ed45339cc2fe5f77339e33c4e7a8f6c556b704f 100644 --- a/src/finn/core/onnx_exec.py +++ b/src/finn/core/onnx_exec.py @@ -45,7 +45,8 @@ def execute_node(node, context, graph): if node.op_type == "StreamingDataflowPartition": sdp_node = getCustomOp(node) model = ModelWrapper(sdp_node.get_nodeattr("model")) - execute_onnx(model, context) + ret = execute_onnx(model, context, True) + context.update(ret) else: if node.domain == "finn": @@ -124,8 +125,8 @@ def execute_onnx(model, input_dict, return_full_exec_context=False): str(input_dict[inp_name].shape), ) ) - else: - raise Exception("Provided input not found in graph context: %s" % inp_name) + # else: + # raise Exception("Provided input not found in graph context: %s" % inp_name) # check if model has an execution mode set # if None, execute model node by node using execute_node() diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 51cd5745733c8fe1d9d437feef2d91d3dbd35bfb..bc8d74b3e3bc585f58bfca79eb593542e8733e9a 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -2,7 +2,10 @@ import os from finn.custom_op.registry import getCustomOp from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy -from finn.util.fpgadataflow import pyverilate_stitched_ip +from finn.util.fpgadataflow import ( + pyverilate_get_liveness_threshold_cycles, + pyverilate_stitched_ip, +) def rtlsim_exec(model, execution_context): @@ -11,6 +14,7 @@ def rtlsim_exec(model, execution_context): # ensure stitched ip project already exists assert os.path.isfile(model.get_metadata_prop("wrapper_filename")) assert os.path.isdir(model.get_metadata_prop("vivado_stitch_proj")) + trace_file = model.get_metadata_prop("rtlsim_trace") # extract input shape # TODO extend for multiple inputs i_name = model.graph.input[0].name @@ -38,7 +42,7 @@ def rtlsim_exec(model, execution_context): sim = pyverilate_stitched_ip(model) _reset_rtlsim(sim) _toggle_clk(sim) - ret = _run_rtlsim(sim, packed_input, num_out_values) + ret = _run_rtlsim(sim, packed_input, num_out_values, trace_file) packed_output = ret[0] model.set_metadata_prop("sim_cycles", str(ret[1])) # unpack output and put into context @@ -61,7 +65,7 @@ def _toggle_clk(sim): sim.io.ap_clk_0 = 0 -def _run_rtlsim(sim, inp, num_out_values): +def _run_rtlsim(sim, inp, num_out_values, trace_file=None): # import pdb; pdb.set_trace() inputs = inp outputs = [] @@ -73,9 +77,13 @@ def _run_rtlsim(sim, inp, num_out_values): observation_count = 0 # avoid infinite looping of simulation by aborting when there is no change in - # output values after 100 cycles + # output values after LIVENESS_THRESHOLD cycles no_change_count = 0 old_outputs = outputs + liveness_threshold = pyverilate_get_liveness_threshold_cycles() + + if trace_file is not None: + sim.start_vcd_trace(trace_file) while not (output_observed): sim.io.in0_V_V_0_tvalid = 1 if len(inputs) > 0 else 0 @@ -94,13 +102,21 @@ def _run_rtlsim(sim, inp, num_out_values): sim_cycles = observation_count output_observed = True - if no_change_count == 100: + if no_change_count == liveness_threshold: if old_outputs == outputs: + if trace_file is not None: + sim.flush_vcd_trace() + sim.stop_vcd_trace() raise Exception( "Error in simulation! Takes too long to produce output." + "Consider setting the LIVENESS_THRESHOLD env.var. to a " + "larger value." ) else: no_change_count = 0 old_outputs = outputs + if trace_file is not None: + sim.flush_vcd_trace() + sim.stop_vcd_trace() return (outputs, sim_cycles) diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index b786944edfc75041efc21b40b881c8ba7ffc0736..672831fb07e85c544cf8d46d66d9917169ea07b9 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -4,7 +4,10 @@ import os import subprocess from finn.custom_op import CustomOp from finn.util.basic import CppBuilder -from finn.util.fpgadataflow import IPGenBuilder +from finn.util.fpgadataflow import ( + IPGenBuilder, + pyverilate_get_liveness_threshold_cycles, +) from . import templates @@ -34,6 +37,7 @@ class HLSCustomOp(CustomOp): "ipgen_path": ("s", False, ""), "exec_mode": ("s", False, ""), "sim_cycles": ("i", False, 0), + "rtlsim_trace": ("s", False, ""), } def node_res_estimation(self): @@ -192,6 +196,11 @@ compilation transformations? def rtlsim(self, sim, inp): # import pdb; pdb.set_trace() + trace_file = self.get_nodeattr("rtlsim_trace") + if trace_file != "": + if trace_file == "default": + trace_file = self.onnx_node.name + ".vcd" + sim.start_vcd_trace(trace_file) inputs = inp outputs = [] sim.io.out_V_V_TREADY = 1 @@ -206,6 +215,7 @@ compilation transformations? # output values after 100 cycles no_change_count = 0 old_outputs = outputs + liveness_threshold = pyverilate_get_liveness_threshold_cycles() while not (output_observed): sim.io.in0_V_V_TVALID = 1 if len(inputs) > 0 else 0 @@ -224,15 +234,22 @@ compilation transformations? self.set_nodeattr("sim_cycles", observation_count) output_observed = True - if no_change_count == 100: + if no_change_count == liveness_threshold: if old_outputs == outputs: + if trace_file != "": + sim.flush_vcd_trace() + sim.stop_vcd_trace() raise Exception( - "Error in simulation! Takes too long to produce output." + "Error in simulation! Takes too long to produce output. " + "Consider setting the LIVENESS_THRESHOLD env.var. to a " + "larger value." ) else: no_change_count = 0 old_outputs = outputs - + if trace_file != "": + sim.flush_vcd_trace() + sim.stop_vcd_trace() return outputs def execute_node(self, context, graph): diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index 8ae626f14febc72fc6c81f07732de2229af9cc33..1da7618718fc32a53835ec8021da93936dc03380 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -98,9 +98,10 @@ class ConvolutionInputGenerator(HLSCustomOp): ) elif mode == "rtlsim": code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + prefixed_top_name = "%s_%s" % (node.name, node.name) # check if needed file exists verilog_file = "{}/project_{}/sol1/impl/verilog/{}.v".format( - code_gen_dir, node.name, node.name + code_gen_dir, node.name, prefixed_top_name ) if os.path.isfile(verilog_file): inp = context[node.input[0]] @@ -209,7 +210,7 @@ class ConvolutionInputGenerator(HLSCustomOp): self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ConvKernelDim1, IFMChannels1, Input_precision1, IFMDim1, OFMDim1, SIMD1, Stride1> (in0, out, numReps);""".format( - node.op_type, + node.op_type ) ] diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py index 643a2062dc50d712f0e0b08ff281638354813fe8..915a498248222d802b398aa02518fbfc0fa5d482 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py @@ -297,6 +297,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): ret = interleave_matrix_outer_dim_from_partitions(ret, pe) # create SIMD as innermost dimension and add a dummy outer dim ret = ret.reshape(1, pe, wmem, simd) + # reverse the SIMD dimension + ret = np.flip(ret, axis=-1) return ret def get_hls_compatible_threshold_tensor(self, orig_thres_matrix): @@ -479,9 +481,10 @@ class StreamingFCLayer_Batch(HLSCustomOp): # reshape output to have expected shape context[node.output[0]] = context[node.output[0]].reshape(1, mh) elif mode == "rtlsim": + prefixed_top_name = "%s_%s" % (node.name, node.name) # check if needed file exists verilog_file = "{}/project_{}/sol1/impl/verilog/{}.v".format( - code_gen_dir, node.name, node.name + code_gen_dir, node.name, prefixed_top_name ) if os.path.isfile(verilog_file): nbits = self.get_instream_width() @@ -568,7 +571,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): self.code_gen_dict["$READNPYDATA$"] = [] # note: the innermost dim is reversed for the input self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0, true);' + 'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);' % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) ) diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 07a356e5ee7e10a6b51859cb7fb2c4bdb5deeda7..e2f43f4edf206b32974adaf02fd479f0af522702 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -65,6 +65,7 @@ open_solution sol1 set_part $config_proj_part config_interface -m_axi_addr64 +config_rtl -auto_prefix create_clock -period $config_clkperiod -name default csynth_design diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py index 3f3bf6e79c8ab6d485c5356a4c3bba3623220df2..31a0347f3dfa571a95f354fb2ffbc74caab5ca2e 100644 --- a/src/finn/custom_op/fpgadataflow/tlastmarker.py +++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py @@ -54,13 +54,8 @@ class TLastMarker(HLSCustomOp): ] def read_npy_data(self): - # TLastMarker does not support npysim self.code_gen_dict["$READNPYDATA$"] = [] - def strm_decl(self): - # TLastMarker does not support npysim - self.code_gen_dict["$STREAMDECLARATIONS$"] = [] - def docompute(self): self.code_gen_dict["$DOCOMPUTE$"] = [ "for(int i=0; i<NumIters; i++) {", @@ -74,7 +69,6 @@ class TLastMarker(HLSCustomOp): ] def dataoutstrm(self): - # TLastMarker does not support npysim self.code_gen_dict["$DATAOUTSTREAM$"] = [] def save_as_npy(self): @@ -114,3 +108,12 @@ class TLastMarker(HLSCustomOp): def get_outstream_width(self): stream_width = self.get_nodeattr("StreamWidth") return stream_width + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<OutDType> out ("out");' + ) diff --git a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py new file mode 100644 index 0000000000000000000000000000000000000000..597aa36eb80dd5e19b19764f027ebeb515a73bdd --- /dev/null +++ b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py @@ -0,0 +1,42 @@ +import os + +import finn.custom_op.registry as registry +import finn.util.basic as util +from finn.transformation import Transformation + + +class ReplaceVerilogRelPaths(Transformation): + """Convert ./ relative file paths to absolute ones for generated Verilog""" + + def __init__(self): + super().__init__() + + def apply(self, model): + for node in model.graph.node: + op_type = node.op_type + if node.domain == "finn": + backend_attribute = util.get_by_name(node.attribute, "backend") + if backend_attribute is None: + continue + backend_value = backend_attribute.s.decode("UTF-8") + if backend_value == "fpgadataflow": + try: + # lookup op_type in registry of CustomOps + inst = registry.custom_op[op_type](node) + # find the IP gen dir + ipgen_path = inst.get_nodeattr("ipgen_path") + if ipgen_path is not None and os.path.isdir(ipgen_path): + for dname, dirs, files in os.walk(ipgen_path): + for fname in files: + if fname.endswith(".v"): + fpath = os.path.join(dname, fname) + with open(fpath, "r") as f: + s = f.read() + old = '$readmemh(".' + new = '$readmemh("%s' % dname + s = s.replace(old, new) + with open(fpath, "w") as f: + f.write(s) + except KeyError: + pass + return (model, False) diff --git a/src/finn/util/data_packing.py b/src/finn/util/data_packing.py index b5db59046b80bb4ccd8750e3aa91f7298377f2a8..639fa6d3fb44694c347d984524c175aee8575713 100644 --- a/src/finn/util/data_packing.py +++ b/src/finn/util/data_packing.py @@ -106,7 +106,7 @@ def pack_innermost_dim_as_hex_string(ndarray, dtype, pad_to_nbits, reverse_inner def unpack_innermost_dim_from_hex_string( - ndarray, dtype, out_shape, reverse_inner=False + ndarray, dtype, out_shape, packedBits, reverse_inner=False ): """Convert a NumPy array of hex strings into a FINN NumPy array by unpacking the hex strings into the specified data type. out_shape can be specified @@ -125,7 +125,6 @@ def unpack_innermost_dim_from_hex_string( ) # convert ndarray into flattened list data = ndarray.flatten().tolist() - packedBits = len(data[0]) * 8 targetBits = dtype.bitwidth() # calculate outer and inner dim shapes outer_dim_elems = 1 @@ -221,7 +220,7 @@ def numpy_to_hls_code( return ret -def npy_to_rtlsim_input(input_file, input_dtype, pad_to_nbits, reverse_inner=False): +def npy_to_rtlsim_input(input_file, input_dtype, pad_to_nbits, reverse_inner=True): """Convert the multidimensional NumPy array of integers (stored as floats) from input_file into a flattened sequence of Python arbitrary-precision integers, packing the innermost dimension. See @@ -253,7 +252,7 @@ def rtlsim_output_to_npy( # TODO should have its own testbench? output = np.asarray([hex(int(x)) for x in output]) out_array = unpack_innermost_dim_from_hex_string( - output, dtype, shape, reverse_inner=reverse_inner + output, dtype, shape, packedBits=packedBits, reverse_inner=reverse_inner ) np.save(path, out_array) return out_array @@ -326,7 +325,7 @@ def packed_bytearray_to_finnpy( npbytearray2hexstring, packed_dim, packed_bytearray ) ret = unpack_innermost_dim_from_hex_string( - packed_hexstring, dtype, output_shape, reverse_inner + packed_hexstring, dtype, output_shape, packed_bits, reverse_inner ) return ret diff --git a/src/finn/util/fpgadataflow.py b/src/finn/util/fpgadataflow.py index f11192a121a4b4a8a0524629fe6b3988923a363b..a6887e0fd6329e1c0ca5ad8e187e6ee1fabb1679 100644 --- a/src/finn/util/fpgadataflow.py +++ b/src/finn/util/fpgadataflow.py @@ -45,3 +45,10 @@ def pyverilate_stitched_ip(model): top_verilog = model.get_metadata_prop("wrapper_filename") sim = PyVerilator.build(top_verilog, verilog_path=all_verilog_dirs) return sim + + +def pyverilate_get_liveness_threshold_cycles(): + """Return the number of no-output cycles rtlsim will wait before assuming + the simulation is not finishing and throwing an exception.""" + + return int(os.getenv("LIVENESS_THRESHOLD", 10000)) diff --git a/tests/end2end/test_end2end_tfc.py b/tests/end2end/test_end2end_tfc.py new file mode 100644 index 0000000000000000000000000000000000000000..598bf16758310388aef4ecc621a16afab0ad8062 --- /dev/null +++ b/tests/end2end/test_end2end_tfc.py @@ -0,0 +1,217 @@ +import os + +import numpy as np +# as of Feb'20 there is a bug that segfaults ONNX shape inference if we +# import pytorch before onnx, so we make sure to import onnx first +import onnx # NOQA + +import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls +import finn.transformation.streamline.absorb as absorb +from finn.core.modelwrapper import ModelWrapper +from finn.core.onnx_exec import execute_onnx +from finn.custom_op.registry import getCustomOp +from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount +from finn.transformation.fold_constants import FoldConstants +from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen +from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch +from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim +from finn.transformation.fpgadataflow.compile import Compile +from finn.transformation.fpgadataflow.create_dataflow_partition import ( + CreateDataflowPartition, +) +from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen +from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker +from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver +from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject +from finn.transformation.fpgadataflow.replace_verilog_relpaths import ( + ReplaceVerilogRelPaths, +) +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject +from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames +from finn.transformation.infer_datatypes import InferDataTypes +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.streamline import Streamline +from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds +from finn.util.basic import pynq_part_map +from finn.util.test import get_test_model_trained + +build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] +test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") +test_fpga_part = pynq_part_map[test_pynq_board] +target_clk_ns = 5 + + +def test_end2end_tfc_export(): + import brevitas.onnx as bo + + tfc = get_test_model_trained("TFC", 1, 1) + bo.export_finn_onnx( + tfc, (1, 1, 28, 28), build_dir + "/end2end_tfc_w1_a1_export.onnx" + ) + + +def test_end2end_tfc_import_and_tidy(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_export.onnx") + model = model.transform(InferShapes()) + model = model.transform(FoldConstants()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveReadableTensorNames()) + model = model.transform(InferDataTypes()) + model.save(build_dir + "/end2end_tfc_w1_a1_tidy.onnx") + + +def test_end2end_tfc_streamline(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_tidy.onnx") + model = model.transform(Streamline()) + model.save(build_dir + "/end2end_tfc_w1_a1_streamlined.onnx") + + +def test_end2end_tfc_convert_to_hls_layers(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_streamlined.onnx") + model = model.transform(ConvertBipolarMatMulToXnorPopcount()) + model = model.transform(absorb.AbsorbAddIntoMultiThreshold()) + model = model.transform(absorb.AbsorbMulIntoMultiThreshold()) + model = model.transform(RoundAndClipThresholds()) + model = model.transform(to_hls.InferBinaryStreamingFCLayer()) + model.save(build_dir + "/end2end_tfc_w1_a1_hls_layers.onnx") + + +def test_end2end_tfc_create_dataflow_partition(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_hls_layers.onnx") + parent_model = model.transform(CreateDataflowPartition()) + parent_model.save(build_dir + "/end2end_tfc_w1_a1_dataflow_parent.onnx") + sdp_node = getCustomOp(parent_model.graph.node[2]) + dataflow_model_filename = sdp_node.get_nodeattr("model") + dataflow_model = ModelWrapper(dataflow_model_filename) + dataflow_model.save(build_dir + "/end2end_tfc_w1_a1_dataflow_model.onnx") + + +def test_end2end_tfc_fold_and_tlastmarker(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_dataflow_model.onnx") + fc0 = model.graph.node[0] + fc1 = model.graph.node[1] + fc2 = model.graph.node[2] + fc3 = model.graph.node[3] + fc0w = getCustomOp(fc0) + fc1w = getCustomOp(fc1) + fc2w = getCustomOp(fc2) + fc3w = getCustomOp(fc3) + fc0w.set_nodeattr("inFIFODepth", 50) + fc0w.set_nodeattr("SIMD", 16) + fc0w.set_nodeattr("PE", 16) + fc0w.set_nodeattr("outFIFODepth", 4) + fc1w.set_nodeattr("SIMD", 16) + fc1w.set_nodeattr("PE", 16) + fc1w.set_nodeattr("outFIFODepth", 4) + fc2w.set_nodeattr("SIMD", 16) + fc2w.set_nodeattr("PE", 16) + fc2w.set_nodeattr("outFIFODepth", 4) + fc3w.set_nodeattr("SIMD", 16) + fc3w.set_nodeattr("PE", 10) + fc3w.set_nodeattr("outFIFODepth", 50) + model = model.transform(InsertTLastMarker()) + model.save(build_dir + "/end2end_tfc_w1_a1_folded.onnx") + + +def test_end2end_tfc_gen_hls_ip(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_folded.onnx") + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(CodeGen_ipgen(test_fpga_part, target_clk_ns)) + model = model.transform(HLSSynth_IPGen()) + model.save(build_dir + "/end2end_tfc_w1_a1_ipgen.onnx") + + +def test_end2end_tfc_ip_stitch(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_ipgen.onnx") + model = model.transform(ReplaceVerilogRelPaths()) + model = model.transform(CodeGen_ipstitch(test_fpga_part)) + model.save(build_dir + "/end2end_tfc_w1_a1_ipstitch.onnx") + + +def test_end2end_tfc_verify_dataflow_part(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_ipstitch.onnx") + x = np.zeros((1, 784), dtype=np.float32) + inp_name = model.graph.input[0].name + out_name = model.graph.output[0].name + inp_dict = {inp_name: x} + # npysim + model = model.transform(CodeGen_npysim()) + model = model.transform(Compile()) + model = model.transform(SetExecMode("npysim")) + model.save(build_dir + "/end2end_tfc_w1_a1_ipstitch_npysim.onnx") + ret_npysim = execute_onnx(model, inp_dict, True) + res_npysim = ret_npysim[out_name] + # node-by-node rtlsim + model = model.transform(SetExecMode("rtlsim")) + getCustomOp(model.graph.node[0]).set_nodeattr("rtlsim_trace", "default") + getCustomOp(model.graph.node[1]).set_nodeattr("rtlsim_trace", "default") + getCustomOp(model.graph.node[2]).set_nodeattr("rtlsim_trace", "default") + getCustomOp(model.graph.node[3]).set_nodeattr("rtlsim_trace", "default") + model.save(build_dir + "/end2end_tfc_w1_a1_ipstitch_nodebynode_rtlsim.onnx") + ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True) + res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name] + # whole-network (ip-stitched) rtlsim + model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd") + model.save(build_dir + "/end2end_tfc_w1_a1_ipstitch_whole_rtlsim.onnx") + ret_rtlsim_whole = execute_onnx(model, inp_dict, True) + res_rtlsim_whole = ret_rtlsim_whole[out_name] + assert np.isclose(res_npysim, res_rtlsim_nodebynode).all() + assert np.isclose(res_npysim, res_rtlsim_whole).all() + + +def test_end2end_tfc_verify_all(): + # use the streamlined model as the "golden" model for right answers + golden = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_streamlined.onnx") + iname = golden.graph.input[0].name + oname = golden.graph.output[0].name + ishape = golden.get_tensor_shape(iname) + x = np.zeros(ishape, dtype=np.float32) + ret_golden = execute_onnx(golden, {iname: x}, True) + y_golden = ret_golden[oname] + # set up parent+child graph to test + # we'll use models from the previous step as the child model + parent_model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_dataflow_parent.onnx") + iname = parent_model.graph.input[0].name + oname = parent_model.graph.output[0].name + # produce results with npysim + sdp_node = getCustomOp(parent_model.graph.node[2]) + sdp_node.set_nodeattr( + "model", build_dir + "/end2end_tfc_w1_a1_ipstitch_npysim.onnx" + ) + ret_npysim = execute_onnx(parent_model, {iname: x}, True) + y_npysim = ret_npysim[oname] + # produce results with node-by-node rtlsim + sdp_node.set_nodeattr( + "model", build_dir + "/end2end_tfc_w1_a1_ipstitch_nodebynode_rtlsim.onnx" + ) + ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True) + y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname] + # produce results with whole-network (stitched ip) rtlsim + sdp_node.set_nodeattr( + "model", build_dir + "/end2end_tfc_w1_a1_ipstitch_whole_rtlsim.onnx" + ) + ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True) + y_whole_rtlsim = ret_whole_rtlsim[oname] + assert np.isclose(y_golden, y_npysim).all() + assert np.isclose(y_golden, y_nodebynode_rtlsim).all() + assert np.isclose(y_golden, y_whole_rtlsim).all() + + +def test_end2end_tfc_make_pynq_proj(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_ipstitch.onnx") + model = model.transform(MakePYNQProject(test_pynq_board)) + model.save(build_dir + "/end2end_tfc_w1_a1_pynq_project.onnx") + + +def test_end2end_synth_pynq_project(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_pynq_project.onnx") + model = model.transform(SynthPYNQProject()) + model.save(build_dir + "/end2end_tfc_w1_a1_synth.onnx") + + +def test_end2end_tfc_make_driver(): + model = ModelWrapper(build_dir + "/end2end_tfc_w1_a1_synth.onnx") + model = model.transform(MakePYNQDriver()) + model.save(build_dir + "/end2end_tfc_w1_a1_pynq_driver.onnx") diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py index 5c1e3908c94a38fb008ca400f7058d8928807eb7..9d3b36f2204abbe4277c7164d0259175fce5e085 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py +++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py @@ -183,9 +183,9 @@ def test_fpgadataflow_fclayer_npysim(idt, wdt, act, nf, sf, mw, mh): # input datatype @pytest.mark.parametrize("idt", [DataType.BIPOLAR, DataType.INT2]) # neuron folding, -1 is maximum possible -@pytest.mark.parametrize("nf", [-1, 1]) +@pytest.mark.parametrize("nf", [-1, 2, 1]) # synapse folding, -1 is maximum possible -@pytest.mark.parametrize("sf", [-1, 1]) +@pytest.mark.parametrize("sf", [-1, 2, 1]) # HLS matrix width (input features) @pytest.mark.parametrize("mw", [4]) # HLS matrix height (output features) diff --git a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py b/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py index 6c4ce2d235d138227026e7ec082d1e13e4ea3673..775251b13bfe0d35100b79c84cdf2611ba94f99c 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py @@ -200,7 +200,6 @@ def test_fpgadataflow_ipstitch_gen_model(): # exec_mode): model = model.transform(CodeGen_ipgen(test_fpga_part, 5)) model = model.transform(HLSSynth_IPGen()) assert model.graph.node[0].op_type == "StreamingFCLayer_Batch" - # assert model.graph.node[1].op_type == "StreamingFCLayer_Batch" assert model.graph.node[-1].op_type == "TLastMarker" model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx") diff --git a/tests/util/test_rtlsim2npy.py b/tests/util/test_rtlsim2npy.py index c1a10fe49a7956c83a7bfcd26f15a8c4238ebdb3..9ddf34bad46376f618f4be401ca6aaeace0e9d72 100644 --- a/tests/util/test_rtlsim2npy.py +++ b/tests/util/test_rtlsim2npy.py @@ -10,13 +10,13 @@ def test_unpack_innermost_dim_from_hex_string(): dtype = DataType.BINARY shape = (1, 2, 4) eA = [[1, 1, 1, 0], [0, 1, 1, 0]] - A_unpacked = unpack_innermost_dim_from_hex_string(A, dtype, shape) + A_unpacked = unpack_innermost_dim_from_hex_string(A, dtype, shape, 8) assert (A_unpacked == eA).all() A = np.asarray(["0x0e", "0x06"]) eA_flipped = [[0, 1, 1, 1], [0, 1, 1, 0]] A_unpacked_flipped = unpack_innermost_dim_from_hex_string( - A, dtype, shape, reverse_inner=True + A, dtype, shape, 8, reverse_inner=True ) assert (A_unpacked_flipped == eA_flipped).all() @@ -25,13 +25,13 @@ def test_unpack_innermost_dim_from_hex_string(): dtype = DataType.UINT2 shape = (1, 2, 2, 2) eB = [[[3, 3], [3, 3]], [[1, 3], [3, 1]]] - B_unpacked = unpack_innermost_dim_from_hex_string(B, dtype, shape) + B_unpacked = unpack_innermost_dim_from_hex_string(B, dtype, shape, 8) assert (B_unpacked == eB).all() B = np.asarray([["0x0f", "0x0f"], ["0x07", "0x0d"]]) eB_flipped = [[[3, 3], [3, 3]], [[3, 1], [1, 3]]] B_unpacked_flipped = unpack_innermost_dim_from_hex_string( - B, dtype, shape, reverse_inner=True + B, dtype, shape, 8, reverse_inner=True ) assert (B_unpacked_flipped == eB_flipped).all() @@ -40,7 +40,7 @@ def test_unpack_innermost_dim_from_hex_string(): dtype = DataType.INT2 shape = (1, 2, 2, 2) eC = [[[-1, -1], [-1, -1]], [[1, -1], [-1, 1]]] - C_unpacked = unpack_innermost_dim_from_hex_string(C, dtype, shape) + C_unpacked = unpack_innermost_dim_from_hex_string(C, dtype, shape, 8) assert (C_unpacked == eC).all() C = np.asarray([["0x0f", "0x0f"], ["0x07", "0x0d"]]) @@ -48,7 +48,7 @@ def test_unpack_innermost_dim_from_hex_string(): shape = (1, 2, 2, 2) eC = [[[-1, -1], [-1, -1]], [[-1, 1], [1, -1]]] C_unpacked = unpack_innermost_dim_from_hex_string( - C, dtype, shape, reverse_inner=True + C, dtype, shape, 8, reverse_inner=True ) assert (C_unpacked == eC).all() @@ -57,11 +57,11 @@ def test_unpack_innermost_dim_from_hex_string(): dtype = DataType.INT4 shape = (2, 1) eD = [[-2], [6]] - D_unpacked = unpack_innermost_dim_from_hex_string(D, dtype, shape) + D_unpacked = unpack_innermost_dim_from_hex_string(D, dtype, shape, 8) assert (D_unpacked == eD).all() D_unpacked = unpack_innermost_dim_from_hex_string( - D, dtype, shape, reverse_inner=True + D, dtype, shape, 8, reverse_inner=True ) assert (D_unpacked == eD).all() @@ -70,10 +70,10 @@ def test_unpack_innermost_dim_from_hex_string(): dtype = DataType.INT32 shape = (1, 4, 1) eE = [[[-1], [-2], [2], [-17]]] - E_unpacked = unpack_innermost_dim_from_hex_string(E, dtype, shape) + E_unpacked = unpack_innermost_dim_from_hex_string(E, dtype, shape, 32) assert (E_unpacked == eE).all() E_unpacked = unpack_innermost_dim_from_hex_string( - E, dtype, shape, reverse_inner=True + E, dtype, shape, 32, reverse_inner=True ) assert (E_unpacked == eE).all()