diff --git a/docs/finn/example_networks.rst b/docs/finn/example_networks.rst
index 9f221871f09bf655db9d81988d6fa83e53473634..86bb2bd11fd805a23a3bdf6da8a8ed686259ecc1 100644
--- a/docs/finn/example_networks.rst
+++ b/docs/finn/example_networks.rst
@@ -20,17 +20,17 @@ version, this is indicated by an x mark in the table.
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
 | Export/Import         | x          | x        | x        | x        | x        | x        | x        |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
-| Streamlining          | x          | x        | x        | x        | x        |          |          |
+| Streamlining          | x          | x        | x        | x        | x        |          | x        |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
-| Convert to HLS layers | x          | x        | x        | x        | x        |          |          |
+| Convert to HLS layers | x          | x        | x        | x        | x        |          | x        |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
-| Stitched IP           | x          | x        | x        | x        | x        |          |          |
+| Stitched IP           | x          | x        | x        | x        | x        |          | x        |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
-| Hardware test         | x          | x        | x        |          | x        |          |          |
+| Hardware test         | x          | x        | x        |          | x        |          | x        |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
-| cppsim                | x          | x        | x        | x        | x        |          |          |
+| cppsim                | x          | x        | x        | x        | x        |          | x        |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
-| rtlsim node-by-node   | x          | x        | x        | x        | x        |          |          |
+| rtlsim node-by-node   | x          | x        | x        | x        | x        |          | x        |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
-| rtlsim stitched IP    | x          | x        | x        | x        | x        |          |          |
+| rtlsim stitched IP    | x          | x        | x        | x        | x        |          | x        |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst
index 5beabeb2980840fd1dbd2ee5b058738fa8553152..f4fa7a13dcbe4fe8ab9667a111df00c605747710 100644
--- a/docs/finn/getting_started.rst
+++ b/docs/finn/getting_started.rst
@@ -31,6 +31,7 @@ Getting an interactive shell for development or experimentation

   sh run-docker.sh

 Simply running sh run-docker.sh without any additional arguments will clone the dependency repos, create a Docker container and give you a terminal which you can use for development and experimentation.
+If you want a new terminal on an already-running container, you can do this with `docker exec -it finn_dev_<username> bash`.

 .. warning:: The Docker container is spawned with the `--rm` option, so make sure that any important files you created inside the container are either in the /workspace/finn folder (which is mounted from the host computer) or otherwise backed up.
diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index a688898f4a43b33fd3f07cda12144b84829e451f..14ba1b813c8d61145f7d221deee9c184aeb9bddc 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -171,9 +171,15 @@ class HLSCustomOp(CustomOp):
         of the node as a dictionary."""
         ret = dict()
         ret["BRAM_18K"] = self.bram_estimation()
+        ret["BRAM_efficiency"] = self.bram_efficiency_estimation()
         ret["LUT"] = self.lut_estimation()
         return ret

+    def bram_efficiency_estimation(self):
+        """Function for BRAM efficiency estimation: actual parameter storage
+        needed divided by the allocated BRAM storage (from estimation)"""
+        return 1
+
     def bram_estimation(self):
         """Function for BRAM resource estimation, is member function of
         HLSCustomOp class but has to be filled by every node"""
@@ -219,7 +225,6 @@ class HLSCustomOp(CustomOp):
         self.code_gen_dict["$CLKPERIOD$"] = [str(clk)]
         self.code_gen_dict["$EXTRA_DIRECTIVES$"] = self.ipgen_extra_directives()
-
         template = self.ipgentcl_template

         for key in self.code_gen_dict:
@@ -235,7 +240,7 @@ class HLSCustomOp(CustomOp):
     def ipgen_extra_directives(self):
         "Return a list of extra tcl directives for HLS synthesis."
         return []
-
+
     def ipgen_singlenode_code(self):
         """Builds the bash script for ip generation using the IPGenBuilder
         from finn.util.fpgadataflow."""
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index b6c992e4b5ea1ced088928b3d9a4db381d82db22..a7ebff68749120868cae9ce5ac18d2856fe2cb8a 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -240,11 +240,21 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         Q = self.get_nodeattr("SIMD")
         wdt = self.get_weight_datatype()
         W = wdt.bitwidth()
-        D_in = self.get_instream_width()
-        D_out = self.get_outstream_width()
+        D_in = self.get_nodeattr("MW")
+        D_out = self.get_nodeattr("MH")
         omega = (D_in * D_out) / (Q * P)
         return P * (math.ceil(omega / 512)) * (math.ceil((Q * W) / 36))

+    def bram_efficiency_estimation(self):
+        wdt = self.get_weight_datatype()
+        W = wdt.bitwidth()
+        D_in = self.get_nodeattr("MW")
+        D_out = self.get_nodeattr("MH")
+        bram16_est = self.bram_estimation()
+        wbits = W * D_in * D_out
+        bram16_est_capacity = bram16_est * 36 * 512
+        return wbits / bram16_est_capacity
+
     def lut_estimation(self):
         """Calculates resource estimations for LUTs based on:
         - FINN-R: An End-to-End Deep-Learning Framework for Fast
diff --git a/src/finn/transformation/fpgadataflow/annotate_resources.py b/src/finn/transformation/fpgadataflow/annotate_resources.py
index 207075b00de1871da19ea78472125d435449ed6e..62ee92df54eee2b63d84657515d7fbc3a8808b81 100644
--- a/src/finn/transformation/fpgadataflow/annotate_resources.py
+++ b/src/finn/transformation/fpgadataflow/annotate_resources.py
@@ -69,6 +69,9 @@ class AnnotateResources(Transformation):
                 total_dict[r_type] += r_amount
             else:
                 total_dict[r_type] = r_amount
+        for k in total_dict.keys():
+            if "efficiency" in k:
+                total_dict[k] = total_dict[k] / len(graph.node)
         model.set_metadata_prop("res_total_" + self.mode, str(total_dict))
         for node in graph.node:
             if _is_fpgadataflow_node(node) and node.name in res_dict.keys():
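A quick sanity check of the two estimates above (note that the annotate_resources.py hunk then turns the summed efficiency into a per-graph average by dividing by the node count). The following is a standalone sketch of the formulas as written in this diff, with made-up layer dimensions that are not taken from the PR:

```python
import math

# illustrative values, not from this PR: a 1024x1024 layer with 2-bit
# weights, folded to SIMD=32 input lanes and PE=16 output lanes
MW, MH = 1024, 1024  # D_in, D_out
Q, P = 32, 16        # SIMD, PE
W = 2                # weight bit width

# bram_estimation(): omega weight words per (PE, SIMD) lane, packed into
# BRAM18s that are 512 entries deep and 36 bits wide
omega = (MW * MH) / (Q * P)
bram_est = P * math.ceil(omega / 512) * math.ceil((Q * W) / 36)

# bram_efficiency_estimation(): actual weight bits over allocated capacity
wbits = W * MW * MH
efficiency = wbits / (bram_est * 36 * 512)

print(bram_est, round(efficiency, 2))  # 128 BRAM18s at ~0.89 efficiency
```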
diff --git a/src/finn/transformation/remove_identity.py b/src/finn/transformation/remove_identity.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7a58d59c1bb8ff643e691442e7eda3c0516aa5c
--- /dev/null
+++ b/src/finn/transformation/remove_identity.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from finn.transformation import Transformation
+
+
+def _is_identity(node, model):
+    if node.op_type == "Mul":
+        scale = model.get_initializer(node.input[1])
+        if scale is not None:
+            return (scale == 1).all()
+    elif node.op_type == "Add":
+        bias = model.get_initializer(node.input[1])
+        if bias is not None:
+            return (bias == 0).all()
+    return False
+
+
+class RemoveIdentity(Transformation):
+    """Remove nodes that apply identity ops from the graph, including:
+    * Multiply by 1
+    * Add 0."""
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for node in graph.node:
+            node_ind += 1
+            if _is_identity(node, model):
+                node_src = node.input[0]
+                node_dst = node.output[0]
+                graph.node.remove(node)
+                model.rename_tensor(node_dst, node_src)
+                graph_modified = True
+        return (model, graph_modified)
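A minimal usage sketch for the new transformation, assuming FINN's usual ModelWrapper flow (the checkpoint paths are hypothetical, for illustration only):

```python
from finn.core.modelwrapper import ModelWrapper
from finn.transformation.remove_identity import RemoveIdentity

# hypothetical checkpoint path
model = ModelWrapper("/tmp/streamlined_model.onnx")
# ModelWrapper.transform() reapplies a transformation until it reports no
# further changes, so all Mul-by-1 / Add-0 nodes are removed in one call
model = model.transform(RemoveIdentity())
model.save("/tmp/streamlined_model_cleaned.onnx")
```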
diff --git a/src/finn/transformation/streamline/__init__.py b/src/finn/transformation/streamline/__init__.py
index c9c73fa4c8303ee28bc1cc6aee879d633740e01e..d9c12a20975084705b801c0ff027d4b99aff9490 100644
--- a/src/finn/transformation/streamline/__init__.py
+++ b/src/finn/transformation/streamline/__init__.py
@@ -41,6 +41,7 @@ from finn.transformation.streamline.absorb import (
     FactorOutMulSignMagnitude,
     Absorb1BitMulIntoMatMul,
     Absorb1BitMulIntoConv,
+    AbsorbSignBiasIntoMultiThreshold,
 )

 from finn.transformation.streamline.collapse_repeated import (
@@ -59,6 +60,7 @@ from finn.transformation.streamline.reorder import (
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
 from finn.transformation.streamline.sign_to_thres import ConvertSignToThres
 from finn.transformation.batchnorm_to_affine import BatchNormToAffine
+from finn.transformation.remove_identity import RemoveIdentity


 class Streamline(Transformation):
@@ -70,6 +72,7 @@ class Streamline(Transformation):
             ConvertDivToMul(),
             BatchNormToAffine(),
             ConvertSignToThres(),
+            AbsorbSignBiasIntoMultiThreshold(),
             MoveAddPastMul(),
             MoveScalarAddPastMatMul(),
             MoveScalarAddPastConv(),
@@ -87,6 +90,7 @@ class Streamline(Transformation):
         ]
         for trn in streamline_transformations:
             model = model.transform(trn)
+            model = model.transform(RemoveIdentity())
             model = model.transform(GiveUniqueNodeNames())
             model = model.transform(GiveReadableTensorNames())
             model = model.transform(InferDataTypes())
diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py
index 4266488c7d1b86f2997d4c77d70b80f88bf37442..dc01eea411fc1f640e481c9be02a92acdd59533f 100644
--- a/src/finn/transformation/streamline/absorb.py
+++ b/src/finn/transformation/streamline/absorb.py
@@ -38,6 +38,70 @@ from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.infer_datatypes import InferDataTypes


+class AbsorbSignBiasIntoMultiThreshold(Transformation):
+    """Absorb scalar bias originating from signed int export back into
+    MultiThreshold and re-evaluate the output datatype."""
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for n in graph.node:
+            # search for (MultiThreshold, Add) pair
+            node_ind += 1
+            if (
+                n.op_type == "MultiThreshold"
+                and not model.is_fork_node(n)
+                and not model.is_join_node(n)
+            ):
+                consumer = model.find_consumer(n.output[0])
+                if consumer is not None and consumer.op_type == "Add":
+                    mt_node = n
+                    add_node = consumer
+                    threshold_name = mt_node.input[1]
+                    add_weight_name = add_node.input[1]
+                    T = model.get_initializer(threshold_name)
+                    A = model.get_initializer(add_weight_name)
+                    if (A is None) or (T is None):
+                        warnings.warn("Threshold or add bias not constant, skipping")
+                        continue
+                    end_name = add_node.output[0]
+                    # we can only absorb scalar adds
+                    is_scalar = A.ndim == 0 or all(x == 1 for x in A.shape)
+                    if not is_scalar:
+                        continue
+                    bias = A.flatten()[0]
+                    # set MultiThreshold bias property
+                    mt_inst = getCustomOp(mt_node)
+                    bias += mt_inst.get_nodeattr("out_bias")
+                    mt_inst.set_nodeattr("out_bias", bias)
+                    graph_modified = True
+                    # compute new DataType for MultiThreshold output
+                    steps = T.shape[-1]
+                    new_min = bias
+                    new_max = steps + bias
+                    odt = DataType.get_smallest_possible(steps).name.replace(
+                        "UINT", "INT"
+                    )
+                    odt = DataType[odt]
+                    assert odt.allowed(new_max) and odt.allowed(new_min), (
+                        "Could not compute new MultiThreshold DataType "
+                        "(min = %d max = %d)" % (new_min, new_max)
+                    )
+                    mt_inst.set_nodeattr("out_dtype", odt.name)
+                    # remove Add node, rewire MultiThreshold
+                    graph.node.remove(add_node)
+                    mt_node.output[0] = end_name
+                    # set datatype
+                    model.set_tensor_datatype(end_name, odt)
+        if graph_modified:
+            model = model.transform(InferDataTypes())
+        return (model, graph_modified)
+
+
 class AbsorbAddIntoMultiThreshold(Transformation):
     """Absorb preceding Add ops into MultiThreshold by updating the threshold
     values. Only scalar/1D add vectors can be absorbed."""
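A worked example of the datatype re-derivation in AbsorbSignBiasIntoMultiThreshold, with made-up numbers: a MultiThreshold with 15 threshold steps produces values in [0, 15]; absorbing a scalar Add of -8 shifts that range to [-8, 7], which fits INT4. A sketch of just that computation, assuming FINN's DataType helpers:

```python
from finn.core.datatype import DataType

# made-up example: 15 threshold steps, scalar Add bias of -8
steps, bias = 15, -8
new_min, new_max = bias, steps + bias  # output range becomes [-8, 7]
# smallest unsigned type covering `steps`, then switched to its signed twin
odt = DataType[DataType.get_smallest_possible(steps).name.replace("UINT", "INT")]
assert odt.allowed(new_min) and odt.allowed(new_max)
print(odt.name)  # INT4
```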
diff --git a/tests/end2end/test_end2end_cnv_w2a2.py b/tests/end2end/test_end2end_cnv_w2a2.py
new file mode 100644
index 0000000000000000000000000000000000000000..31ccebd4c175ad2badef17499bf113d978b637f7
--- /dev/null
+++ b/tests/end2end/test_end2end_cnv_w2a2.py
@@ -0,0 +1,377 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+import numpy as np
+
+# as of Feb'20 there is a bug that segfaults ONNX shape inference if we
+# import pytorch before onnx, so we make sure to import onnx first
+import onnx  # NOQA
+
+import pytest
+import pkg_resources as pk
+from finn.custom_op.registry import getCustomOp
+from finn.core.onnx_exec import execute_onnx
+from finn.transformation.double_to_single_float import DoubleToSingleFloat
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.move_reshape import RemoveCNVtoFCFlatten
+from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from finn.transformation.streamline import Streamline
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+import finn.transformation.streamline.absorb as absorb
+from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
+from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
+    ReplaceVerilogRelPaths,
+)
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
+from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject
+from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject
+from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
+from finn.util.basic import pynq_part_map
+from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
+from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
+from finn.core.throughput_test import throughput_test_rtlsim
+
+build_dir = "/tmp/" + os.environ["FINN_INST_NAME"]
+test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
+test_fpga_part = pynq_part_map[test_pynq_board]
+target_clk_ns = 10
+mem_mode = "decoupled"
+
+
+def test_end2end_cnv_w2a2_export():
+    import brevitas.onnx as bo
+
+    cnv = get_test_model_trained("CNV", 2, 2)
+    bo.export_finn_onnx(
+        cnv, (1, 3, 32, 32), build_dir + "/end2end_cnv_w2a2_export.onnx"
+    )
+
+
+def test_end2end_cnv_w2a2_import_and_tidy():
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_export.onnx")
+    model = model.transform(DoubleToSingleFloat())
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model.save(build_dir + "/end2end_cnv_w2a2_tidy.onnx")
+
+
+def test_end2end_cnv_w2a2_streamline():
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_tidy.onnx")
+    model = model.transform(Streamline())
+    model = model.transform(LowerConvsToMatMul())
+    model = model.transform(MakeMaxPoolNHWC())
+    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
+    model = model.transform(Streamline())
+    model.save(build_dir + "/end2end_cnv_w2a2_streamlined.onnx")
+
+
+def test_end2end_cnv_w2a2_convert_to_hls_layers():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_streamlined.onnx"
+    )
+    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
+    model = model.transform(to_hls.InferConvInpGen())
+    model = model.transform(to_hls.InferStreamingMaxPool())
+    model = model.transform(RemoveCNVtoFCFlatten())
+    model.save(build_dir + "/end2end_cnv_w2a2_hls_layers.onnx")
+
+
+def test_end2end_cnv_w2a2_create_dataflow_partition():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_hls_layers.onnx"
+    )
+    parent_model = model.transform(CreateDataflowPartition())
+    parent_model.save(build_dir + "/end2end_cnv_w2a2_dataflow_parent.onnx")
+    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+    sdp_node = getCustomOp(sdp_node)
+    dataflow_model_filename = sdp_node.get_nodeattr("model")
+    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
+    dataflow_model.save(build_dir + "/end2end_cnv_w2a2_dataflow_model.onnx")
+
+
+def test_end2end_cnv_w2a2_fold_and_tlastmarker():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_dataflow_model.onnx"
+    )
+    fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
+    # each tuple is (PE, SIMD, in_fifo_depth, ram_style) for a layer
+    folding = [
+        (8, 3, 256, "auto"),
+        (16, 16, 256, "auto"),
+        (8, 16, 256, "auto"),
+        (8, 16, 256, "block"),
+        (4, 8, 214, "auto"),
+        (1, 8, 2, "auto"),
+        (1, 2, 126, "distributed"),
+        (2, 2, 62, "block"),
+        (5, 1, 6, "distributed"),
+    ]
+    for fcl, (pe, simd, ififodepth, ramstyle) in zip(fc_layers, folding):
+        fcl_inst = getCustomOp(fcl)
+        fcl_inst.set_nodeattr("PE", pe)
+        fcl_inst.set_nodeattr("SIMD", simd)
+        fcl_inst.set_nodeattr("inFIFODepth", ififodepth)
+        fcl_inst.set_nodeattr("ram_style", ramstyle)
+
+    swg_layers = model.get_nodes_by_op_type("ConvolutionInputGenerator")
+    swg_idepth = [2, 51, 9, 106, 2, 2]
+    for i in range(len(swg_layers)):
+        swg_inst = getCustomOp(swg_layers[i])
+        simd = folding[i][1]
+        swg_inst.set_nodeattr("SIMD", simd)
+        swg_inst.set_nodeattr("inFIFODepth", swg_idepth[i])
+
+    model = model.transform(InsertDWC())
+    model = model.transform(InsertFIFO())
+    model = model.transform(InsertTLastMarker())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(AnnotateResources("estimate"))
+    model.save(build_dir + "/end2end_cnv_w2a2_folded.onnx")
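For intuition on the folding tuples above (this rule of thumb is not computed anywhere in this PR): a StreamingFCLayer needs roughly ceil(MW / SIMD) * ceil(MH / PE) cycles per input, so raising PE and SIMD trades LUTs/BRAM for a lower initiation interval. A sketch with illustrative shapes:

```python
import math

# illustrative layer shape, not taken from the test above
MW, MH = 576, 64    # matrix dims of one lowered conv layer
pe, simd = 16, 16   # as in a folding tuple like (16, 16, 256, "auto")

# back-of-the-envelope cycles per input image for this layer
cycles = math.ceil(MW / simd) * math.ceil(MH / pe)
print(cycles)  # 144: 36 SIMD folds x 4 PE folds
```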
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_end2end_cnv_w2a2_gen_hls_ip():
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_folded.onnx")
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(AnnotateResources("hls"))
+    model.save(build_dir + "/end2end_cnv_w2a2_ipgen.onnx")
+
+
+@pytest.mark.vivado
+def test_end2end_cnv_w2a2_ip_stitch():
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_ipgen.onnx")
+    model = model.transform(ReplaceVerilogRelPaths())
+    model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
+    model.save(build_dir + "/end2end_cnv_w2a2_ipstitch.onnx")
+
+
+@pytest.mark.vivado
+def test_end2end_cnv_w2a2_verify_dataflow_part():
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_ipstitch.onnx")
+    x = np.zeros((1, 32, 32, 3), dtype=np.float32)
+    inp_name = model.graph.input[0].name
+    out_name = model.graph.output[0].name
+    inp_dict = {inp_name: x}
+    # cppsim
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
+    model = model.transform(SetExecMode("cppsim"))
+    model.save(build_dir + "/end2end_cnv_w2a2_ipgen_cppsim.onnx")
+    ret_cppsim = execute_onnx(model, inp_dict, True)
+    res_cppsim = ret_cppsim[out_name]
+    # node-by-node rtlsim
+    model = model.transform(SetExecMode("rtlsim"))
+    model = model.transform(PrepareRTLSim())
+    model.save(build_dir + "/end2end_cnv_w2a2_ipgen_nodebynode_rtlsim.onnx")
+    ret_rtlsim_nodebynode = execute_onnx(model, inp_dict, True)
+    res_rtlsim_nodebynode = ret_rtlsim_nodebynode[out_name]
+    # whole-network (ip-stitched) rtlsim
+    model.set_metadata_prop("exec_mode", "rtlsim")
+    model.save(build_dir + "/end2end_cnv_w2a2_ipstitch_whole_rtlsim.onnx")
+    # this is a particularly long-running test, set liveness thr. to unlimited
+    os.environ["LIVENESS_THRESHOLD"] = "-1"
+    ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
+    res_rtlsim_whole = ret_rtlsim_whole[out_name]
+    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
+    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
+
+
+@pytest.mark.vivado
+def test_end2end_cnv_w2a2_throughput_test_rtlsim():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_ipstitch_whole_rtlsim.onnx"
+    )
+    model.set_metadata_prop("rtlsim_trace", "rtlsim_trace.vcd")
+    # os.environ["RTLSIM_TRACE_DEPTH"] = "4"
+    # run through IP-stitched rtlsim with increasing batch sizes and
+    # check the number of cycles it takes to execute
+    ret = throughput_test_rtlsim(model, 10)
+    # TODO check for expected performance
+    assert ret["cycles"] > 0
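The throughput test above only asserts a positive cycle count; turning that count into frames per second at the 10 ns target clock is a one-liner. The helper below is illustrative, not part of this PR; `ret["cycles"]` is the only key the test itself relies on:

```python
def rtlsim_fps(cycles, batch_size=10, clk_ns=10):
    """Convert an rtlsim cycle count into frames/s at the target clock."""
    return batch_size / (cycles * clk_ns * 1e-9)

# e.g. if ret["cycles"] came back as 50000 for the batch of 10 used above:
print(rtlsim_fps(50000))  # 20000.0 frames/s
```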
+
+
+@pytest.mark.vivado
+def test_end2end_cnv_w2a2_verify_all():
+    # use the streamlined model as the "golden" model for right answers
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_streamlined.onnx"
+    )
+    iname = golden.graph.input[0].name
+    oname = golden.graph.output[0].name
+    # load one of the test vectors
+    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
+    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
+    input_tensor = input_tensor / 255
+    assert input_tensor.shape == (1, 3, 32, 32)
+    x = input_tensor
+    ret_golden = execute_onnx(golden, {iname: x}, True)
+    y_golden = ret_golden[oname]
+    # set up parent+child graph to test
+    # we'll use models from the previous step as the child model
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_dataflow_parent.onnx"
+    )
+    iname = parent_model.graph.input[0].name
+    oname = parent_model.graph.output[0].name
+    # produce results with cppsim
+    sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+    sdp_node = getCustomOp(sdp_node)
+    load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_ipgen_cppsim.onnx")
+    sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w2a2_ipgen_cppsim.onnx")
+    ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
+    y_cppsim = ret_cppsim[oname]
+    # produce results with node-by-node rtlsim
+    load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_ipgen_nodebynode_rtlsim.onnx"
+    )
+    sdp_node.set_nodeattr(
+        "model", build_dir + "/end2end_cnv_w2a2_ipgen_nodebynode_rtlsim.onnx"
+    )
+    ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True)
+    y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname]
+    # produce results with whole-network (stitched ip) rtlsim
+    load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_ipstitch_whole_rtlsim.onnx"
+    )
+    sdp_node.set_nodeattr(
+        "model", build_dir + "/end2end_cnv_w2a2_ipstitch_whole_rtlsim.onnx"
+    )
+    # this is a particularly long-running test, set liveness thr. to unlimited
+    os.environ["LIVENESS_THRESHOLD"] = "-1"
+    ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True)
+    y_whole_rtlsim = ret_whole_rtlsim[oname]
+    assert np.isclose(y_golden, y_cppsim).all()
+    assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
+    assert np.isclose(y_golden, y_whole_rtlsim).all()
+    assert np.argmax(y_golden) == 3
+
+
+@pytest.mark.vivado
+def test_end2end_cnv_w2a2_make_pynq_proj():
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_ipstitch.onnx")
+    model = model.transform(MakePYNQProject(test_pynq_board))
+    model.save(build_dir + "/end2end_cnv_w2a2_pynq_project.onnx")
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_end2end_cnv_w2a2_synth_pynq_project():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_pynq_project.onnx"
+    )
+    model = model.transform(SynthPYNQProject())
+    model = model.transform(AnnotateResources("synth"))
+    model.save(build_dir + "/end2end_cnv_w2a2_synth.onnx")
+
+
+def test_end2end_cnv_w2a2_make_driver():
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_synth.onnx")
+    model = model.transform(MakePYNQDriver())
+    model.save(build_dir + "/end2end_cnv_w2a2_pynq_driver.onnx")
+
+
+def test_end2end_cnv_w2a2_deploy_on_pynq():
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_pynq_driver.onnx"
+    )
+    try:
+        ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
+        if ip == "":
+            pytest.skip("PYNQ board IP address not specified")
+        username = os.getenv("PYNQ_USERNAME", "xilinx")
+        password = os.getenv("PYNQ_PASSWORD", "xilinx")
+        port = os.getenv("PYNQ_PORT", 22)
+        target_dir = os.getenv("PYNQ_TARGET_DIR", "/home/xilinx/finn")
+        model = model.transform(DeployToPYNQ(ip, port, username, password, target_dir))
+        # save the model to be able to link it to the parent
+        model.save(build_dir + "/end2end_cnv_w2a2_pynq_deploy.onnx")
+    except KeyError:
+        pytest.skip("PYNQ board IP address not specified")
+
+
+def test_end2end_cnv_w2a2_run_on_pynq():
+    # use the streamlined model as the "golden" model for right answers
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_streamlined.onnx"
+    )
+    iname = golden.graph.input[0].name
+    oname = golden.graph.output[0].name
+    # load one of the test vectors
+    fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz")
+    input_tensor = np.load(fn)["arr_0"].astype(np.float32)
+    input_tensor = input_tensor / 255
+    assert input_tensor.shape == (1, 3, 32, 32)
+    x = input_tensor
+    # run using FINN-based execution
+    ret_golden = execute_onnx(golden, {iname: x}, True)
+    y_golden = ret_golden[oname]
+    # set up parent+child graph to test
+    # we'll use models from the previous step as the child model
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w2a2_dataflow_parent.onnx"
+    )
+    iname = parent_model.graph.input[0].name
+    oname = parent_model.graph.output[0].name
+    try:
+        ip = os.environ["PYNQ_IP"]  # NOQA
+        if ip == "":
+            pytest.skip("PYNQ board IP address not specified")
+        # produce results by executing on the PYNQ board
+        sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+        sdp_node = getCustomOp(sdp_node)
+        load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w2a2_pynq_deploy.onnx")
+        sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w2a2_pynq_deploy.onnx")
+        ret = execute_onnx(parent_model, {iname: x}, True)
+        y = ret[oname]
+        assert np.isclose(y, y_golden).all()
+        assert np.argmax(y) == 3
+
+    except KeyError:
+        pytest.skip("PYNQ board IP address not specified")
board IP address not specified") diff --git a/tests/pynq/test_pynq_performance_end2end.py b/tests/pynq/test_pynq_performance_end2end.py index 66a93a190061e0142637be19bb2ea841d192745a..3b6ea86741b8adefce4faaa65b791f1d213cf3ae 100644 --- a/tests/pynq/test_pynq_performance_end2end.py +++ b/tests/pynq/test_pynq_performance_end2end.py @@ -10,7 +10,7 @@ from finn.core.throughput_test import throughput_test build_dir = "/tmp/" + os.environ["FINN_INST_NAME"] -@pytest.mark.parametrize("end2end_example", ["tfc_w1a1", "cnv_w1a1"]) +@pytest.mark.parametrize("end2end_example", ["tfc_w1a1", "cnv_w1a1", "cnv_w2a2"]) @pytest.mark.slow def test_pynq_performance_end2end(end2end_example): model = load_test_checkpoint_or_skip( diff --git a/tests/transformation/streamline/test_streamline_cnv.py b/tests/transformation/streamline/test_streamline_cnv.py index 56dcd26076ec0a5fba6e9be6acac7f5e13572c3d..103967dfb6b86cc6e2ce2bc9ab78249d8945d47d 100644 --- a/tests/transformation/streamline/test_streamline_cnv.py +++ b/tests/transformation/streamline/test_streamline_cnv.py @@ -44,9 +44,9 @@ from finn.transformation.double_to_single_float import DoubleToSingleFloat export_onnx_path = make_build_dir("test_streamline_cnv_") # act bits -@pytest.mark.parametrize("abits", [1]) +@pytest.mark.parametrize("abits", [1, 2]) # weight bits -@pytest.mark.parametrize("wbits", [1]) +@pytest.mark.parametrize("wbits", [1, 2]) # network topology / size @pytest.mark.parametrize("size", ["CNV"]) def test_streamline_cnv(size, wbits, abits): @@ -74,6 +74,7 @@ def test_streamline_cnv(size, wbits, abits): # model.save("orig_cnv.onnx") model = model.transform(Streamline()) # model.save("streamlined_cnv.onnx") + assert len(model.graph.node) == 23 produced_ctx = oxe.execute_onnx(model, input_dict, True) produced = produced_ctx[model.graph.output[0].name] assert np.isclose(expected, produced, atol=1e-3).all()