diff --git a/docs/img/finn-examples-header.png b/docs/img/finn-examples-header.png new file mode 100644 index 0000000000000000000000000000000000000000..50f8fa7761e10a958ed3567f268ef675cf1814f7 Binary files /dev/null and b/docs/img/finn-examples-header.png differ diff --git a/docs/img/imagenet.jpg b/docs/img/imagenet.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5cdd5aa303d9add5fbe6270936da0e152eca0135 Binary files /dev/null and b/docs/img/imagenet.jpg differ diff --git a/finn-rtllib/memstream/hdl/axilite_if.v b/finn-rtllib/memstream/hdl/axilite_if.v index 93b2227de1b51d4fca145e8b61e6ed6dc2ed3121..bdd4de288ed3a5de859cbb20c3157d7f21f8239c 100644 --- a/finn-rtllib/memstream/hdl/axilite_if.v +++ b/finn-rtllib/memstream/hdl/axilite_if.v @@ -127,7 +127,7 @@ always @(posedge aclk or negedge aresetn) always @(*) begin internal_waddr = awaddr >> $clog2(DATA_WIDTH/8); internal_wdata = wdata; - internal_wen = (state == STATE_IDLE) & awvalid & wvalid; + internal_wen = (state == STATE_IDLE) & awvalid & wvalid; end always @(posedge aclk) begin @@ -208,4 +208,3 @@ always @(posedge aclk or negedge aresetn) end endmodule - diff --git a/finn-rtllib/memstream/hdl/memstream_singleblock.v b/finn-rtllib/memstream/hdl/memstream_singleblock.v index 54ee56764e187520997e03bdcb291b4183e6ecf0..6bb3a97115325d81d4292c5af3c33921c2680a30 100644 --- a/finn-rtllib/memstream/hdl/memstream_singleblock.v +++ b/finn-rtllib/memstream/hdl/memstream_singleblock.v @@ -98,7 +98,7 @@ wire strm1_incr_en; assign strm0_incr_en = m_axis_0_tready | ~m_axis_0_tvalid; assign strm1_incr_en = m_axis_1_tready | ~m_axis_1_tvalid; -reg rack_shift[1:0]; +reg rack_shift[1:0]; generate if(MEM_DEPTH > 1) begin: use_ram diff --git a/finn-rtllib/memstream/sim/tb_memstream_writes.v b/finn-rtllib/memstream/sim/tb_memstream_writes.v index 867acfe813280cc3c9a473fb2a7e6bc9d7c05b23..a6ac747e967e594ac010f25a2827ebf7a7fcaa0f 100644 --- a/finn-rtllib/memstream/sim/tb_memstream_writes.v +++ b/finn-rtllib/memstream/sim/tb_memstream_writes.v @@ -179,7 +179,6 @@ task axi_read; data = data | (rdata<<(32*j)); end join - @(posedge clk); end end @@ -270,7 +269,6 @@ memstream MEM_WIDTH, ".", "auto", - //widths per stream STRM0_WIDTH, STRM1_WIDTH, @@ -278,7 +276,6 @@ memstream STRM3_WIDTH, STRM4_WIDTH, STRM5_WIDTH, - //depths per stream STRM0_DEPTH, STRM1_DEPTH, @@ -286,7 +283,6 @@ memstream STRM3_DEPTH, STRM4_DEPTH, STRM5_DEPTH, - //offsets for each stream STRM0_OFFSET, STRM1_OFFSET, @@ -332,32 +328,26 @@ dut m_axis_0_tready, m_axis_0_tvalid, m_axis_0_tdata, - m_axis_1_afull, m_axis_1_tready, m_axis_1_tvalid, m_axis_1_tdata, - m_axis_2_afull, m_axis_2_tready, m_axis_2_tvalid, m_axis_2_tdata, - m_axis_3_afull, m_axis_3_tready, m_axis_3_tvalid, m_axis_3_tdata, - m_axis_4_afull, m_axis_4_tready, m_axis_4_tvalid, m_axis_4_tdata, - m_axis_5_afull, m_axis_5_tready, m_axis_5_tvalid, m_axis_5_tdata - ); @@ -406,7 +396,6 @@ initial begin end end end - //check stream 2 begin $display("Starting stream 2 checker"); diff --git a/finn-rtllib/memstream/sim/test.sh b/finn-rtllib/memstream/sim/test.sh index 3348e64b715ccbba17a38ac3bdf2c2c4173c3956..7cb0497d261ac41a763bad8e58afabb204887d39 100755 --- a/finn-rtllib/memstream/sim/test.sh +++ b/finn-rtllib/memstream/sim/test.sh @@ -30,4 +30,3 @@ iverilog ../hdl/*.v tb_memstream_writes.v -o sim ./sim - diff --git a/finn-rtllib/memstream/xgui/memstream_v1_0.tcl b/finn-rtllib/memstream/xgui/memstream_v1_0.tcl index b8be5e0a2f5c960cc5cb47ff9b348efffad98762..87565bc5613ce783d6a8067e8323d2358adb8061 100644 --- a/finn-rtllib/memstream/xgui/memstream_v1_0.tcl +++ b/finn-rtllib/memstream/xgui/memstream_v1_0.tcl @@ -38,7 +38,6 @@ proc init_gui { IPINST } { proc update_PARAM_VALUE.AXILITE_ADDR_WIDTH { PARAM_VALUE.AXILITE_ADDR_WIDTH PARAM_VALUE.MEM_DEPTH PARAM_VALUE.MEM_WIDTH } { # Procedure called to update AXILITE_ADDR_WIDTH when any of the dependent parameters in the arguments change - set AXILITE_ADDR_WIDTH ${PARAM_VALUE.AXILITE_ADDR_WIDTH} set MEM_DEPTH ${PARAM_VALUE.MEM_DEPTH} set MEM_WIDTH ${PARAM_VALUE.MEM_WIDTH} @@ -393,4 +392,3 @@ proc update_MODELPARAM_VALUE.AXILITE_ADDR_WIDTH { MODELPARAM_VALUE.AXILITE_ADDR_ # Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value set_property value [get_property value ${PARAM_VALUE.AXILITE_ADDR_WIDTH}] ${MODELPARAM_VALUE.AXILITE_ADDR_WIDTH} } - diff --git a/src/finn/analysis/fpgadataflow/floorplan_params.py b/src/finn/analysis/fpgadataflow/floorplan_params.py index 8ea68eda3a8050d7068630083a61f7622be619c4..4c8cbf53de1ae7dc951911678a3f118bd3506dfe 100644 --- a/src/finn/analysis/fpgadataflow/floorplan_params.py +++ b/src/finn/analysis/fpgadataflow/floorplan_params.py @@ -36,7 +36,12 @@ def floorplan_params(model): Returns {node name : {slr, device id, partition id, memory port}}.""" ret_dict = { - "Defaults": {"slr": [-1, ["all"]], "partition_id": [0, ["all"]], "device_id": [0, ["all"]], "mem_port": ["", ["all"]]} + "Defaults": { + "slr": [-1, ["all"]], + "partition_id": [0, ["all"]], + "device_id": [0, ["all"]], + "mem_port": ["", ["all"]], + } } for node in model.graph.node: if is_fpgadataflow_node(node) is True: diff --git a/src/finn/analysis/fpgadataflow/res_estimation.py b/src/finn/analysis/fpgadataflow/res_estimation.py index 2c714b1f12b75e9789f1865d6737422f4d9d9a97..31cfeb76a6d4f411808af5dcd265e4f07352ae02 100644 --- a/src/finn/analysis/fpgadataflow/res_estimation.py +++ b/src/finn/analysis/fpgadataflow/res_estimation.py @@ -61,7 +61,10 @@ def res_estimation_complete(model): if is_fpgadataflow_node(node) is True: op_type = node.op_type inst = registry.getCustomOp(node) - if op_type == "StreamingFCLayer_Batch" or op_type == "Vector_Vector_Activate_Batch": + if ( + op_type == "StreamingFCLayer_Batch" + or op_type == "Vector_Vector_Activate_Batch" + ): orig_restype = inst.get_nodeattr("resType") res_dict[node.name] = [] inst.set_nodeattr("resType", "dsp") diff --git a/src/finn/transformation/fpgadataflow/floorplan.py b/src/finn/transformation/fpgadataflow/floorplan.py index 32b21cda55a3c9fb08cf2bd18155d1cdd5f140f8..c6bedd466e31efb622640cbd203d344ff9b3d88f 100644 --- a/src/finn/transformation/fpgadataflow/floorplan.py +++ b/src/finn/transformation/fpgadataflow/floorplan.py @@ -87,7 +87,6 @@ class Floorplan(Transformation): narrow_neighbour = model.find_consumer(node.output[0]) else: narrow_neighbour = model.find_producer(node.input[0]) - node_slr = getCustomOp(narrow_neighbour).get_nodeattr("slr") node_inst.set_nodeattr("slr", node_slr) if node.op_type == "StreamingFIFO": @@ -98,7 +97,6 @@ class Floorplan(Transformation): node_slr = getCustomOp(srcnode).get_nodeattr("slr") node_inst.set_nodeattr("slr", node_slr) - if unassigned_nodes > 0: warnings.warn( str(unassigned_nodes) @@ -106,7 +104,6 @@ class Floorplan(Transformation): + "and no default value was set" ) - # partition id generation partition_cnt = 0 diff --git a/tests/fpgadataflow/test_set_folding.py b/tests/fpgadataflow/test_set_folding.py index 7f7e3f3abd8df3e489816a2175d62393c2a37b21..fe3a1db8a476e33bfc0d76996917fab9ae6ed98b 100644 --- a/tests/fpgadataflow/test_set_folding.py +++ b/tests/fpgadataflow/test_set_folding.py @@ -28,8 +28,6 @@ import pytest import numpy as np -import math -import random from onnx import TensorProto, helper from finn.custom_op.registry import getCustomOp @@ -43,45 +41,53 @@ from finn.transformation.fpgadataflow.create_dataflow_partition import ( ) from finn.util.test import load_test_checkpoint_or_skip + def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes): - W = np.random.randint(wdt.min(), wdt.max()+1, size=(ch, ch)) + W = np.random.randint(wdt.min(), wdt.max() + 1, size=(ch, ch)) W = W.astype(np.float32) - T = np.random.randint(tdt.min(), tdt.max()+1, size=(ch, 2**adt.bitwidth()-1)) + T = np.random.randint(tdt.min(), tdt.max() + 1, size=(ch, 2 ** adt.bitwidth() - 1)) T = T.astype(np.float32) tensors = [] tensors.append(helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, ch])) for i in range(1, nnodes): - inter = helper.make_tensor_value_info("inter_"+str(i), TensorProto.FLOAT, [1, ch]) + inter = helper.make_tensor_value_info( + "inter_" + str(i), TensorProto.FLOAT, [1, ch] + ) tensors.append(inter) tensors.append(helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, ch])) - + FCLayer_nodes = [] for i in range(nnodes): pe = 1 simd = 1 - FCLayer_nodes += [helper.make_node( - "StreamingFCLayer_Batch", - [tensors[i].name, "weights_"+str(i), "thresh_"+str(i)], - [tensors[i+1].name], - domain="finn.custom_op.fpgadataflow", - backend="fpgadataflow", - MW=ch, - MH=ch, - SIMD=simd, - PE=pe, - inputDataType=adt.name, - weightDataType=wdt.name, - outputDataType=adt.name, - ActVal=0, - binaryXnorMode=0, - noActivation=0, - )] + FCLayer_nodes += [ + helper.make_node( + "StreamingFCLayer_Batch", + [tensors[i].name, "weights_" + str(i), "thresh_" + str(i)], + [tensors[i + 1].name], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + MW=ch, + MH=ch, + SIMD=simd, + PE=pe, + inputDataType=adt.name, + weightDataType=wdt.name, + outputDataType=adt.name, + ActVal=0, + binaryXnorMode=0, + noActivation=0, + ) + ] graph = helper.make_graph( - nodes=FCLayer_nodes, name="fclayer_graph", inputs=[tensors[0]], outputs=[tensors[-1]] + nodes=FCLayer_nodes, + name="fclayer_graph", + inputs=[tensors[0]], + outputs=[tensors[-1]], ) model = helper.make_model(graph, producer_name="fclayer-model") @@ -89,24 +95,27 @@ def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes): model.set_tensor_datatype("inp", adt) model.set_tensor_datatype("outp", adt) - - for i in range(1, nnodes+1): + + for i in range(1, nnodes + 1): model.graph.value_info.append(tensors[i]) - model.set_initializer("weights_"+str(i-1), W) - model.set_initializer("thresh_"+str(i-1), T) - model.set_tensor_datatype("weights_"+str(i-1), wdt) - model.set_tensor_datatype("thresh_"+str(i-1), tdt) + model.set_initializer("weights_" + str(i - 1), W) + model.set_initializer("thresh_" + str(i - 1), T) + model.set_tensor_datatype("weights_" + str(i - 1), wdt) + model.set_tensor_datatype("thresh_" + str(i - 1), tdt) return model + # desired frames per second @pytest.mark.parametrize("target_fps", [30, 10 ** 5, 10 ** 7]) # target chip or board @pytest.mark.parametrize("platform", ["Pynq-Z1", "Ultra96", "U200"]) def test_set_folding(target_fps, platform): - model = make_multi_fclayer_model(128, DataType.INT4, DataType.INT2, DataType.INT16, 5) - + model = make_multi_fclayer_model( + 128, DataType.INT4, DataType.INT2, DataType.INT16, 5 + ) + model = model.transform(GiveUniqueNodeNames()) parent_model = model.transform(CreateDataflowPartition()) sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] @@ -125,7 +134,6 @@ def test_set_folding(target_fps, platform): min_cycles["Pynq-Z1"] = 128 min_cycles["Ultra96"] = 64 min_cycles["U200"] = 1 - assert achieved_cycles_per_frame <= max( min_cycles[platform], target_cycles_per_frame