diff --git a/Dockerfile b/Dockerfile index 8c97a3ad9089bcc858134a51ac189e4105a98ed9..161ceb5ace3025e56b335064a8d1653c9ba26aba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,6 @@ ENV PYTHONPATH "${PYTHONPATH}:/workspace/brevitas_cnv_lfc/training_scripts" ENV PYTHONPATH "${PYTHONPATH}:/workspace/brevitas" ENV PYTHONPATH "${PYTHONPATH}:/workspace/pyverilator" ENV PYNQSHELL_PATH "/workspace/PYNQ-HelloWorld/boards" -ENV PYNQ_BOARD "Pynq-Z1" ARG GID ARG GNAME diff --git a/run-docker.sh b/run-docker.sh index 77441ed955c8a055ac57a7328f2998f8855c20e9..aadc4c78717d85bde3bdf3dcedd48824f5ba483b 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -17,8 +17,11 @@ DOCKER_TAG="finn_${DOCKER_UNAME}" # uncomment to run multiple instances with different names # DOCKER_INST_NAME="finn_${DOCKER_UNAME}_${DOCKER_RND}" DOCKER_INST_NAME="finn_${DOCKER_UNAME}" +# the settings below will be taken from environment variables if available, +# otherwise the defaults below will be used : ${JUPYTER_PORT=8888} : ${NETRON_PORT=8081} +: ${PYNQ_BOARD="Pynq-Z1"} # Absolute path to this script, e.g. 
/home/user/bin/foo.sh SCRIPT=$(readlink -f "$0") @@ -40,7 +43,7 @@ PYVERILATOR_LOCAL=$SCRIPTPATH/pyverilator PYNQSHELL_LOCAL=$SCRIPTPATH/PYNQ-HelloWorld BUILD_LOCAL=/tmp/$DOCKER_INST_NAME VIVADO_HLS_LOCAL=$VIVADO_PATH -: ${VIVADO_IP_CACHE=$BUILD_LOCAL/vivado_ip_cache} +VIVADO_IP_CACHE=$BUILD_LOCAL/vivado_ip_cache # clone dependency repos git clone --branch feature/finn_onnx_export $BREVITAS_REPO $BREVITAS_LOCAL || git -C "$BREVITAS_LOCAL" pull @@ -67,6 +70,7 @@ echo "Mounting $VIVADO_PATH into $VIVADO_PATH" echo "Port-forwarding for Jupyter $JUPYTER_PORT:$JUPYTER_PORT" echo "Port-forwarding for Netron $NETRON_PORT:$NETRON_PORT" echo "Vivado IP cache dir is at $VIVADO_IP_CACHE" +echo "Using default PYNQ board $PYNQ_BOARD" if [ "$1" = "test" ]; then echo "Running test suite" @@ -107,6 +111,7 @@ docker run -t --rm --name $DOCKER_INST_NAME -it \ -e FINN_INST_NAME=$DOCKER_INST_NAME \ -e FINN_ROOT="/workspace/finn" \ -e VIVADO_IP_CACHE="$VIVADO_IP_CACHE" \ +-e PYNQ_BOARD=$PYNQ_BOARD \ -p $JUPYTER_PORT:$JUPYTER_PORT \ -p $NETRON_PORT:$NETRON_PORT \ $DOCKER_TAG bash -c "$DOCKER_CMD" diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py index 8a273367023829fed1581261ac35dcea4b9d1738..fe710f0be1bd0d8783e13352812eeef5ebd2c332 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py @@ -41,6 +41,9 @@ class StreamingFCLayer_Batch(HLSCustomOp): "binaryXnorMode": ("i", False, 0), # no-activation mode (produce accumulators) "noActivation": ("i", False, 0), + # input and output FIFO depths + "inFIFODepth": ("i", False, 0), + "outFIFODepth": ("i", False, 0), } my_attrs.update(super().get_nodeattr_types()) return my_attrs @@ -99,6 +102,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): info_messages.append('Attribute backend should be set to "fpgadataflow"') # verify that all necessary attributes exist + # TODO collect automatically 
from get_nodeattr_types try: self.get_nodeattr("code_gen_dir_npysim") self.get_nodeattr("executable_path") @@ -161,6 +165,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): Y. Umuroglu, M. Leeser and K. Vissers - 12. Sep 2018 """ + # TODO add in/out FIFO contributions P = self.get_nodeattr("PE") Q = self.get_nodeattr("SIMD") wdt = self.get_weight_datatype() @@ -178,6 +183,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): Y. Umuroglu, M. Leeser and K. Vissers - 12. Sep 2018 """ + # TODO add in/out FIFO contributions P = self.get_nodeattr("PE") Q = self.get_nodeattr("SIMD") wdt = self.get_weight_datatype() @@ -642,6 +648,17 @@ class StreamingFCLayer_Batch(HLSCustomOp): def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out") + in_fifo_depth = self.get_nodeattr("inFIFODepth") + out_fifo_depth = self.get_nodeattr("outFIFODepth") + # insert depth pragmas only if specified + if in_fifo_depth != 0: + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS stream depth=%d variable=in0" % in_fifo_depth + ) + if out_fifo_depth != 0: + self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS stream depth=%d variable=out" % out_fifo_depth + ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py index a613d0622ee95e7f1ca848142e2930cf6d3c91bd..ba45e01bf3ecee457e9788e5dbea4cd1c3ee0007 100644 --- a/src/finn/transformation/fpgadataflow/templates.py +++ b/src/finn/transformation/fpgadataflow/templates.py @@ -103,7 +103,9 @@ dma.sendchannel.wait() dma.recvchannel.wait() # unpack the packed output buffer from accelerator -obuf_folded = packed_bytearray_to_finnpy(obuf_packed, odt, oshape_folded) +obuf_folded = packed_bytearray_to_finnpy( + obuf_packed, odt, oshape_folded, reverse_endian=True +) # convert to normal reshape and save 
obuf_normal = obuf_folded.reshape(oshape_normal) np.save("output.npy", obuf_normal) diff --git a/src/finn/util/data_packing.py b/src/finn/util/data_packing.py index 58c62219287940eb6533d2513e66d2c9c33cfb01..2bdb992948a93cd75cc4b15f1ec0a8e8b4a3b372 100644 --- a/src/finn/util/data_packing.py +++ b/src/finn/util/data_packing.py @@ -152,7 +152,7 @@ def unpack_innermost_dim_from_hex_string( # interpret values as bipolar if dtype == DataType.BIPOLAR: ar_list = [2 * x - 1 for x in ar_list] - # interpret values as signed values + # interpret values as signed values elif dtype.name.startswith("INT"): mask = 2 ** (dtype.bitwidth() - 1) ar_list = [-(x & mask) + (x & ~mask) for x in ar_list] @@ -277,7 +277,13 @@ def finnpy_to_packed_bytearray(ndarray, dtype): return np.apply_along_axis(fn, packed_hexstring.ndim - 1, packed_hexstring) -def packed_bytearray_to_finnpy(packed_bytearray, dtype, output_shape=None, reverse_inner=False): +def packed_bytearray_to_finnpy( + packed_bytearray, + dtype, + output_shape=None, + reverse_inner=False, + reverse_endian=False, +): """Given a packed numpy uint8 ndarray, unpack it into a FINN array of given DataType. 
output_shape can be specified to remove padding from the packed dimension, or set to None to be inferred from the input.""" @@ -296,10 +302,20 @@ def packed_bytearray_to_finnpy(packed_bytearray, dtype, output_shape=None, rever assert packed_bits % target_bits == 0 n_target_elems = packed_bits // target_bits output_shape = packed_bytearray.shape[:-1] + (n_target_elems,) + if reverse_endian and target_bits > 8: + # reverse the endianness of each element + orig_shape = packed_bytearray.shape + assert target_bits % 8 == 0 + target_bytes = target_bits // 8 + new_shape = orig_shape[:-1] + (-1, target_bytes) + packed_bytearray = np.flip(packed_bytearray.reshape(new_shape), axis=-1) + packed_bytearray = packed_bytearray.reshape(orig_shape) # convert innermost dim of byte array to hex strings packed_hexstring = np.apply_along_axis( npbytearray2hexstring, packed_dim, packed_bytearray ) - ret = unpack_innermost_dim_from_hex_string(packed_hexstring, dtype, output_shape, reverse_inner) + ret = unpack_innermost_dim_from_hex_string( + packed_hexstring, dtype, output_shape, reverse_inner + ) return ret diff --git a/tests/fpgadataflow/test_data_packing.py b/tests/fpgadataflow/test_data_packing.py index 3616219ef0e1046e7ef1a6daf3c1bfb6528a21cc..2c175953e6e3d7e3fc3f89cf1249575a10ea1fc0 100644 --- a/tests/fpgadataflow/test_data_packing.py +++ b/tests/fpgadataflow/test_data_packing.py @@ -1,5 +1,5 @@ -import shutil import os +import shutil import subprocess import numpy as np @@ -61,7 +61,9 @@ def make_npy2apintstream_testcase(ndarray, dtype): cmd_compile = """ g++ -o test_npy2apintstream test.cpp /workspace/cnpy/cnpy.cpp \ -I/workspace/cnpy/ -I{}/include -I/workspace/finn/src/finn/data/cpp \ ---std=c++11 -lz""".format(os.environ["VIVADO_PATH"]) +--std=c++11 -lz""".format( + os.environ["VIVADO_PATH"] + ) with open(test_dir + "/compile.sh", "w") as f: f.write(cmd_compile) compile = subprocess.Popen( @@ -191,3 +193,13 @@ def test_packed_bytearray_to_finnpy(): eE = np.asarray(eE, 
dtype=np.float32) shapeE = eE.shape assert (packed_bytearray_to_finnpy(E, DataType.INT32, shapeE) == eE).all() + F = np.asarray( + [[252, 255, 255, 255, 0, 0, 0, 0, 252, 255, 255, 255, 252, 255, 255, 255]], + dtype=np.uint8, + ) + eF = [[-4, 0, -4, -4]] + eF = np.asarray(eF, dtype=np.float32) + shapeF = eF.shape + assert ( + packed_bytearray_to_finnpy(F, DataType.INT32, shapeF, reverse_endian=True) == eF + ).all() diff --git a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py b/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py index f27d06b312981b810d4e1d7f9b6dcbe79ea56ccc..15b74b2b7e7a9bb32d5cb963cfddb6a02c4f5e3f 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py @@ -11,6 +11,7 @@ from finn.custom_op.registry import getCustomOp from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen +from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject @@ -27,7 +28,7 @@ from finn.util.basic import ( test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1") test_fpga_part = pynq_part_map[test_pynq_board] -ip_stitch_model_dir = make_build_dir("test_fpgadataflow_ipstitch") +ip_stitch_model_dir = make_build_dir("test_fpgadataflow_ipstitch_") def create_one_fc_model(): @@ -40,10 +41,11 @@ def create_one_fc_model(): no_act = 1 binary_xnor_mode = 0 actval = 0 + simd = 2 + pe = 2 inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, m]) outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, m]) - outp_tlast = helper.make_tensor_value_info("outp_tlast", TensorProto.FLOAT, [1, 
m]) fc0 = helper.make_node( "StreamingFCLayer_Batch", @@ -54,8 +56,8 @@ def create_one_fc_model(): resType="ap_resource_lut()", MW=m, MH=m, - SIMD=m, - PE=m // 2, + SIMD=simd, + PE=pe, inputDataType=idt.name, weightDataType=wdt.name, outputDataType=odt.name, @@ -64,23 +66,8 @@ def create_one_fc_model(): noActivation=no_act, ) - tlastmarker = helper.make_node( - "TLastMarker", - ["outp"], - ["outp_tlast"], - domain="finn", - backend="fpgadataflow", - NumIters=2, - ElemWidth=odt.bitwidth(), - StreamWidth=odt.bitwidth() * m, - ) - graph = helper.make_graph( - nodes=[fc0, tlastmarker], - name="fclayer_graph", - inputs=[inp], - outputs=[outp_tlast], - value_info=[outp], + nodes=[fc0], name="fclayer_graph", inputs=[inp], outputs=[outp], ) model = helper.make_model(graph, producer_name="fclayer-model") @@ -88,7 +75,6 @@ def create_one_fc_model(): model.set_tensor_datatype("inp", idt) model.set_tensor_datatype("outp", odt) - model.set_tensor_datatype("outp_tlast", odt) model.set_tensor_datatype("w0", wdt) # generate weights @@ -110,11 +96,12 @@ def create_two_fc_model(): actval = odt.min() no_act = 0 binary_xnor_mode = 0 + pe = 2 + simd = 2 inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, m]) mid = helper.make_tensor_value_info("mid", TensorProto.FLOAT, [1, m]) outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, m]) - outp_tlast = helper.make_tensor_value_info("outp_tlast", TensorProto.FLOAT, [1, m]) fc0 = helper.make_node( "StreamingFCLayer_Batch", @@ -125,8 +112,8 @@ def create_two_fc_model(): resType="ap_resource_lut()", MW=m, MH=m, - SIMD=1, - PE=1, + SIMD=simd, + PE=pe, inputDataType=idt.name, weightDataType=wdt.name, outputDataType=odt.name, @@ -144,8 +131,8 @@ def create_two_fc_model(): resType="ap_resource_lut()", MW=m, MH=m, - SIMD=1, - PE=1, + SIMD=simd, + PE=pe, inputDataType=idt.name, weightDataType=wdt.name, outputDataType=odt.name, @@ -154,23 +141,12 @@ def create_two_fc_model(): noActivation=no_act, ) - tlastmarker = 
helper.make_node( - "TLastMarker", - ["outp"], - ["outp_tlast"], - domain="finn", - backend="fpgadataflow", - NumIters=m, - StreamWidth=2, - ElemWidth=odt.bitwidth(), - ) - graph = helper.make_graph( - nodes=[fc0, fc1, tlastmarker], + nodes=[fc0, fc1], name="fclayer_graph", inputs=[inp], - outputs=[outp_tlast], - value_info=[mid, outp], + outputs=[outp], + value_info=[mid], ) model = helper.make_model(graph, producer_name="fclayer-model") @@ -179,7 +155,6 @@ def create_two_fc_model(): model.set_tensor_datatype("inp", idt) model.set_tensor_datatype("mid", idt) model.set_tensor_datatype("outp", odt) - model.set_tensor_datatype("outp_tlast", odt) model.set_tensor_datatype("w0", wdt) model.set_tensor_datatype("w1", wdt) @@ -213,14 +188,13 @@ def test_fpgadataflow_ipstitch_gen_model(): assert sdp_node.__class__.__name__ == "StreamingDataflowPartition" assert os.path.isfile(sdp_node.get_nodeattr("model")) model = ModelWrapper(sdp_node.get_nodeattr("model")) - + model = model.transform(InsertTLastMarker()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(CodeGen_ipgen(test_fpga_part, 5)) model = model.transform(HLSSynth_IPGen()) assert model.graph.node[0].op_type == "StreamingFCLayer_Batch" # assert model.graph.node[1].op_type == "StreamingFCLayer_Batch" - assert model.graph.node[1].op_type == "TLastMarker" - + assert model.graph.node[-1].op_type == "TLastMarker" model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx") diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py index 32b1c60fc714794e39fe1ade2d0252895bb33025..a7a096d6a69de743a318c0a514a2f24da5d7a29f 100644 --- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py +++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py @@ -7,6 +7,7 @@ from finn.core.modelwrapper import ModelWrapper from finn.transformation.fpgadataflow.cleanup import CleanUp from 
finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim from finn.transformation.fpgadataflow.compile import Compile +from finn.transformation.fpgadataflow.set_sim_mode import SetSimMode def test_layer_streaming_maxpool_batch(): @@ -112,6 +113,7 @@ def test_layer_streaming_maxpool_batch(): ).reshape(2, 2, 4, 4) print(input_tensor) + model = model.transform(SetSimMode("npysim")) model = model.transform(CodeGen_npysim()) model = model.transform(Compile())