diff --git a/.github/workflows/quicktest-dev-pr.yml b/.github/workflows/quicktest-dev-pr.yml index 924fbd24a174df49af4b3e259ad57d0a7907d42b..0233a81ba06dc701a3a4579b9a5bd3ce17e47d04 100644 --- a/.github/workflows/quicktest-dev-pr.yml +++ b/.github/workflows/quicktest-dev-pr.yml @@ -5,7 +5,7 @@ on: branches: [ dev ] push: branches: [ dev ] - + jobs: @@ -18,6 +18,6 @@ jobs: uses: actions/checkout@v2 - name: DockerRunQuicktest - env: - NUM_DEFAULT_WORKERS: 4 - run: sh run-docker.sh quicktest + run: | + docker build -t finn_gha -f docker/Dockerfile.finn_ci --build-arg BUILD_PATH=/tmp/finn_gha . + docker run --init --hostname finn_gha -v $(pwd):/workspace/finn -e FINN_INST_NAME=finn_gha finn_gha quicktest.sh diff --git a/docker/Dockerfile.finn_ci b/docker/Dockerfile.finn_ci index d06ff8521555ccd6d09383cab039850f1565fc61..7d5772d9f5118d1f1238dd14a6b57a1b4fd5004d 100644 --- a/docker/Dockerfile.finn_ci +++ b/docker/Dockerfile.finn_ci @@ -30,7 +30,6 @@ FROM pytorch/pytorch:1.1.0-cuda10.0-cudnn7.5-devel MAINTAINER Yaman Umuroglu <yamanu@xilinx.com> ARG PYTHON_VERSION=3.6 ARG BUILD_PATH -ARG FINN_CI_BRANCH WORKDIR /workspace @@ -55,10 +54,9 @@ RUN git clone https://github.com/maltanar/PYNQ-HelloWorld.git /workspace/PYNQ-He # oh-my-xilinx RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx -# checkout desired FINN branch for testing -RUN git clone --branch $FINN_CI_BRANCH https://github.com/Xilinx/finn /workspace/finn - -RUN pip install -r /workspace/finn/requirements.txt +COPY requirements.txt . 
+RUN pip install -r requirements.txt +RUN rm requirements.txt RUN apt update; apt install nano RUN pip install pytest-dependency RUN pip install pytest-xdist @@ -78,8 +76,8 @@ RUN mkdir -p $VIVADO_IP_CACHE WORKDIR /workspace/finn -COPY finn_entrypoint.sh /usr/local/bin/ -COPY quicktest.sh /usr/local/bin/ +COPY docker/finn_entrypoint.sh /usr/local/bin/ +COPY docker/quicktest.sh /usr/local/bin/ RUN chmod 755 /usr/local/bin/finn_entrypoint.sh RUN chmod 755 /usr/local/bin/quicktest.sh ENTRYPOINT ["finn_entrypoint.sh"] diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev index f8919d7498e0e8ef08a52d1da0782988b56d6df4..8c1502eb4a1941061bd58e6f9a18106f98f259e2 100644 --- a/docker/Dockerfile.finn_dev +++ b/docker/Dockerfile.finn_dev @@ -50,7 +50,6 @@ COPY requirements.txt . RUN pip install -r requirements.txt RUN rm requirements.txt RUN pip install jupyter -RUN pip install netron RUN pip install matplotlib RUN pip install pytest-dependency RUN pip install sphinx @@ -81,13 +80,26 @@ RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator RUN git clone https://github.com/maltanar/PYNQ-HelloWorld.git /workspace/PYNQ-HelloWorld # oh-my-xilinx RUN git clone https://bitbucket.org/maltanar/oh-my-xilinx.git /workspace/oh-my-xilinx +# netron +RUN git clone https://github.com/lutzroeder/netron.git /workspace/netron + +# build and install netron +USER root +RUN curl -sL https://deb.nodesource.com/setup_12.x | bash - +RUN apt-get install -y nodejs +WORKDIR /workspace/netron +RUN git checkout 376e9d33733a3eacfe3c432808fd46e6cd1460cb +RUN npm install +RUN python setup.py build +RUN pip install /workspace/netron +USER $UNAME # for this developer-oriented Docker container we assume the FINN repo is cloned and mounted from the host # at /workspace/finn -- see run-docker.sh for an example of how to do this. 
ENV PYTHONPATH "${PYTHONPATH}:/workspace/finn/src" ENV PYTHONPATH "${PYTHONPATH}:/workspace/pyverilator" ENV PYNQSHELL_PATH "/workspace/PYNQ-HelloWorld/boards" -ENV PATH "${PATH}:/workspace/oh-my-xilinx" +ENV PATH "${PATH}:/workspace/oh-my-xilinx:/home/$UNAME/.local/bin" ENV OHMYXILINX "/workspace/oh-my-xilinx" WORKDIR /home/$UNAME/finn diff --git a/docker/Jenkinsfile b/docker/Jenkinsfile index 2215bc79cc7b2c20036d882fdc654fbe8721cab6..b2d3102bd4aa3c00620f41c102af5a8b385cede7 100644 --- a/docker/Jenkinsfile +++ b/docker/Jenkinsfile @@ -15,11 +15,13 @@ pipeline { string(name: 'DOCKER_CMD_RTLSIM', defaultValue: """python setup.py test --addopts "-k rtlsim --workers auto" """, description: 'rtlsim test command') // end2end tests: no parallel testing, use NUM_DEFAULT_WORKERS for parallel transformations string(name: 'DOCKER_CMD_END2END', defaultValue: """python setup.py test --addopts "-k end2end" """, description: 'end2end test command') + // allow specifying where to mount the cloned folder from, since Jenkins and FINN may be running in separate containers + string(name: 'WORKSPACE_MOUNT', defaultValue: '/var/jenkins_home/workspace/finn', description: 'Path to Jenkins workspace mount') } environment { DOCKER_TAG='finn_ci:$BUILD_ID' - DOCKER_INST_NAME='finn_ci_$BUILD_ID' - BUILD_PATH='/tmp/finn_ci_$BUILD_ID' + DOCKER_INST_NAME='finn_ci' + BUILD_PATH='/tmp/finn_ci' } stages { stage("Clone") { @@ -32,17 +34,17 @@ pipeline { sh """ docker build -t $DOCKER_TAG -f docker/Dockerfile.finn_ci \ --build-arg BUILD_PATH=$BUILD_PATH \ - --build-arg FINN_CI_BRANCH=${params.FINN_CI_BRANCH} \ - docker/ + . 
""" } } stage('test-main') { steps { - catchError { + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { sh """ docker run --init \ --hostname $DOCKER_INST_NAME \ + -v ${params.WORKSPACE_MOUNT}:/workspace/finn \ -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \ -e NUM_DEFAULT_WORKERS=1 \ -e FINN_INST_NAME=$DOCKER_INST_NAME \ @@ -58,10 +60,11 @@ pipeline { } stage('test-rtlsim') { steps { - catchError { + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { sh """ docker run --init \ --hostname $DOCKER_INST_NAME \ + -v ${params.WORKSPACE_MOUNT}:/workspace/finn \ -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \ -e NUM_DEFAULT_WORKERS=1 \ -e FINN_INST_NAME=$DOCKER_INST_NAME \ @@ -77,10 +80,11 @@ pipeline { } stage('test-end2end') { steps { - catchError { + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { sh """ docker run --init \ --hostname $DOCKER_INST_NAME \ + -v ${params.WORKSPACE_MOUNT}:/workspace/finn \ -v ${params.VIVADO_PATH}:${params.VIVADO_PATH}:ro \ -e NUM_DEFAULT_WORKERS=${params.NUM_DEFAULT_WORKERS} \ -e FINN_INST_NAME=$DOCKER_INST_NAME \ diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index f701952d5d64e5d9b95aa59170b492fe7722ae02..ee75089c657e4fad1e4a455ac7bd5fe4976e5d4c 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -1,6 +1,5 @@ #!/bin/bash -export XILINX_VIVADO=$VIVADO_PATH export SHELL=/bin/bash export FINN_ROOT=/workspace/finn @@ -48,7 +47,14 @@ gecho "oh-my-xilinx @ $OMX_COMMIT" git -C /workspace/oh-my-xilinx pull --quiet git -C /workspace/oh-my-xilinx checkout $OMX_COMMIT --quiet -# source Vivado env.vars -source $VIVADO_PATH/settings64.sh - +if [ ! -z "$VIVADO_PATH" ];then + # source Vivado env.vars + export XILINX_VIVADO=$VIVADO_PATH + source $VIVADO_PATH/settings64.sh +fi +if [ ! 
-z "$VITIS_PATH" ];then + # source Vitis env.vars + export XILINX_VITIS=$VITIS_PATH + source $VITIS_PATH/settings64.sh +fi exec "$@" diff --git a/docs/finn/getting_started.rst b/docs/finn/getting_started.rst index 8b20cebcfc49d14d0afbb26edd678d65425476d3..323692897800d45c6e6cf55b688a2c7b2b9a5277 100644 --- a/docs/finn/getting_started.rst +++ b/docs/finn/getting_started.rst @@ -13,7 +13,7 @@ The FINN compiler should not be thought of a single pushbutton tool that does ev Requirements ============ -* Ubuntu 18.04 +* Ubuntu 18.04 with `bash` installed * Docker * A working Vivado 2019.1 installation * A `VIVADO_PATH` environment variable pointing to the Vivado installation directory (e.g. the directory where settings64.sh is located) @@ -26,9 +26,11 @@ We use Docker extensively for developing and deploying FINN. If you are not fami Getting an interactive shell for development or experimentation *************************************************************** +.. note:: **run-docker.sh requires bash to execute correctly.** + :: - sh run_docker.sh + ./run-docker.sh Simply running sh run-docker.sh without any additional arguments will clone the dependency repos, create a Docker container and give you a terminal with you can use for development for experimentation. If you want a new terminal on an already-running container, you can do this with `docker exec -it finn_dev_<username> bash`. @@ -41,7 +43,7 @@ Running the Jupyter notebooks ***************************** :: - sh run-docker.sh notebook + ./run-docker.sh notebook This will launch the `Jupyter notebook <https://jupyter.org/>`_ server inside a Docker container, and print a link on the terminal that you can open in your browser to run the FINN notebooks or create new ones. .. 
note:: The link will look something like this (the token you get will be different): @@ -57,14 +59,14 @@ by: :: - sh run-docker.sh test + ./run-docker.sh test There is a quicker variant of the test suite that skips the tests marked as requiring Vivado or as slow-running tests: :: - sh run-docker.sh quicktest + ./run-docker.sh quicktest If you want to run individual tests, you can do this *inside the Docker container from the FINN root directory* as follows: diff --git a/run-docker.sh b/run-docker.sh index 00ca8f86985a78d8f2af099c51dcd4b80cd2e974..88956586c6a2ba9780d0597f8149038dad4aa6ab 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -50,6 +50,15 @@ if [ -z "$PYNQ_IP" ];then recho "Please set the PYNQ_IP env.var. to enable PYNQ deployment tests." fi +if [ -z "$VITIS_PATH" ];then + recho "Please set the VITIS_PATH that contains the path to your Vitis installation directory." + recho "FINN functionality depending on Vitis will not be available." +else + if [ -z "$PLATFORM_REPO_PATHS" ];then + recho "Please set PLATFORM_REPO_PATHS pointing to Vitis platform files (DSAs)." 
+ fi +fi + DOCKER_GID=$(id -g) DOCKER_GNAME=$(id -gn) DOCKER_UNAME=$(id -un) @@ -93,6 +102,7 @@ mkdir -p $FINN_SSH_KEY_DIR gecho "Instance is named as $DOCKER_INST_NAME" gecho "Mounting $BUILD_LOCAL into $BUILD_LOCAL" gecho "Mounting $VIVADO_PATH into $VIVADO_PATH" +gecho "Mounting $VITIS_PATH into $VITIS_PATH" gecho "Port-forwarding for Jupyter $JUPYTER_PORT:$JUPYTER_PORT" gecho "Port-forwarding for Netron $NETRON_PORT:$NETRON_PORT" gecho "Vivado IP cache dir is at $VIVADO_IP_CACHE" @@ -128,24 +138,34 @@ docker build -f docker/Dockerfile.finn_dev --tag=$DOCKER_TAG \ # Launch container with current directory mounted # important to pass the --init flag here for correct Vivado operation, see: # https://stackoverflow.com/questions/55733058/vivado-synthesis-hangs-in-docker-container-spawned-by-jenkins -docker run -t --rm --name $DOCKER_INST_NAME $DOCKER_INTERACTIVE --init \ ---hostname $DOCKER_INST_NAME \ --e "XILINX_VIVADO=$VIVADO_PATH" \ --e "SHELL=/bin/bash" \ --v $SCRIPTPATH:/workspace/finn \ --v $BUILD_LOCAL:$BUILD_LOCAL \ --v $VIVADO_PATH:$VIVADO_PATH \ --v $FINN_SSH_KEY_DIR:/home/$DOCKER_UNAME/.ssh \ --e VIVADO_PATH=$VIVADO_PATH \ --e FINN_INST_NAME=$DOCKER_INST_NAME \ --e FINN_ROOT="/workspace/finn" \ --e VIVADO_IP_CACHE="$VIVADO_IP_CACHE" \ --e PYNQ_BOARD=$PYNQ_BOARD \ --e PYNQ_IP=$PYNQ_IP \ --e PYNQ_USERNAME=$PYNQ_USERNAME \ --e PYNQ_PASSWORD=$PYNQ_PASSWORD \ --e PYNQ_TARGET_DIR=$PYNQ_TARGET_DIR \ --e NUM_DEFAULT_WORKERS=$NUM_DEFAULT_WORKERS \ --p $JUPYTER_PORT:$JUPYTER_PORT \ --p $NETRON_PORT:$NETRON_PORT \ -$DOCKER_TAG $DOCKER_CMD +DOCKER_EXEC="docker run -t --rm --name $DOCKER_INST_NAME $DOCKER_INTERACTIVE --init " +DOCKER_EXEC+="--hostname $DOCKER_INST_NAME " +DOCKER_EXEC+="-e SHELL=/bin/bash " +DOCKER_EXEC+="-v $SCRIPTPATH:/workspace/finn " +DOCKER_EXEC+="-v $BUILD_LOCAL:$BUILD_LOCAL " +DOCKER_EXEC+="-v $FINN_SSH_KEY_DIR:/home/$DOCKER_UNAME/.ssh " +DOCKER_EXEC+="-e FINN_INST_NAME=$DOCKER_INST_NAME " +DOCKER_EXEC+="-e FINN_ROOT="/workspace/finn" " 
+DOCKER_EXEC+="-e VIVADO_IP_CACHE=$VIVADO_IP_CACHE " +DOCKER_EXEC+="-e PYNQ_BOARD=$PYNQ_BOARD " +DOCKER_EXEC+="-e PYNQ_IP=$PYNQ_IP " +DOCKER_EXEC+="-e PYNQ_USERNAME=$PYNQ_USERNAME " +DOCKER_EXEC+="-e PYNQ_PASSWORD=$PYNQ_PASSWORD " +DOCKER_EXEC+="-e PYNQ_TARGET_DIR=$PYNQ_TARGET_DIR " +DOCKER_EXEC+="-e NUM_DEFAULT_WORKERS=$NUM_DEFAULT_WORKERS " +DOCKER_EXEC+="-p $JUPYTER_PORT:$JUPYTER_PORT " +DOCKER_EXEC+="-p $NETRON_PORT:$NETRON_PORT " +if [ ! -z "$VIVADO_PATH" ];then + DOCKER_EXEC+="-e "XILINX_VIVADO=$VIVADO_PATH" " + DOCKER_EXEC+="-v $VIVADO_PATH:$VIVADO_PATH " + DOCKER_EXEC+="-e VIVADO_PATH=$VIVADO_PATH " +fi +if [ ! -z "$VITIS_PATH" ];then + DOCKER_EXEC+="-v $VITIS_PATH:$VITIS_PATH " + DOCKER_EXEC+="-v $PLATFORM_REPO_PATHS:/workspace/finn/vitis_platforms " + DOCKER_EXEC+="-e VITIS_PATH=$VITIS_PATH " + DOCKER_EXEC+="-e PLATFORM_REPO_PATHS=/workspace/finn/vitis_platforms " +fi +DOCKER_EXEC+="$DOCKER_TAG $DOCKER_CMD" + +$DOCKER_EXEC diff --git a/src/finn/core/rtlsim_exec.py b/src/finn/core/rtlsim_exec.py index 1e1bee3aa7435d5cab6cbf5ea23dd37dcdfa4380..bb5b3075582b8e01e8eed95f709934302fcadb42 100644 --- a/src/finn/core/rtlsim_exec.py +++ b/src/finn/core/rtlsim_exec.py @@ -114,19 +114,19 @@ def rtlsim_exec(model, execution_context): def _reset_rtlsim(sim): """Sets reset input in pyverilator to zero, toggles the clock and set it back to one""" - sim.io.ap_rst_n_0 = 0 + sim.io.ap_rst_n = 0 _toggle_clk(sim) _toggle_clk(sim) - sim.io.ap_rst_n_0 = 1 + sim.io.ap_rst_n = 1 _toggle_clk(sim) _toggle_clk(sim) def _toggle_clk(sim): """Toggles the clock input in pyverilator once.""" - sim.io.ap_clk_0 = 0 + sim.io.ap_clk = 0 sim.eval() - sim.io.ap_clk_0 = 1 + sim.io.ap_clk = 1 sim.eval() @@ -140,7 +140,7 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True): from finn.util.fpgadataflow)""" inputs = inp outputs = [] - sim.io.out_r_0_tready = 1 + sim.io.m_axis_0_tready = 1 # observe if output is completely calculated # observation_count will contain the number 
of cycles the calculation ran @@ -159,12 +159,12 @@ def _run_rtlsim(sim, inp, num_out_values, trace_file=None, reset=True): _reset_rtlsim(sim) while not (output_observed): - sim.io.in0_V_V_0_tvalid = 1 if len(inputs) > 0 else 0 - sim.io.in0_V_V_0_tdata = inputs[0] if len(inputs) > 0 else 0 - if sim.io.in0_V_V_0_tready == 1 and sim.io.in0_V_V_0_tvalid == 1: + sim.io.s_axis_0_tvalid = 1 if len(inputs) > 0 else 0 + sim.io.s_axis_0_tdata = inputs[0] if len(inputs) > 0 else 0 + if sim.io.s_axis_0_tready == 1 and sim.io.s_axis_0_tvalid == 1: inputs = inputs[1:] - if sim.io.out_r_0_tvalid == 1 and sim.io.out_r_0_tready == 1: - outputs = outputs + [sim.io.out_r_0_tdata] + if sim.io.m_axis_0_tvalid == 1 and sim.io.m_axis_0_tready == 1: + outputs = outputs + [sim.io.m_axis_0_tdata] _toggle_clk(sim) observation_count = observation_count + 1 diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index 71c731f96ca45519c443a5f932ead050770e17de..bc816f18c5f72338dc726e504182998f3f4430b7 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -102,6 +102,23 @@ class HLSCustomOp(CustomOp): prefixed_top_name = "%s_%s" % (node.name, node.name) return prefixed_top_name + def get_verilog_top_module_intf_names(self): + """Return a dict of names of input and output interfaces. + The keys reflect the protocols each interface implements: + 'clk', 'rst', 'm_axis', 's_axis', 'aximm', 'axilite'. 
+ Values are lists of names: + 's_axis' names correspond to the list of node inputs in order, + 'm_axis' names correspond to the list of node outputs in order. + Each block must have at most one aximm and one axilite.""" + intf_names = {} + intf_names["clk"] = ["ap_clk"] + intf_names["rst"] = ["ap_rst_n"] + intf_names["s_axis"] = ["in0_V_V"] + intf_names["m_axis"] = ["out_V_V"] + intf_names["aximm"] = [] + intf_names["axilite"] = [] + return intf_names + def get_verilog_top_filename(self): "Return the Verilog top module filename for this node." diff --git a/src/finn/custom_op/fpgadataflow/addstreams_batch.py b/src/finn/custom_op/fpgadataflow/addstreams_batch.py index d5f5c1194d36e86b895610c084222db5ab9eb2bf..d73f22672e7163eef0738d067f951e90fe80a89f 100644 --- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py +++ b/src/finn/custom_op/fpgadataflow/addstreams_batch.py @@ -356,3 +356,8 @@ class AddStreams_Batch(HLSCustomOp): self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" ) + + def get_verilog_top_module_intf_names(self): + intf_names = super().get_verilog_top_module_intf_names() + intf_names["s_axis"] = ["in0_V_V", "in1_V_V"] + return intf_names diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py index 9b718ecbbc490610790b68871080de23a54f4891..05870b8d9d5d3a11bad7882c9a7d122f8cd34cf6 100644 --- a/src/finn/custom_op/fpgadataflow/iodma.py +++ b/src/finn/custom_op/fpgadataflow/iodma.py @@ -344,3 +344,15 @@ class IODMA(HLSCustomOp): def strm_decl(self): pass + + def get_verilog_top_module_intf_names(self): + intf_names = super().get_verilog_top_module_intf_names() + if self.get_nodeattr("direction") == "out": + intf_names["s_axis"] = ["in0_V_V"] + intf_names["m_axis"] = [] + else: + intf_names["s_axis"] = [] + intf_names["m_axis"] = ["out_V_V"] + intf_names["axilite"] = ["s_axi_control"] + intf_names["aximm"] = ["m_axi_gmem"] + return intf_names diff --git 
a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py index a7ebff68749120868cae9ce5ac18d2856fe2cb8a..9c3bd3ac87b94f3e0ff11a2937bf5083aae614f6 100644 --- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py @@ -87,7 +87,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): "numInputVectors": ("ints", False, [1]), # memory mode for the FC weights # const -- embedded weights, default, long compile/synth times - # decoupled -- streaming weights + # decoupled -- streaming weights with weight streamer packaged inside IP + # external -- streaming weights with external streamer "mem_mode": ("s", False, "const"), # FPGA resource type for memories in decoupled mode # auto -- let Vivado decide @@ -105,14 +106,14 @@ class StreamingFCLayer_Batch(HLSCustomOp): node = self.onnx_node # set top name depending on mem_mode mem_mode = self.get_nodeattr("mem_mode") - if mem_mode == "const": + if mem_mode == "const" or mem_mode == "external": prefixed_top_name = "%s_%s" % (node.name, node.name) elif mem_mode == "decoupled": prefixed_top_name = "%s_memstream" % (node.name) else: raise Exception( - """Please set mem_mode to "const" or "decoupled", currently no other - parameter value is supported!""" + """Please set mem_mode to "const", "decoupled", or "external", + currently no other parameter value is supported!""" ) return prefixed_top_name @@ -301,7 +302,10 @@ class StreamingFCLayer_Batch(HLSCustomOp): def get_weightstream_width(self): """Returns weight stream width. 
Used only in decoupled mode.""" - if self.get_nodeattr("mem_mode") == "decoupled": + if ( + self.get_nodeattr("mem_mode") == "decoupled" + or self.get_nodeattr("mem_mode") == "external" + ): pe = self.get_nodeattr("PE") simd = self.get_nodeattr("SIMD") wp = self.get_weight_datatype().bitwidth() @@ -484,7 +488,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): def generate_params(self, model, path): mem_mode = self.get_nodeattr("mem_mode") - # weights + code_gen_dir = path + # weights, if not external weights = model.get_initializer(self.onnx_node.input[1]) # convert weights into hlslib-compatible format weight_tensor = self.get_hls_compatible_weight_tensor(weights) @@ -493,7 +498,6 @@ class StreamingFCLayer_Batch(HLSCustomOp): # so use it as such for weight generation if self.get_weight_datatype() == DataType.BIPOLAR: export_wdt = DataType.BINARY - code_gen_dir = path if mem_mode == "const": """Saves weights into params.h""" @@ -523,7 +527,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): f_weights.write(weight_hls_code) f_weights.close() - elif mem_mode == "decoupled": + elif mem_mode == "decoupled" or mem_mode == "external": """Saves weights in corresponding file format for cppsim or rtlsim""" # transpose weight tensor from (1, PE, WMEM, SIMD) to (1, WMEM, PE, SIMD) weight_tensor_unflipped = np.transpose(weight_tensor, (0, 2, 1, 3)) @@ -552,37 +556,37 @@ class StreamingFCLayer_Batch(HLSCustomOp): os.path.join(code_gen_dir, "weights.npy"), weight_tensor_simd_flipped ) - """Saves weights into .dat file""" - # convert weight values into hexstring - weight_width = self.get_weightstream_width() - # pad to nearest 4 bits to get hex strings - weight_width_padded = roundup_to_integer_multiple(weight_width, 4) - weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string( - weight_tensor_pe_flipped, export_wdt, weight_width_padded, prefix="" - ) - weight_stream_len = np.prod(weight_tensor_pe_flipped.shape) - factor = math.ceil(weight_stream_len / 1024) - # add zeroes to pad 
out file to 1024 entries - weight_stream = weight_tensor_pe_flipped.flatten() - pad_amt = (factor * 1024) - weight_stream_len - weight_stream = np.pad( - weight_stream, (0, pad_amt), mode="constant", constant_values="0" - ) - weight_stream = weight_stream.copy() - i = 0 - j = 0 - for val in weight_stream: - if i == 1024: - i = 0 - j += 1 - with open("{}/memblock_{}.dat".format(code_gen_dir, j), "a+") as f: - f.write(val + "\n") - i += 1 - + if mem_mode == "decoupled": + """Saves weights into .dat file""" + # convert weight values into hexstring + weight_width = self.get_weightstream_width() + # pad to nearest 4 bits to get hex strings + weight_width_padded = roundup_to_integer_multiple(weight_width, 4) + weight_tensor_pe_flipped = pack_innermost_dim_as_hex_string( + weight_tensor_pe_flipped, export_wdt, weight_width_padded, prefix="" + ) + weight_stream_len = np.prod(weight_tensor_pe_flipped.shape) + factor = math.ceil(weight_stream_len / 1024) + # add zeroes to pad out file to 1024 entries + weight_stream = weight_tensor_pe_flipped.flatten() + pad_amt = (factor * 1024) - weight_stream_len + weight_stream = np.pad( + weight_stream, (0, pad_amt), mode="constant", constant_values="0" + ) + weight_stream = weight_stream.copy() + i = 0 + j = 0 + for val in weight_stream: + if i == 1024: + i = 0 + j += 1 + with open("{}/memblock_{}.dat".format(code_gen_dir, j), "a+") as f: + f.write(val + "\n") + i += 1 else: raise Exception( - """Please set mem_mode to "const"i or "decoupled", currently no other - parameter value is supported!""" + """Please set mem_mode to "const", "decoupled", or "external", + currently no other parameter value is supported!""" ) # save thresholds in thresh.h @@ -630,6 +634,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") + mem_mode = self.get_nodeattr("mem_mode") node = self.onnx_node # TODO ensure codegen dir exists @@ -698,7 +703,24 @@ class 
StreamingFCLayer_Batch(HLSCustomOp): ) super().reset_rtlsim(sim) super().toggle_clk(sim) - output = self.rtlsim(sim, inp) + if mem_mode == "external": + wnbits = self.get_weightstream_width() + export_wdt = self.get_weight_datatype() + # we have converted bipolar weights to binary for export, + # so use it as such for weight generation + if self.get_weight_datatype() == DataType.BIPOLAR: + export_wdt = DataType.BINARY + wei = npy_to_rtlsim_input( + "{}/weights.npy".format(code_gen_dir), export_wdt, wnbits + ) + io_dict = { + "inputs": {"in0": inp, "weights": wei}, + "outputs": {"out": []}, + } + self.rtlsim_multi_io(sim, io_dict) + output = io_dict["outputs"]["out"] + else: + output = self.rtlsim(sim, inp) odt = self.get_output_datatype() target_bits = odt.bitwidth() packed_bits = self.get_outstream_width() @@ -729,12 +751,12 @@ class StreamingFCLayer_Batch(HLSCustomOp): if mem_mode == "const": # self.code_gen_dict["$GLOBALS$"] += ['#include "params.h"'] pass - elif mem_mode == "decoupled": + elif mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$GLOBALS$"] += ['#include "mvau.hpp"'] else: raise Exception( - """Please set mem_mode to "const" or "decoupled", currently no other - parameter value is supported!""" + """Please set mem_mode to "const", "decoupled", or "external", + currently no other parameter value is supported!""" ) if self.calc_tmem() != 0: # TODO find a better way of checking for no pregenerated thresholds @@ -757,7 +779,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): numReps, ) ] - if mem_mode == "decoupled": + if mem_mode == "decoupled" or mem_mode == "external": wdt = self.get_weight_datatype() self.code_gen_dict["$DEFINES$"].append( "#define WP1 {}\n".format(wdt.bitwidth()) @@ -783,7 +805,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): ) mem_mode = self.get_nodeattr("mem_mode") - if mem_mode == "decoupled": + if mem_mode == "decoupled" or mem_mode == "external": wdt = self.get_weight_datatype() elem_bits = wdt.bitwidth() 
packed_bits = self.get_weightstream_width() @@ -807,7 +829,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) ) - if mem_mode == "decoupled": + if mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$STREAMDECLARATIONS$"].append( 'hls::stream<ap_uint<{}>> weights ("weights");'.format( self.get_weightstream_width() @@ -835,7 +857,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): self.get_nodeattr("resType"), ) ] - elif mem_mode == "decoupled": + elif mem_mode == "decoupled" or mem_mode == "external": wdt = self.get_weight_datatype() if wdt == DataType.BIPOLAR: export_wdt = DataType.BINARY @@ -856,8 +878,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): else: raise Exception( - """Please set mem_mode to "const" or "decoupled", currently no other - parameter value is supported!""" + """Please set mem_mode to "const", "decoupled", or "external", + currently no other parameter value is supported!""" ) def dataoutstrm(self): @@ -903,7 +925,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): self.get_outstream_width(), ) ] - elif mem_mode == "decoupled": + elif mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ """void {}( hls::stream<ap_uint<{}>> &in0, @@ -952,7 +974,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): "complete dim=1" ) ) - elif mem_mode == "decoupled": + elif mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE axis port=weights" ) @@ -962,8 +984,8 @@ class StreamingFCLayer_Batch(HLSCustomOp): else: raise Exception( - """Please set mem_mode to "const", currently no other - parameter value is supported!""" + """Please set mem_mode to "const", "decoupled", or external, + currently no other parameter value is supported!""" ) # the threshold tensor is acc_type [PE][TMEM][N_THRES] @@ -1092,3 +1114,10 @@ class StreamingFCLayer_Batch(HLSCustomOp): ) self.set_nodeattr("ip_vlnv", 
vlnv) self.code_gen_dict.clear() + + def get_verilog_top_module_intf_names(self): + intf_names = super().get_verilog_top_module_intf_names() + mem_mode = self.get_nodeattr("mem_mode") + if mem_mode == "external": + intf_names["s_axis"] = ["in0_V_V", "weights_V_V"] + return intf_names diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py index 17ba44b959577faf573d77ae222f7b2a3be6669d..38a139c279701ae7892f41b63c3c717a3e736691 100644 --- a/src/finn/custom_op/fpgadataflow/tlastmarker.py +++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py @@ -33,8 +33,9 @@ class TLastMarker(HLSCustomOp): """Node that adds/removes AXI stream TLAST signals where needed. Its behavior is transparent in node-by-node execution, only visible in IP-stitched rtlsim or actual hardware. - This node may be needed at the end of the network to signal a DMA write (needed by the - FINN PYNQ shell) or at the beginning to remove the end-of-burst from DMA read.""" + This node may be needed at the end of the network to signal a DMA write + (needed by the FINN PYNQ shell) or at the beginning to remove the end-of-burst + from DMA read.""" def __init__(self, onnx_node): super().__init__(onnx_node) @@ -239,3 +240,15 @@ class TLastMarker(HLSCustomOp): self.code_gen_dict["$STREAMDECLARATIONS$"].append( 'hls::stream<OutDType> out ("out");' ) + + def get_verilog_top_module_intf_names(self): + intf_names = super().get_verilog_top_module_intf_names() + if self.get_nodeattr("Direction") == "in": + intf_names["s_axis"] = ["in0"] + intf_names["m_axis"] = ["out_V_V"] + else: + intf_names["s_axis"] = ["in0_V_V"] + intf_names["m_axis"] = ["out_r"] + if self.get_nodeattr("DynIters") == 1: + intf_names["axilite"] = ["s_axi_control"] + return intf_names diff --git a/src/finn/transformation/fpgadataflow/create_stitched_ip.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py index 
0e898f63db785f80cfce2683df0c9b6268e3ec7e..018ad385f33a8e0aea4aa42599fd47fe5dae57dd 100644 --- a/src/finn/transformation/fpgadataflow/create_stitched_ip.py +++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py @@ -33,6 +33,8 @@ import subprocess from finn.transformation import Transformation from finn.util.basic import get_by_name, make_build_dir from finn.custom_op.registry import getCustomOp +from finn.util.basic import get_num_default_workers +import multiprocessing as mp class CreateStitchedIP(Transformation): @@ -49,20 +51,137 @@ class CreateStitchedIP(Transformation): The packaged block design IP can be found under the ip subdirectory. """ - def __init__(self, fpgapart, clk_ns = 10.0): + def __init__(self, fpgapart, clk_ns=10.0, ip_name="finn_design", vitis=False): super().__init__() self.fpgapart = fpgapart self.clk_ns = clk_ns + self.ip_name = ip_name + self.vitis = vitis if float(clk_ns) not in [5.0, 10.0, 20.0]: warnings.warn( """The chosen frequency may lead to failure due to clock divider constraints.""" ) + self.has_axilite = False + self.has_aximm = False + self.has_m_axis = False + self.m_axis_idx = 0 + self.has_s_axis = False + self.s_axis_idx = 0 + self.clock_reset_are_external = False + self.create_cmds = [] + self.connect_cmds = [] + # keep track of top-level interface names + self.intf_names = { + "clk": [], + "rst": [], + "s_axis": [], + "m_axis": [], + "aximm": [], + "axilite": [], + } + + def connect_clk_rst(self, node): + inst_name = node.name + node_inst = getCustomOp(node) + clock_intf_name = node_inst.get_verilog_top_module_intf_names()["clk"][0] + reset_intf_name = node_inst.get_verilog_top_module_intf_names()["rst"][0] + # make clock and reset external, if they aren't already + if not self.clock_reset_are_external: + self.connect_cmds.append( + "make_bd_pins_external [get_bd_pins %s/%s]" + % (inst_name, clock_intf_name) + ) + self.connect_cmds.append("set_property name ap_clk [get_bd_ports ap_clk_0]") + 
+ self.connect_cmds.append( + "make_bd_pins_external [get_bd_pins %s/%s]" + % (inst_name, reset_intf_name) + ) + self.connect_cmds.append( + "set_property name ap_rst_n [get_bd_ports ap_rst_n_0]" + ) + self.clock_reset_are_external = True + self.intf_names["clk"] = ["ap_clk"] + self.intf_names["rst"] = ["ap_rst_n"] + # otherwise connect clock and reset + else: + self.connect_cmds.append( + "connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins %s/%s]" + % (inst_name, reset_intf_name) + ) + self.connect_cmds.append( + "connect_bd_net [get_bd_ports ap_clk] [get_bd_pins %s/%s]" + % (inst_name, clock_intf_name) + ) + + def connect_axi(self, node): + inst_name = node.name + node_inst = getCustomOp(node) + axilite_intf_name = node_inst.get_verilog_top_module_intf_names()["axilite"] + aximm_intf_name = node_inst.get_verilog_top_module_intf_names()["aximm"] + if len(axilite_intf_name) != 0: + self.connect_cmds.append( + "make_bd_intf_pins_external " + "[get_bd_intf_pins %s/%s]" % (inst_name, axilite_intf_name[0]) + ) + self.connect_cmds.append( + "set_property name s_axi_control " "[get_bd_intf_ports s_axi_control_0]" + ) + assert ( + self.has_axilite is False + ), "Currently limited to one AXI-Lite interface" + self.intf_names["axilite"] = ["s_axi_control"] + self.has_axilite = True + if len(aximm_intf_name) != 0: + self.connect_cmds.append( + "make_bd_intf_pins_external [get_bd_intf_pins %s/%s]" + % (inst_name, aximm_intf_name[0]) + ) + self.connect_cmds.append( + "set_property name m_axi_gmem0 [get_bd_intf_ports m_axi_gmem_0]" + ) + self.intf_names["aximm"] = ["m_axi_gmem0"] + assert self.has_aximm is False, "Currently limited to one AXI-MM interface" + self.has_aximm = True + + def connect_m_axis_external(self, node): + inst_name = node.name + node_inst = getCustomOp(node) + output_intf_names = node_inst.get_verilog_top_module_intf_names()["m_axis"] + # make output axis external + for output_intf_name in output_intf_names: + self.connect_cmds.append( + 
"make_bd_intf_pins_external [get_bd_intf_pins %s/%s]" + % (inst_name, output_intf_name) + ) + self.connect_cmds.append( + "set_property name m_axis_%d [get_bd_intf_ports %s_0]" + % (self.m_axis_idx, output_intf_name) + ) + self.has_m_axis = True + self.intf_names["m_axis"].append("m_axis_%d" % self.m_axis_idx) + self.m_axis_idx += 1 + + def connect_s_axis_external(self, node): + inst_name = node.name + node_inst = getCustomOp(node) + input_intf_names = node_inst.get_verilog_top_module_intf_names()["s_axis"] + # make input axis external + for input_intf_name in input_intf_names: + self.connect_cmds.append( + "make_bd_intf_pins_external [get_bd_intf_pins %s/%s]" + % (inst_name, input_intf_name) + ) + self.connect_cmds.append( + "set_property name s_axis_%d [get_bd_intf_ports %s_0]" + % (self.s_axis_idx, input_intf_name) + ) + self.has_s_axis = True + self.intf_names["s_axis"].append("s_axis_%d" % self.s_axis_idx) + self.s_axis_idx += 1 def apply(self, model): ip_dirs = ["list"] - create_cmds = [] - connect_cmds = [] # ensure that all nodes are fpgadataflow, and that IPs are generated for node in model.graph.node: assert node.domain == "finn", 'Node domain is not set to "finn"' @@ -80,59 +199,62 @@ class CreateStitchedIP(Transformation): vlnv = node_inst.get_nodeattr("ip_vlnv") inst_name = node.name create_cmd = "create_bd_cell -type ip -vlnv %s %s" % (vlnv, inst_name) - create_cmds += [create_cmd] - # TODO nonlinear topologies: check this for all inputs + self.create_cmds += [create_cmd] my_producer = model.find_producer(node.input[0]) + self.connect_clk_rst(node) + self.connect_axi(node) if my_producer is None: # first node in graph - # make clock and reset external - connect_cmds.append( - "make_bd_pins_external [get_bd_pins %s/ap_clk]" % inst_name - ) - connect_cmds.append( - "make_bd_pins_external [get_bd_pins %s/ap_rst_n]" % inst_name - ) - # make input external - connect_cmds.append( - "make_bd_intf_pins_external [get_bd_intf_pins %s/in0_V_V]" - % inst_name - ) 
+ self.connect_s_axis_external(node) + if node.op_type == "TLastMarker": + assert ( + node_inst.get_nodeattr("Direction") == "in" + ), """Input TLastMarker incorrect direction""" + elif node.op_type == "IODMA": + assert ( + node_inst.get_nodeattr("direction") == "in" + ), """Input DMA incorrect direction""" else: # intermediate node - # wire up global clock and reset - connect_cmds.append( - "connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins %s/ap_rst_n]" - % inst_name - ) - connect_cmds.append( - "connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins %s/ap_clk]" - % inst_name - ) - # wire up input to previous output - # TODO nonlinear topologies: loop over all inputs - my_in_name = "%s/in0_V_V" % (inst_name) - prev_out_name = "%s/out_V_V" % (my_producer.name) - connect_cmds.append( - "connect_bd_intf_net [get_bd_intf_pins %s] [get_bd_intf_pins %s]" - % (prev_out_name, my_in_name) - ) - if model.find_consumer(node.output[0]) is None: + # wire up input(s) to previous node output(s) + # foreach input + # find producer + # find index of producer output connected to our target input + # get names of hdl interfaces for input and producer output + # issue a TCL directive to connect input to output + for i in range(len(node.input)): + producer = model.find_producer(node.input[i]) + if producer is None: + continue + j = list(producer.output).index(node.input[i]) + src_intf_name = getCustomOp( + producer + ).get_verilog_top_module_intf_names()["m_axis"][j] + dst_intf_name = node_inst.get_verilog_top_module_intf_names()[ + "s_axis" + ][i] + self.connect_cmds.append( + "connect_bd_intf_net [get_bd_intf_pins %s/%s] " + "[get_bd_intf_pins %s/%s]" + % (producer.name, src_intf_name, node.name, dst_intf_name) + ) + if model.find_consumers(node.output[0]) is None: # last node in graph + self.connect_m_axis_external(node) # ensure it is a TLastMarker to have a valid TLast signal assert ( - node.op_type == "TLastMarker" - ), """Last node is not TLastMarker. 
- Please run transformation InsertTLastMarker to ensure a valid - TLast signal""" - # make output external - connect_cmds.append( - "make_bd_intf_pins_external [get_bd_intf_pins %s/out_r]" % inst_name - ) - # make AXI lite IF external - connect_cmds.append( - "make_bd_intf_pins_external [get_bd_intf_pins %s/s_axi_control]" - % inst_name - ) + node.op_type == "TLastMarker" or node.op_type == "IODMA" + ), """Last node is not TLastMarker or DMA. + Please run transformation InsertTLastMarker/InsertIODMA to ensure + a valid TLast signal""" + if node.op_type == "TLastMarker": + assert ( + node_inst.get_nodeattr("Direction") == "out" + ), """Output TLastMarker incorrect direction""" + elif node.op_type == "IODMA": + assert ( + node_inst.get_nodeattr("direction") == "out" + ), """Output DMA incorrect direction""" # create a temporary folder for the project prjname = "finn_vivado_stitch_proj" @@ -150,22 +272,54 @@ class CreateStitchedIP(Transformation): tcl.append("set_property ip_repo_paths [%s] [current_project]" % ip_dirs_str) tcl.append("update_ip_catalog") # create block design and instantiate all layers - block_name = "finn_design" + block_name = self.ip_name tcl.append('create_bd_design "%s"' % block_name) - tcl.extend(create_cmds) - tcl.extend(connect_cmds) + tcl.extend(self.create_cmds) + tcl.extend(self.connect_cmds) fclk_mhz = 1 / (self.clk_ns * 0.001) fclk_hz = fclk_mhz * 1000000 model.set_metadata_prop("clk_ns", str(self.clk_ns)) - tcl.append("set_property CONFIG.FREQ_HZ %f [get_bd_ports /ap_clk_0]" % fclk_hz) + tcl.append("set_property CONFIG.FREQ_HZ %f [get_bd_ports /ap_clk]" % fclk_hz) tcl.append("regenerate_bd_layout") tcl.append("validate_bd_design") tcl.append("save_bd_design") + # create wrapper hdl (for rtlsim later on) + bd_base = "%s/%s.srcs/sources_1/bd/%s" % ( + vivado_stitch_proj_dir, + prjname, + block_name, + ) + bd_filename = "%s/%s.bd" % (bd_base, block_name) + tcl.append("make_wrapper -files [get_files %s] -top" % bd_filename) + 
wrapper_filename = "%s/hdl/%s_wrapper.v" % (bd_base, block_name) + tcl.append("add_files -norecurse %s" % wrapper_filename) + model.set_metadata_prop("wrapper_filename", wrapper_filename) + # synthesize to DCP and export stub, DCP and constraints + if self.vitis: + tcl.append( + "set_property SYNTH_CHECKPOINT_MODE Hierarchical [ get_files %s ]" + % bd_filename + ) + tcl.append( + "set_property -name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} " + "-value {-mode out_of_context} -objects [get_runs synth_1]" + ) + num_workers = get_num_default_workers() + assert num_workers >= 0, "Number of workers must be nonnegative." + if num_workers == 0: + num_workers = mp.cpu_count() + tcl.append("launch_runs synth_1 -jobs %s" % str(num_workers)) + tcl.append("wait_on_run [get_runs synth_1]") + tcl.append("open_run synth_1 -name synth_1") + tcl.append("write_verilog -force -mode synth_stub %s.v" % block_name) + tcl.append("write_checkpoint %s.dcp" % block_name) + tcl.append("write_xdc %s.xdc" % block_name) # export block design itself as an IP core block_vendor = "xilinx_finn" block_library = "finn" block_vlnv = "%s:%s:%s:1.0" % (block_vendor, block_library, block_name) model.set_metadata_prop("vivado_stitch_vlnv", block_vlnv) + model.set_metadata_prop("vivado_stitch_ifnames", str(self.intf_names)) tcl.append( ( "ipx::package_project -root_dir %s/ip -vendor %s " @@ -175,19 +329,89 @@ class CreateStitchedIP(Transformation): ) tcl.append("set_property core_revision 2 [ipx::find_open_core %s]" % block_vlnv) tcl.append("ipx::create_xgui_files [ipx::find_open_core %s]" % block_vlnv) + # if targeting Vitis, add some properties to the IP + if self.vitis: + tcl.append( + "ipx::remove_bus_parameter FREQ_HZ " + "[ipx::get_bus_interfaces CLK.AP_CLK -of_objects [ipx::current_core]]" + ) + # replace source code with dcp + tcl.append( + "set_property sdx_kernel true [ipx::find_open_core %s]" % block_vlnv + ) + tcl.append( + "set_property sdx_kernel_type rtl [ipx::find_open_core %s]" % block_vlnv + 
) + tcl.append( + "set_property supported_families { } [ipx::find_open_core %s]" + % block_vlnv + ) + tcl.append( + "set_property xpm_libraries {XPM_CDC XPM_MEMORY XPM_FIFO} " + "[ipx::find_open_core %s]" % block_vlnv + ) + tcl.append( + "set_property auto_family_support_level level_2 " + "[ipx::find_open_core %s]" % block_vlnv + ) + # remove all files from synthesis and sim groups + # we'll replace with DCP, stub, and xdc + tcl.append( + "ipx::remove_all_file " + "[ipx::get_file_groups xilinx_anylanguagebehavioralsimulation]" + ) + tcl.append( + "ipx::remove_all_file " + "[ipx::get_file_groups xilinx_anylanguagesynthesis]" + ) + tcl.append( + "ipx::remove_file_group " + "xilinx_anylanguagebehavioralsimulation [ipx::current_core]" + ) + tcl.append( + "ipx::remove_file_group " + "xilinx_anylanguagesynthesis [ipx::current_core]" + ) + # remove sim and src folders + tcl.append("file delete -force %s/ip/sim" % vivado_stitch_proj_dir) + tcl.append("file delete -force %s/ip/src" % vivado_stitch_proj_dir) + # copy and add DCP, stub, and xdc + tcl.append("file mkdir %s/ip/dcp" % vivado_stitch_proj_dir) + tcl.append("file mkdir %s/ip/impl" % vivado_stitch_proj_dir) + tcl.append( + "file copy -force %s.dcp %s/ip/dcp" + % (block_name, vivado_stitch_proj_dir) + ) + tcl.append( + "file copy -force %s.xdc %s/ip/impl" + % (block_name, vivado_stitch_proj_dir) + ) + tcl.append("ipx::add_file_group xilinx_implementation [ipx::current_core]") + tcl.append( + "ipx::add_file impl/%s.xdc [ipx::get_file_groups xilinx_implementation]" + % block_name + ) + tcl.append( + "set_property used_in [list implementation] " + "[ipx::get_files impl/%s.xdc " + "-of_objects [ipx::get_file_groups xilinx_implementation]]" % block_name + ) + tcl.append( + "ipx::add_file_group " "xilinx_synthesischeckpoint [ipx::current_core]" + ) + tcl.append( + "ipx::add_file dcp/%s.dcp " + "[ipx::get_file_groups xilinx_synthesischeckpoint]" % block_name + ) + tcl.append( + "ipx::add_file_group 
xilinx_simulationcheckpoint [ipx::current_core]" + ) + tcl.append( + "ipx::add_file dcp/%s.dcp " + "[ipx::get_file_groups xilinx_simulationcheckpoint]" % block_name + ) tcl.append("ipx::update_checksums [ipx::find_open_core %s]" % block_vlnv) tcl.append("ipx::save_core [ipx::find_open_core %s]" % block_vlnv) - # create wrapper hdl (for rtlsim later on) - bd_base = "%s/%s.srcs/sources_1/bd/%s" % ( - vivado_stitch_proj_dir, - prjname, - block_name, - ) - bd_filename = "%s/%s.bd" % (bd_base, block_name) - tcl.append("make_wrapper -files [get_files %s] -top" % bd_filename) - wrapper_filename = "%s/hdl/%s_wrapper.v" % (bd_base, block_name) - tcl.append("add_files -norecurse %s" % wrapper_filename) - model.set_metadata_prop("wrapper_filename", wrapper_filename) # export list of used Verilog files (for rtlsim later on) tcl.append("set all_v_files [get_files -filter {FILE_TYPE == Verilog}]") v_file_list = "%s/all_verilog_srcs.txt" % vivado_stitch_proj_dir diff --git a/src/finn/transformation/fpgadataflow/make_pynq_proj.py b/src/finn/transformation/fpgadataflow/make_pynq_proj.py index 91f6bd2c4ab19c736fcf21322979cac17a163f24..a874d7a7c702e1b3e9125fc031aa65dc287a407d 100644 --- a/src/finn/transformation/fpgadataflow/make_pynq_proj.py +++ b/src/finn/transformation/fpgadataflow/make_pynq_proj.py @@ -67,6 +67,16 @@ class MakePYNQProject(Transformation): raise Exception( "No vlnv for stitched IP found, apply CreateStitchedIP first." 
) + vivado_stitch_ifnames = model.get_metadata_prop("vivado_stitch_ifnames") + if vivado_stitch_ifnames is None: + raise Exception("No IF name metadata found, apply CreateStitchedIP first.") + vivado_stitch_ifnames = eval(vivado_stitch_ifnames) + # recover interface names from dict + self.clk_name = vivado_stitch_ifnames["clk"][0] + self.rst_name = vivado_stitch_ifnames["rst"][0] + self.s_axis_if_name = vivado_stitch_ifnames["s_axis"][0] + self.m_axis_if_name = vivado_stitch_ifnames["m_axis"][0] + self.s_aximm_if_name = vivado_stitch_ifnames["axilite"][0] # collect list of all IP dirs ip_dirs = ["list"] @@ -105,11 +115,11 @@ class MakePYNQProject(Transformation): multiple of 8.""" in_bytes = i_bits_per_cycle_padded / 8 out_bytes = o_bits_per_cycle_padded / 8 - in_if_name = "in0_V_V_0" - out_if_name = "out_r_0" - clk_name = "ap_clk_0" - nrst_name = "ap_rst_n_0" - axi_lite_if_name = "s_axi_control_0" + in_if_name = self.s_axis_if_name + out_if_name = self.m_axis_if_name + clk_name = self.clk_name + nrst_name = self.rst_name + axi_lite_if_name = self.s_aximm_if_name vivado_ip_cache = os.getenv("VIVADO_IP_CACHE", default="") # create a temporary folder for the project diff --git a/src/finn/transformation/fpgadataflow/synth_ooc.py b/src/finn/transformation/fpgadataflow/synth_ooc.py index 1d49970c819961d1794cc89e998108639ca15593..8fd7e4724ef7f255b1435d5ab5e680d155d39487 100644 --- a/src/finn/transformation/fpgadataflow/synth_ooc.py +++ b/src/finn/transformation/fpgadataflow/synth_ooc.py @@ -37,7 +37,7 @@ from finn.util.basic import make_build_dir class SynthOutOfContext(Transformation): """Run out-of-context Vivado synthesis on a stitched IP design.""" - def __init__(self, part, clk_period_ns, clk_name="ap_clk_0"): + def __init__(self, part, clk_period_ns, clk_name="ap_clk"): super().__init__() self.part = part self.clk_period_ns = clk_period_ns diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 
2cde191435894c72cadf73af82df3f315fb2998c..b47f269dd6f2671c3d98c9316954483c0e72f14f 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -545,6 +545,7 @@ class MoveScalarLinearPastInvariants(Transformation): # move prod0 from input to output, old_prod0_in = prod0.input[0] old_prod0_out = prod0.output[0] + scalar_op_odt = model.get_tensor_datatype(old_prod0_out) old_n_out = n.output[0] in_shape = model.get_tensor_shape(n.input[0]) out_shape = model.get_tensor_shape(n.output[0]) @@ -555,12 +556,16 @@ class MoveScalarLinearPastInvariants(Transformation): model.set_tensor_shape(n.input[0], in_shape) model.set_tensor_shape(n.output[0], out_shape) model.set_tensor_shape(prod0.output[0], out_shape) + model.set_tensor_datatype(prod0.output[0], scalar_op_odt) + model.set_tensor_datatype(n.output[0], DataType.FLOAT32) graph.node.remove(prod0) graph.node.insert(node_ind - 1, prod0) graph_modified = True else: continue - model = model.transform(InferShapes()) + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) return (model, graph_modified) diff --git a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py index 4fb84be59333ef0e696204c9064fcf77e35b5d9b..59ac1c09f4fe338ef03a8166c63b9d4b29bbc08e 100644 --- a/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py +++ b/tests/fpgadataflow/test_fpgadataflow_duplicatestreams.py @@ -33,6 +33,8 @@ from onnx import TensorProto, helper import finn.core.onnx_exec as oxe from finn.core.datatype import DataType from finn.core.modelwrapper import ModelWrapper +from finn.transformation.infer_shapes import InferShapes +from finn.transformation.infer_datatypes import InferDataTypes from finn.transformation.fpgadataflow.prepare_ip import PrepareIP from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim from 
finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim @@ -72,6 +74,9 @@ def make_dupstreams_modelwrapper(ch, pe, idim, idt): model.set_tensor_datatype("inp", idt) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return model diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py index fc5cdb7745945bee99564ba9ab19423a66d8e035..952d994076fc4da7e7f763d9f0fe303d8da0ff11 100644 --- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py +++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py @@ -134,7 +134,7 @@ def prepare_inputs(input_tensor, idt, wdt): # mem_mode: const or decoupled -@pytest.mark.parametrize("mem_mode", ["const", "decoupled"]) +@pytest.mark.parametrize("mem_mode", ["const", "decoupled", "external"]) # activation: None or DataType @pytest.mark.parametrize("act", [None, DataType.BIPOLAR, DataType.INT4]) # weight datatype @@ -221,7 +221,7 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh): # mem_mode: const or decoupled -@pytest.mark.parametrize("mem_mode", ["const", "decoupled"]) +@pytest.mark.parametrize("mem_mode", ["const", "decoupled", "external"]) # activation: None or DataType @pytest.mark.parametrize("act", [None, DataType.BIPOLAR, DataType.INT4]) # weight datatype diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py index a9f5bf5ffa1f816b82ef701800e92249056b7c74..7cb31557dfaa61e3a5e5c0a7c65e1fbe717bf0f1 100644 --- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py +++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py @@ -119,7 +119,7 @@ def create_one_fc_model(): return model -def create_two_fc_model(): +def create_two_fc_model(mem_mode="decoupled"): # create a model with two StreamingFCLayer instances wdt = DataType.INT2 idt = DataType.INT32 @@ -152,7 +152,7 @@ def create_two_fc_model(): ActVal=actval, binaryXnorMode=binary_xnor_mode, noActivation=no_act, 
- mem_mode="decoupled", + mem_mode=mem_mode, ) fc1 = helper.make_node( @@ -172,7 +172,7 @@ def create_two_fc_model(): ActVal=actval, binaryXnorMode=binary_xnor_mode, noActivation=no_act, - mem_mode="decoupled", + mem_mode=mem_mode, ) graph = helper.make_graph( @@ -247,35 +247,35 @@ def test_fpgadataflow_ipstitch_rtlsim(): model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd") sim = pyverilate_stitched_ip(model) exp_io = [ - "ap_clk_0", - "ap_rst_n_0", - "in0_V_V_0_tdata", - "in0_V_V_0_tready", - "in0_V_V_0_tvalid", - "out_r_0_tdata", - "out_r_0_tkeep", - "out_r_0_tlast", - "out_r_0_tready", - "out_r_0_tvalid", - "s_axi_control_0_araddr", - "s_axi_control_0_arready", - "s_axi_control_0_arvalid", - "s_axi_control_0_awaddr", - "s_axi_control_0_awready", - "s_axi_control_0_awvalid", - "s_axi_control_0_bready", - "s_axi_control_0_bresp", - "s_axi_control_0_bvalid", - "s_axi_control_0_rdata", - "s_axi_control_0_rready", - "s_axi_control_0_rresp", - "s_axi_control_0_rvalid", - "s_axi_control_0_wdata", - "s_axi_control_0_wready", - "s_axi_control_0_wstrb", - "s_axi_control_0_wvalid", + "ap_clk", + "ap_rst_n", + "s_axis_0_tdata", + "s_axis_0_tready", + "s_axis_0_tvalid", + "m_axis_0_tdata", + "m_axis_0_tkeep", + "m_axis_0_tlast", + "m_axis_0_tready", + "m_axis_0_tvalid", + "s_axi_control_araddr", + "s_axi_control_arready", + "s_axi_control_arvalid", + "s_axi_control_awaddr", + "s_axi_control_awready", + "s_axi_control_awvalid", + "s_axi_control_bready", + "s_axi_control_bresp", + "s_axi_control_bvalid", + "s_axi_control_rdata", + "s_axi_control_rready", + "s_axi_control_rresp", + "s_axi_control_rvalid", + "s_axi_control_wdata", + "s_axi_control_wready", + "s_axi_control_wstrb", + "s_axi_control_wvalid", ] - assert dir(sim.io) == exp_io + assert sorted(dir(sim.io)) == sorted(exp_io) model.set_metadata_prop("exec_mode", "rtlsim") idt = model.get_tensor_datatype("inp") ishape = model.get_tensor_shape("inp")