From e8ca508b9d172d54f2dac3e191c3db44d59237fc Mon Sep 17 00:00:00 2001
From: Yaman Umuroglu <maltanar@gmail.com>
Date: Wed, 5 Oct 2022 15:41:36 +0200
Subject: [PATCH] [Test] introduce test_fpgadataflow_conv_dynamic

---
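Reviewer note (not part of the commit): the new test exercises runtime
reconfiguration of the RTL sliding-window generators (SWGs). A condensed
sketch of the pattern, assuming a stitched-IP model with two
ConvolutionInputGenerator_rtl nodes whose AXI-Lite interfaces are named
s_axi_cfg_0_/s_axi_cfg_1_ (as in this test), and reusing the config_hook
helper introduced in the diff below; run_with_new_fm_size is a hypothetical
wrapper, not something this patch adds:

    from qonnx.custom_op.registry import getCustomOp
    from finn.core.rtlsim_exec import rtlsim_exec

    def run_with_new_fm_size(model, inp_nhwc, idim, int_dim):
        # look up both RTL sliding-window generators in the stitched design
        swgs = model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl")
        swg0, swg1 = getCustomOp(swgs[0]), getCustomOp(swgs[1])
        # get_dynamic_config maps register names to (axilite_addr, value)
        # pairs for the requested feature-map size
        configs = [
            ("s_axi_cfg_0_", swg0.get_dynamic_config((idim, idim))),
            ("s_axi_cfg_1_", swg1.get_dynamic_config((int_dim, int_dim))),
        ]
        ctx = {"global_in": inp_nhwc}
        # config_hook (defined in the test) writes the registers, raises
        # cfg_valid and resets the design before the input is streamed in
        rtlsim_exec(model, ctx, pre_hook=config_hook(configs))
        return ctx["global_out"]
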
 ...dataflow_convinputgenerator_rtl_dynamic.py | 231 +++++++++++++++---
 1 file changed, 202 insertions(+), 29 deletions(-)

diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py
index f2d51d9ea..2a3413cb1 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator_rtl_dynamic.py
@@ -28,23 +28,220 @@
 import pytest
 
+import copy
+import numpy as np
+import onnx.parser as oprs
 from onnx import TensorProto, helper
 from pyverilator.util.axi_utils import axilite_write, reset_rtlsim
 from qonnx.core.datatype import DataType
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.custom_op.general.im2col import compute_conv_output_dim
 from qonnx.custom_op.registry import getCustomOp
-from qonnx.transformation.general import GiveUniqueNodeNames
-from qonnx.util.basic import gen_finn_dt_tensor
+from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from qonnx.transformation.infer_datatypes import InferDataTypes
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from qonnx.util.basic import gen_finn_dt_tensor, get_by_name
 
 import finn.core.onnx_exec as oxe
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+import finn.transformation.streamline.absorb as absorb
+from finn.core.onnx_exec import execute_onnx
 from finn.core.rtlsim_exec import rtlsim_exec
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
 from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
 
 
+def create_conv_model(idim, ifm, k, stride, ofm, idt, wdt):
+    np.random.seed(0)
+    ishp = (1, ifm, idim, idim)
+    int_dim = compute_conv_output_dim(idim, k, stride)
+    odim = compute_conv_output_dim(int_dim, k, stride)
+    oshp = (1, ofm, odim, odim)
+    wshp = (ofm, ifm, k, k)
+    wshp_1 = (ofm, ofm, k, k)
+    ishp_str = str(list(ishp))
+    oshp_str = str(list(oshp))
+    wshp_str = str(list(wshp))
+    wshp_1_str = str(list(wshp_1))
+    kshp_str = str([k, k])
+    pad_str = str([0, 0, 0, 0])
+    stride_str = str([stride, stride])
+    dil_str = str([1, 1])
+
+    input = f"""
+    <
+        ir_version: 7,
+        opset_import: ["" : 9]
+    >
+    agraph (float{ishp_str} in0) => (float{oshp_str} out0)
+    <
+        float{wshp_str} param_c0_weight,
+        float{wshp_1_str} param_c1_weight
+    >
+    {{
+        conv0 = Conv<
+                dilations={dil_str},group=1,kernel_shape={kshp_str},pads={pad_str},
+                strides={stride_str}
+            >(in0, param_c0_weight)
+        out0 = Conv<
+                dilations={dil_str},group=1,kernel_shape={kshp_str},pads={pad_str},
+                strides={stride_str}
+            >(conv0, param_c1_weight)
+    }}
+    """
+    model = oprs.parse_model(input)
+    model = ModelWrapper(model)
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+    model.set_tensor_datatype("in0", idt)
+    model.set_tensor_datatype("param_c0_weight", wdt)
+    model.set_tensor_datatype("param_c1_weight", wdt)
+    model.set_initializer("param_c0_weight", gen_finn_dt_tensor(wdt, wshp))
+    model.set_initializer("param_c1_weight", gen_finn_dt_tensor(wdt, wshp_1))
+    return model
+
+
+def update_conv_model_dims(model, idim_new):
+    cnode = model.get_nodes_by_op_type("Conv")[0]
+    k, _ = get_by_name(cnode.attribute, "kernel_shape").ints
+    stride, _ = get_by_name(cnode.attribute, "strides").ints
+    ishp = model.get_tensor_shape("in0")
+    n, ci, _, _ = ishp
+    n, co, _, _ = model.get_tensor_shape("out0")
+    int_dim = compute_conv_output_dim(idim_new, k, stride)
+    odim = compute_conv_output_dim(int_dim, k, stride)
+    model.set_tensor_shape("in0", (n, ci, idim_new, idim_new))
+    model.set_tensor_shape("out0", (n, co, odim, odim))
+    # remove all existing shapes
+    del model.graph.value_info[:]
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+    return model
+
+
+# Helper function to update tensor dimensions manually because shape inference
+# does not work on FINN nodes (they assume well-defined tensor shapes).
+def update_tensor_dim(model, tensor_name, new_hw):
+    shape = model.get_tensor_shape(tensor_name)
+    shape[1] = new_hw[0]
+    shape[2] = new_hw[1]
+    model.set_tensor_shape(tensor_name, shape)
+
+
+# Helper function that delivers the hook to program the SWG via AXI-Lite
+def config_hook(configs):
+    if configs is None:
+        return None
+
+    def write_swg_config(sim):
+        for axi_name, config in configs:
+            # 1. Write config registers to the SWG, dict defines (addr, value) tuples
+            for config_entry in config.values():
+                axilite_write(sim, config_entry[0], config_entry[1], basename=axi_name)
+            # 2. Set cfg_valid flag (>= 1 cycle)
+            axilite_write(sim, 0, 1, basename=axi_name)
+        # 3. Reset component (>= 1 cycle)
+        reset_rtlsim(sim)
+
+    return write_swg_config
+
+
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_fpgadataflow_conv_dynamic():
+    idims = [32, 16]
+    ifm = 4
+    k = 4
+    stride = 1
+    ofm = 8
+    idt = DataType["UINT8"]
+    wdt = DataType["INT2"]
+    exp_cfgs = []
+    largest_model = None
+    for idim in idims:
+        ishp = (1, ifm, idim, idim)
+        np.random.seed(0)
+        inp = gen_finn_dt_tensor(idt, ishp)
+        model = create_conv_model(idim, ifm, k, stride, ofm, idt, wdt)
+        _, _, int_dim, _ = model.get_tensor_shape("conv0")
+        _, _, odim, _ = model.get_tensor_shape("out0")
+        if idim == max(idims):
+            # use largest model for hardware conversion
+            largest_model = copy.deepcopy(model)
+        golden = execute_onnx(model, {"in0": inp})["out0"]
+        exp_cfg = (idim, int_dim, odim, inp, golden)
+        exp_cfgs.append(exp_cfg)
+
+    # convert to hardware and prepare simulation
+    model = largest_model.transform(LowerConvsToMatMul())
+    model = model.transform(to_hls.InferConvInpGen(use_rtl_variant=True))
+    model = model.transform(
+        to_hls.InferQuantizedMatrixVectorActivation(mem_mode="decoupled")
+    )
+    model = model.transform(absorb.AbsorbConsecutiveTransposes())
+    parent_model = model.transform(CreateDataflowPartition())
+    sdp_inst = getCustomOp(
+        parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
+    )
+    model = ModelWrapper(sdp_inst.get_nodeattr("model"))
+    for swg_node in model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl"):
+        getCustomOp(swg_node).set_nodeattr("SIMD", 1)
+        getCustomOp(swg_node).set_nodeattr("dynamic_mode", 1)
+        getCustomOp(swg_node).set_nodeattr("inFIFODepth", 16)
+        getCustomOp(swg_node).set_nodeattr("outFIFODepth", 16)
+        print("SWG initial config:")
+        idim = getCustomOp(swg_node).get_nodeattr("IFMDim")
+        print(getCustomOp(swg_node).get_dynamic_config(idim))
+    model = model.transform(InsertFIFO())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(CreateStitchedIP("xc7z020clg400-1", 5))
+    model.set_metadata_prop("exec_mode", "rtlsim")
+
+    # loop through experiment configurations
+    for exp_cfg in exp_cfgs:
+        idim, int_dim, odim, inp, golden = exp_cfg
+        # model.set_metadata_prop("rtlsim_trace", "trace_size0.vcd")
+        # get config for the new dimensions
+        swg_nodes = model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl")
+        swg0 = getCustomOp(swg_nodes[0])
+        update_tensor_dim(model, swg0.onnx_node.input[0], (idim, idim))
+        update_tensor_dim(model, swg0.onnx_node.output[0], (int_dim, int_dim))
+        config0 = swg0.get_dynamic_config((idim, idim))
+        swg1 = getCustomOp(swg_nodes[1])
+        update_tensor_dim(model, swg1.onnx_node.input[0], (int_dim, int_dim))
+        update_tensor_dim(model, swg1.onnx_node.output[0], (odim, odim))
+        config1 = swg1.get_dynamic_config((int_dim, int_dim))
+        configs = [("s_axi_cfg_0_", config0), ("s_axi_cfg_1_", config1)]
+        # adjust folded shapes for I/O FIFOs
+        # (since rtlsim_exec uses folded shape info to fold global i/o tensors)
+        first_node = getCustomOp(model.graph.node[0])
+        first_node_shp = list(first_node.get_folded_input_shape())
+        first_node_shp[1] = idim
+        first_node_shp[2] = idim
+        first_node.set_nodeattr("folded_shape", first_node_shp)
+        update_tensor_dim(model, first_node.onnx_node.input[0], (idim, idim))
+        last_node = getCustomOp(model.graph.node[-1])
+        last_node_shp = list(last_node.get_folded_output_shape())
+        last_node_shp[1] = odim
+        last_node_shp[2] = odim
+        update_tensor_dim(model, last_node.onnx_node.output[0], (odim, odim))
+        last_node.set_nodeattr("folded_shape", last_node_shp)
+        model.set_metadata_prop("rtlsim_trace", "trace_size1.vcd")
+        ctx = {"global_in": inp.transpose(0, 2, 3, 1)}
+        rtlsim_exec(model, ctx, pre_hook=config_hook(configs))
+        ret = ctx["global_out"].transpose(0, 3, 1, 2)
+        assert np.isclose(golden, ret).all()
+
+
 def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, stride, dilation, idt):
     k_h, k_w = k
     ifm_dim_h, ifm_dim_w = ifm_dim
@@ -229,31 +426,6 @@ def test_fpgadataflow_slidingwindow_rtl_dynamic(
     model = model.transform(CreateStitchedIP("xc7z020clg400-1", 5))
     model.set_metadata_prop("exec_mode", "rtlsim")
 
-    # Helper function that delivers the hook to program the SWG via AXI-Lite
-    def config_hook(config):
-        if config is None:
-            return None
-
-        def write_swg_config(sim):
-            axi_name = "s_axi_cfg_0_"
-            # 1. Write config registers to the SWG, dict defines (addr, value) tuples
-            for config_entry in config.values():
-                axilite_write(sim, config_entry[0], config_entry[1], basename=axi_name)
-            # 2. Set cfg_valid flag (>= 1 cycle)
-            axilite_write(sim, 0, 1, basename=axi_name)
-            # 3. Reset component (>= 1 cycle)
-            reset_rtlsim(sim)
-
-        return write_swg_config
-
-    # Helper function to update tensor dimensions manually because shape inference
-    # does not work on FINN nodes (they assume well-defined tensor shapes).
-    def update_tensor_dim(model, tensor_name, new_hw):
-        shape = model.get_tensor_shape(tensor_name)
-        shape[1] = new_hw[0]
-        shape[2] = new_hw[1]
-        model.set_tensor_shape(tensor_name, shape)
-
     # Simulate 1 FM for each dimension in the series
     for i, ifm_dim in enumerate(ifm_dim_series):
         ifm_dim_h, ifm_dim_w = ifm_dim
@@ -261,7 +433,7 @@ def test_fpgadataflow_slidingwindow_rtl_dynamic(
         ofm_dim_w = compute_conv_output_dim(ifm_dim_w, k_w, stride_w, 0, dilation_w)
         ofm_dim = [ofm_dim_h, ofm_dim_w]
 
-        config = None
+        configs = None
         if i > 0:  # skip re-programming for initial FM dimension
             # Necessary update of node and tensor attributes to make rtlsim work:
             swg_node = model.get_nodes_by_op_type("ConvolutionInputGenerator_rtl")[0]
@@ -271,6 +443,7 @@ def test_fpgadataflow_slidingwindow_rtl_dynamic(
 
             # Generate config, also overwrites IFMDim/OFMDim attributes:
             config = swg_inst.get_dynamic_config(ifm_dim)
+            configs = [("s_axi_cfg_0_", config)]
 
             # Also update FIFO nodes and corresponding tensors
             fifo_node = model.get_nodes_by_op_type("StreamingFIFO")[0]
@@ -292,7 +465,7 @@ def test_fpgadataflow_slidingwindow_rtl_dynamic(
     # Run rtlsim on stitched-ip
     x = gen_finn_dt_tensor(idt, (1, ifm_dim_h, ifm_dim_w, ifm_ch))
     context = prepare_inputs(x)
-    rtlsim_exec(model, context, pre_hook=config_hook(config))
+    rtlsim_exec(model, context, pre_hook=config_hook(configs))
     y_produced = context["outp"]
 
     # Generate golden result
-- 
GitLab