Skip to content
Snippets Groups Projects
Commit 8331c41c authored by Yaman Umuroglu's avatar Yaman Umuroglu
Browse files

Merge branch 'feature/weight_streamers_rtlsim' of...

Merge branch 'feature/weight_streamers_rtlsim' of https://github.com/Xilinx/finn into feature/cnv_w1a1_convert_to_hls_layers
parents f2dd151b fe7d83e8
No related branches found
No related tags found
No related merge requests found
...@@ -68,7 +68,8 @@ SCRIPTPATH=$(dirname "$SCRIPT") ...@@ -68,7 +68,8 @@ SCRIPTPATH=$(dirname "$SCRIPT")
BREVITAS_REPO=https://github.com/Xilinx/brevitas.git BREVITAS_REPO=https://github.com/Xilinx/brevitas.git
EXAMPLES_REPO=https://github.com/maltanar/brevitas_cnv_lfc.git EXAMPLES_REPO=https://github.com/maltanar/brevitas_cnv_lfc.git
CNPY_REPO=https://github.com/rogersce/cnpy.git CNPY_REPO=https://github.com/rogersce/cnpy.git
FINN_HLS_REPO=https://github.com/Xilinx/finn-hlslib.git #FINN_HLS_REPO=https://github.com/Xilinx/finn-hlslib.git
FINN_HLS_REPO=https://github.com/Tobi-Alonso/finn-hlslib.git
PYVERILATOR_REPO=https://github.com/maltanar/pyverilator PYVERILATOR_REPO=https://github.com/maltanar/pyverilator
PYNQSHELL_REPO=https://github.com/maltanar/PYNQ-HelloWorld.git PYNQSHELL_REPO=https://github.com/maltanar/PYNQ-HelloWorld.git
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
import math import math
import os import os
from shutil import copy
import numpy as np import numpy as np
from pyverilator import PyVerilator from pyverilator import PyVerilator
...@@ -39,7 +40,9 @@ from finn.util.data_packing import ( ...@@ -39,7 +40,9 @@ from finn.util.data_packing import (
npy_to_rtlsim_input, npy_to_rtlsim_input,
numpy_to_hls_code, numpy_to_hls_code,
rtlsim_output_to_npy, rtlsim_output_to_npy,
pack_innermost_dim_as_hex_string,
) )
from . import templates
# ONNX i/o tensor shape assumptions for StreamingFCLayer: # ONNX i/o tensor shape assumptions for StreamingFCLayer:
# input 0 is the input tensor, shape (.., i_size) = (..., MW) # input 0 is the input tensor, shape (.., i_size) = (..., MW)
...@@ -54,6 +57,7 @@ class StreamingFCLayer_Batch(HLSCustomOp): ...@@ -54,6 +57,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
def __init__(self, onnx_node): def __init__(self, onnx_node):
super().__init__(onnx_node) super().__init__(onnx_node)
self.decoupled_wrapper = templates.decoupled_wrapper
def get_nodeattr_types(self): def get_nodeattr_types(self):
my_attrs = { my_attrs = {
...@@ -413,14 +417,14 @@ class StreamingFCLayer_Batch(HLSCustomOp): ...@@ -413,14 +417,14 @@ class StreamingFCLayer_Batch(HLSCustomOp):
# convert weights into hlslib-compatible format # convert weights into hlslib-compatible format
weight_tensor = self.get_hls_compatible_weight_tensor(weights) weight_tensor = self.get_hls_compatible_weight_tensor(weights)
export_wdt = self.get_weight_datatype() export_wdt = self.get_weight_datatype()
# we have converted bipolar weights to binary for export,
# so use it as such for weight generation
if self.get_weight_datatype() == DataType.BIPOLAR:
export_wdt = DataType.BINARY
code_gen_dir = path code_gen_dir = path
if mem_mode == "const": if mem_mode == "const":
"""Saves weights into params.h""" """Saves weights into params.h"""
# we have converted bipolar weights to binary for export,
# so use it as such for weight generation
if self.get_weight_datatype() == DataType.BIPOLAR:
export_wdt = DataType.BINARY
weight_hls_code = numpy_to_hls_code( weight_hls_code = numpy_to_hls_code(
weight_tensor, export_wdt, "weights", True, True weight_tensor, export_wdt, "weights", True, True
) )
...@@ -448,18 +452,48 @@ class StreamingFCLayer_Batch(HLSCustomOp): ...@@ -448,18 +452,48 @@ class StreamingFCLayer_Batch(HLSCustomOp):
f_weights.close() f_weights.close()
elif mem_mode == "decoupled": elif mem_mode == "decoupled":
"""Saves weights into .npy file""" """Saves weights in corresponding file format for npysim or rtlsim"""
# transpose weight tensor from (1, PE, WMEM, SIMD) to (1, WMEM, PE, SIMD) # transpose weight tensor from (1, PE, WMEM, SIMD) to (1, WMEM, PE, SIMD)
weight_tensor = np.transpose(weight_tensor, (0, 2, 1, 3)) # and save as unflipped weight tensor to be able to differentiate between
# flip PE dimension # flipped and unflipped weight tensor (has to be flipped for npysim)
weight_tensor = np.flip(weight_tensor, axis=-2)
weight_tensor = np.flip(weight_tensor, axis=-1) weight_tensor_unflipped = np.transpose(weight_tensor, (0, 2, 1, 3))
# reshape weight tensor to desired shape
# flip PE dimension and reverse SIMD flip for saving weights in .npy
weight_tensor_flipped = np.flip(weight_tensor_unflipped, axis=-2)
weight_tensor_flipped = np.flip(weight_tensor_flipped, axis=-1)
# reshape weight tensor (flipped and unflipped) to desired shape
pe = self.get_nodeattr("PE") pe = self.get_nodeattr("PE")
simd = self.get_nodeattr("SIMD") simd = self.get_nodeattr("SIMD")
weight_tensor = weight_tensor.reshape(1, -1, pe * simd) # unflipped
weight_tensor = weight_tensor.copy() weight_tensor_unflipped = weight_tensor_unflipped.reshape(1, -1, pe * simd)
np.save(os.path.join(code_gen_dir, "weights.npy"), weight_tensor) weight_tensor_unflipped = weight_tensor_unflipped.copy()
# flipped
weight_tensor_flipped = weight_tensor_flipped.reshape(1, -1, pe * simd)
weight_tensor_flipped = weight_tensor_flipped.copy()
"""Saves weights into .npy file"""
np.save(os.path.join(code_gen_dir, "weights.npy"), weight_tensor_flipped)
"""Saves weights into .dat file"""
# convert weight values into hexstring
weight_width = self.get_weightstream_width()
weight_tensor_unflipped = pack_innermost_dim_as_hex_string(
weight_tensor_unflipped, export_wdt, weight_width
)
weight_pad = np.zeros((1024), int).astype(str)
weight_tensor_unflipped = weight_tensor_unflipped.flatten()
# delete "0x" in the beginning of the hexstring
for i in range(len(weight_tensor_unflipped)):
weight_tensor_unflipped[i] = weight_tensor_unflipped[i][2:]
weight_pad[: weight_tensor_unflipped.shape[0]] = weight_tensor_unflipped
weight_pad = weight_pad.copy()
f = open("{}/memblock_0.dat".format(code_gen_dir), "w+")
for val in weight_pad:
f.write(val + "\n")
f.close()
else: else:
raise Exception( raise Exception(
"""Please set mem_mode to "const"i or "decoupled", currently no other """Please set mem_mode to "const"i or "decoupled", currently no other
...@@ -572,7 +606,17 @@ class StreamingFCLayer_Batch(HLSCustomOp): ...@@ -572,7 +606,17 @@ class StreamingFCLayer_Batch(HLSCustomOp):
oshape = self.get_normal_output_shape() oshape = self.get_normal_output_shape()
context[node.output[0]] = context[node.output[0]].reshape(*oshape) context[node.output[0]] = context[node.output[0]].reshape(*oshape)
elif mode == "rtlsim": elif mode == "rtlsim":
prefixed_top_name = "%s_%s" % (node.name, node.name) # set top name depending on mem_mode
mem_mode = self.get_nodeattr("mem_mode")
if mem_mode == "const":
prefixed_top_name = "%s_%s" % (node.name, node.name)
elif mem_mode == "decoupled":
prefixed_top_name = "%s_memstream" % (node.name)
else:
raise Exception(
"""Please set mem_mode to "const" or "decoupled", currently no other
parameter value is supported!"""
)
# check if needed file exists # check if needed file exists
verilog_file = "{}/project_{}/sol1/impl/verilog/{}.v".format( verilog_file = "{}/project_{}/sol1/impl/verilog/{}.v".format(
code_gen_dir, node.name, prefixed_top_name code_gen_dir, node.name, prefixed_top_name
...@@ -657,16 +701,16 @@ class StreamingFCLayer_Batch(HLSCustomOp): ...@@ -657,16 +701,16 @@ class StreamingFCLayer_Batch(HLSCustomOp):
numReps, numReps,
) )
] ]
if var == "ipgen":
self.code_gen_dict["$DEFINES$"].append("#define PRAGMA_SUB(x) _Pragma (#x)")
self.code_gen_dict["$DEFINES$"].append("#define DO_PRAGMA(x) PRAGMA_SUB(x)")
if mem_mode == "decoupled": if mem_mode == "decoupled":
wdt = self.get_weight_datatype() wdt = self.get_weight_datatype()
self.code_gen_dict["$DEFINES$"].append( self.code_gen_dict["$DEFINES$"].append(
"#define WP1 {}\n".format(wdt.bitwidth()) "#define WP1 {}\n".format(wdt.bitwidth())
) )
if var == "ipgen":
self.code_gen_dict["$DEFINES$"].append("#define PRAGMA_SUB(x) _Pragma (#x)")
self.code_gen_dict["$DEFINES$"].append("#define DO_PRAGMA(x) PRAGMA_SUB(x)")
def read_npy_data(self): def read_npy_data(self):
code_gen_dir = self.get_nodeattr("code_gen_dir_npysim") code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
dtype = self.get_input_datatype() dtype = self.get_input_datatype()
...@@ -807,33 +851,46 @@ class StreamingFCLayer_Batch(HLSCustomOp): ...@@ -807,33 +851,46 @@ class StreamingFCLayer_Batch(HLSCustomOp):
self.get_outstream_width(), self.get_outstream_width(),
) )
] ]
elif mem_mode == "decoupled":
self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
"""void {}(
hls::stream<ap_uint<{}>> &in0,
hls::stream<ap_uint<{}>> &weights,
hls::stream<ap_uint<{}>> &out
)""".format(
self.onnx_node.name,
self.get_instream_width(),
self.get_weightstream_width(),
self.get_outstream_width(),
)
]
else: else:
raise Exception( raise Exception(
"""Please set mem_mode to "const", currently no other """Please set mem_mode to "const" or "decoupled", currently no other
parameter value is supported!""" parameter value is supported!"""
) )
def pragmas(self): def pragmas(self):
mem_mode = self.get_nodeattr("mem_mode") mem_mode = self.get_nodeattr("mem_mode")
if mem_mode == "const": self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"]
self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0"] self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out")
in_fifo_depth = self.get_nodeattr("inFIFODepth")
out_fifo_depth = self.get_nodeattr("outFIFODepth")
# insert depth pragmas only if specified
if in_fifo_depth != 0:
self.code_gen_dict["$PRAGMAS$"].append( self.code_gen_dict["$PRAGMAS$"].append(
"#pragma HLS INTERFACE axis port=out" "#pragma HLS stream depth=%d variable=in0" % in_fifo_depth
) )
in_fifo_depth = self.get_nodeattr("inFIFODepth") if out_fifo_depth != 0:
out_fifo_depth = self.get_nodeattr("outFIFODepth")
# insert depth pragmas only if specified
if in_fifo_depth != 0:
self.code_gen_dict["$PRAGMAS$"].append(
"#pragma HLS stream depth=%d variable=in0" % in_fifo_depth
)
if out_fifo_depth != 0:
self.code_gen_dict["$PRAGMAS$"].append(
"#pragma HLS stream depth=%d variable=out" % out_fifo_depth
)
self.code_gen_dict["$PRAGMAS$"].append( self.code_gen_dict["$PRAGMAS$"].append(
"#pragma HLS INTERFACE ap_ctrl_none port=return" "#pragma HLS stream depth=%d variable=out" % out_fifo_depth
) )
self.code_gen_dict["$PRAGMAS$"].append(
"#pragma HLS INTERFACE ap_ctrl_none port=return"
)
if mem_mode == "const":
# the weight tensor is ap_uint<simd*prec> [PE][WMEM] # the weight tensor is ap_uint<simd*prec> [PE][WMEM]
# partition for parallel access along the PE dimension (dim 1) # partition for parallel access along the PE dimension (dim 1)
self.code_gen_dict["$PRAGMAS$"].append( self.code_gen_dict["$PRAGMAS$"].append(
...@@ -842,25 +899,111 @@ class StreamingFCLayer_Batch(HLSCustomOp): ...@@ -842,25 +899,111 @@ class StreamingFCLayer_Batch(HLSCustomOp):
"variable=weights.m_weights complete dim=1)" "variable=weights.m_weights complete dim=1)"
) )
) )
# the threshold tensor is acc_type [PE][TMEM][N_THRES] elif mem_mode == "decoupled":
# partition for parallel access along PE and N_THRES self.code_gen_dict["$PRAGMAS$"].append(
# dimensions (dims 1 and 3) "#pragma HLS INTERFACE axis port=weights"
if self.calc_tmem() != 0: )
# TODO find a better way of checking for no pregenerated thresholds self.code_gen_dict["$PRAGMAS$"].append(
self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS stream depth=8 variable=8"
( )
"DO_PRAGMA(HLS ARRAY_PARTITION variable=threshs.m_thresholds "
"complete dim=1)"
)
)
self.code_gen_dict["$PRAGMAS$"].append(
(
"DO_PRAGMA(HLS ARRAY_PARTITION variable=threshs.m_thresholds "
"complete dim=3)"
)
)
else: else:
raise Exception( raise Exception(
"""Please set mem_mode to "const", currently no other """Please set mem_mode to "const", currently no other
parameter value is supported!""" parameter value is supported!"""
) )
# the threshold tensor is acc_type [PE][TMEM][N_THRES]
# partition for parallel access along PE and N_THRES
# dimensions (dims 1 and 3)
if self.calc_tmem() != 0:
# TODO find a better way of checking for no pregenerated thresholds
self.code_gen_dict["$PRAGMAS$"].append(
(
"DO_PRAGMA(HLS ARRAY_PARTITION variable=threshs.m_thresholds "
"complete dim=1)"
)
)
self.code_gen_dict["$PRAGMAS$"].append(
(
"DO_PRAGMA(HLS ARRAY_PARTITION variable=threshs.m_thresholds "
"complete dim=3)"
)
)
def code_generation_ipgen(self, model, fpgapart, clk):
    """Generate the ipgen code and, for decoupled weights, a Verilog wrapper.

    The parent implementation is always invoked first with the same
    arguments. When the ``mem_mode`` node attribute is ``"decoupled"``, the
    ``self.decoupled_wrapper`` template is additionally filled in and written
    to ``<code_gen_dir_ipgen>/<node name>_memstream.v``. For any other
    ``mem_mode`` value nothing further is generated here.

    Args:
        model: passed through unchanged to the parent implementation.
        fpgapart: passed through unchanged to the parent implementation.
        clk: passed through unchanged to the parent implementation.
    """

    def round_up_to_multiple_of_8(width):
        # Ceiling to the next multiple of 8; exact multiples are kept.
        # The previous formula (floor(width / 8) + 8) only produced a
        # multiple of 8 for widths below 8 (e.g. 12 -> 9 instead of 16).
        return 8 * math.ceil(width / 8)

    # generate code for all mem_mode of MVAU/FCLayer unit
    super().code_generation_ipgen(model, fpgapart, clk)

    # only mem_mode = "decoupled" needs the additional verilog wrapper
    if self.get_nodeattr("mem_mode") != "decoupled":
        return

    node_name = self.onnx_node.name

    # empty code gen dictionary for new entries
    self.code_gen_dict.clear()
    self.code_gen_dict["$TOPNAME$"] = ["{}_memstream".format(node_name)]
    self.code_gen_dict["$LAYER_NAME$"] = ["{}_{}".format(node_name, node_name)]

    # make instream width a multiple of 8 for axi interface
    in_width = round_up_to_multiple_of_8(self.get_instream_width())
    self.code_gen_dict["$IN_RANGE$"] = ["[{}:0]".format(in_width - 1)]
    # NOTE(review): the output width is used as-is (not rounded to a
    # multiple of 8), unlike the input and weight widths -- confirm intended.
    self.code_gen_dict["$OUT_RANGE$"] = [
        "[{}:0]".format(self.get_outstream_width() - 1)
    ]

    # make weight stream width a multiple of 8 for axi interface
    weight_width = round_up_to_multiple_of_8(self.get_weightstream_width())
    self.code_gen_dict["$WEIGHT_RANGE$"] = ["[{}:0]".format(weight_width - 1)]
    self.code_gen_dict["$WEIGHT_WIDTH$"] = [str(weight_width)]

    mw = self.get_nodeattr("MW")
    mh = self.get_nodeattr("MH")
    self.code_gen_dict["$WEIGHT_DEPTH$"] = [str(int(mw * mh))]

    # substitute every placeholder key in the wrapper template
    template = self.decoupled_wrapper
    for key in self.code_gen_dict:
        # transform list into long string separated by '\n'
        code_gen_line = "\n".join(self.code_gen_dict[key])
        template = template.replace(key, code_gen_line)

    code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    wrapper_path = os.path.join(code_gen_dir, "{}_memstream.v".format(node_name))
    # context manager guarantees the file is closed even if the write fails
    with open(wrapper_path, "w") as f:
        f.write(template)

    # leave the dictionary empty again so wrapper entries do not leak
    self.code_gen_dict.clear()
def ipgen_singlenode_code(self):
    """Build the IP block and, in decoupled mode, stage the extra RTL files.

    The parent implementation always runs first. If the ``mem_mode`` node
    attribute is ``"decoupled"``, the memstream ``.v`` sources, the
    ``memblock_0.dat`` weight file and the ``<node name>_memstream.v``
    wrapper are copied into the ``project_<node name>/sol1/impl/verilog/``
    folder below ``code_gen_dir_ipgen``.
    """
    # generate ip block of MVAU/FCLayer unit for all mem modes
    super().ipgen_singlenode_code()

    if self.get_nodeattr("mem_mode") != "decoupled":
        return

    gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
    node_name = self.onnx_node.name
    # destination: verilog folder inside the code generation folder
    target_dir = "{}/project_{}/sol1/impl/verilog/".format(gen_dir, node_name)

    # memstream components from finn-rtllib (verilog sources only)
    memstream_dir = "/workspace/finn/finn-rtllib/memstream/hdl/"
    sources = [
        os.path.join(memstream_dir, entry)
        for entry in os.listdir(memstream_dir)
        if entry.endswith(".v")
    ]
    # .dat file of weights
    sources.append("{}/memblock_0.dat".format(gen_dir))
    # verilog wrapper
    sources.append("{}/{}_memstream.v".format(gen_dir, node_name))

    for src in sources:
        copy(src, target_dir)
...@@ -102,3 +102,166 @@ csynth_design ...@@ -102,3 +102,166 @@ csynth_design
export_design -format ip_catalog export_design -format ip_catalog
exit 0 exit 0
""" """
# verilog wrapper for decoupled mem mode
# Instantiates a memstream component and the layer module and connects memstream
# output stream 0 to the layer's weights stream.
# Placeholders to be substituted before use: $TOPNAME$, $LAYER_NAME$, $IN_RANGE$,
# $OUT_RANGE$, $WEIGHT_RANGE$, $WEIGHT_WIDTH$, $WEIGHT_DEPTH$.
# m_axis_0_tready is a wire (not a reg) because it is driven by the
# weights_V_V_TREADY output of the instantiated layer module.
decoupled_wrapper = """
module $TOPNAME$(
ap_clk,
ap_rst_n,
in0_V_V_TDATA,
in0_V_V_TVALID,
in0_V_V_TREADY,
out_V_V_TDATA,
out_V_V_TVALID,
out_V_V_TREADY
);
input ap_clk;
input ap_rst_n;
input $IN_RANGE$ in0_V_V_TDATA;
input in0_V_V_TVALID;
output in0_V_V_TREADY;
output $OUT_RANGE$ out_V_V_TDATA;
output out_V_V_TVALID;
input out_V_V_TREADY;
reg [31:0] config_address = 0;
reg config_ce = 0;
reg config_we = 0;
reg [31:0] config_d0 = 0;
wire [31:0] config_q0;
//multiple wire AXI Streams
reg m_axis_0_afull = 0;
wire m_axis_0_tready;
wire m_axis_0_tvalid;
wire $WEIGHT_RANGE$ m_axis_0_tdata;
reg m_axis_1_afull = 0;
reg m_axis_1_tready = 1;
wire m_axis_1_tvalid;
wire $WEIGHT_RANGE$ m_axis_1_tdata;
reg m_axis_2_afull = 0;
reg m_axis_2_tready = 1;
wire m_axis_2_tvalid;
wire $WEIGHT_RANGE$ m_axis_2_tdata;
reg m_axis_3_afull = 0;
reg m_axis_3_tready = 1;
wire m_axis_3_tvalid;
wire $WEIGHT_RANGE$ m_axis_3_tdata;
reg m_axis_4_afull = 0;
reg m_axis_4_tready = 1;
wire m_axis_4_tvalid;
wire $WEIGHT_RANGE$ m_axis_4_tdata;
reg m_axis_5_afull = 0;
reg m_axis_5_tready = 1;
wire m_axis_5_tvalid;
wire $WEIGHT_RANGE$ m_axis_5_tdata;
//memstream component
memstream
#(
//parameters to enable/disable axi-mm, set number of streams, set readmemh for
// memory, set per-stream offsets in memory, set per-stream widths
.CONFIG_EN(1),
.NSTREAMS(1),
.MEM_DEPTH(1024),
.MEM_WIDTH($WEIGHT_WIDTH$),
.MEM_INIT("./"),
//widths per stream
.STRM0_WIDTH($WEIGHT_WIDTH$),
.STRM1_WIDTH($WEIGHT_WIDTH$),
.STRM2_WIDTH($WEIGHT_WIDTH$),
.STRM3_WIDTH($WEIGHT_WIDTH$),
.STRM4_WIDTH($WEIGHT_WIDTH$),
.STRM5_WIDTH($WEIGHT_WIDTH$),
//depths per stream
.STRM0_DEPTH($WEIGHT_DEPTH$),
.STRM1_DEPTH(1),
.STRM2_DEPTH(1),
.STRM3_DEPTH(1),
.STRM4_DEPTH(1),
.STRM5_DEPTH(1),
//offsets for each stream
.STRM0_OFFSET(0),
.STRM1_OFFSET(0),
.STRM2_OFFSET(0),
.STRM3_OFFSET(0),
.STRM4_OFFSET(0),
.STRM5_OFFSET(0)
)
mem
(
.aclk(ap_clk),
.aresetn(ap_rst_n),
//optional configuration interface compatible with ap_memory
.config_address(config_address),
.config_ce(config_ce),
.config_we(config_we),
.config_d0(config_d0),
.config_q0(config_q0),
//multiple output AXI Streams, TDATA width rounded to multiple of 8 bits
.m_axis_0_afull(m_axis_0_afull),
.m_axis_0_tready(m_axis_0_tready),
.m_axis_0_tvalid(m_axis_0_tvalid),
.m_axis_0_tdata(m_axis_0_tdata),
.m_axis_1_afull(m_axis_1_afull),
.m_axis_1_tready(m_axis_1_tready),
.m_axis_1_tvalid(m_axis_1_tvalid),
.m_axis_1_tdata(m_axis_1_tdata),
.m_axis_2_afull(m_axis_2_afull),
.m_axis_2_tready(m_axis_2_tready),
.m_axis_2_tvalid(m_axis_2_tvalid),
.m_axis_2_tdata(m_axis_2_tdata),
.m_axis_3_afull(m_axis_3_afull),
.m_axis_3_tready(m_axis_3_tready),
.m_axis_3_tvalid(m_axis_3_tvalid),
.m_axis_3_tdata(m_axis_3_tdata),
.m_axis_4_afull(m_axis_4_afull),
.m_axis_4_tready(m_axis_4_tready),
.m_axis_4_tvalid(m_axis_4_tvalid),
.m_axis_4_tdata(m_axis_4_tdata),
.m_axis_5_afull(m_axis_5_afull),
.m_axis_5_tready(m_axis_5_tready),
.m_axis_5_tvalid(m_axis_5_tvalid),
.m_axis_5_tdata(m_axis_5_tdata)
);
//MVA_Stream_Unit
$LAYER_NAME$
MVA_Stream_U
(
.ap_clk(ap_clk), //input
.ap_rst_n(ap_rst_n), //input
.in0_V_V_TDATA(in0_V_V_TDATA), //$IN_RANGE$ input
.in0_V_V_TVALID(in0_V_V_TVALID), //input
.in0_V_V_TREADY(in0_V_V_TREADY), //output
.weights_V_V_TDATA(m_axis_0_tdata), //$WEIGHT_RANGE$ input
.weights_V_V_TVALID(m_axis_0_tvalid), //input
.weights_V_V_TREADY(m_axis_0_tready), //output
.out_V_V_TDATA(out_V_V_TDATA), //$OUT_RANGE$ output
.out_V_V_TVALID(out_V_V_TVALID), //output
.out_V_V_TREADY(out_V_V_TREADY) //input
);
endmodule
"""
...@@ -210,9 +210,12 @@ def test_fpgadataflow_fclayer_npysim(mem_mode, idt, wdt, act, nf, sf, mw, mh): ...@@ -210,9 +210,12 @@ def test_fpgadataflow_fclayer_npysim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
y_produced = oxe.execute_onnx(model, input_dict)["outp"] y_produced = oxe.execute_onnx(model, input_dict)["outp"]
y_produced = y_produced.reshape(y_expected.shape) y_produced = y_produced.reshape(y_expected.shape)
assert (y_produced == y_expected).all(), "npysim failed" assert (y_produced == y_expected).all(), "npysim failed"
# mem_mode: const or decoupled
@pytest.mark.parametrize("mem_mode", ["const", "decoupled"])
# activation: None or DataType # activation: None or DataType
@pytest.mark.parametrize("act", [None, DataType.BIPOLAR, DataType.INT2]) @pytest.mark.parametrize("act", [None, DataType.BIPOLAR, DataType.INT2])
# weight datatype # weight datatype
...@@ -227,7 +230,7 @@ def test_fpgadataflow_fclayer_npysim(mem_mode, idt, wdt, act, nf, sf, mw, mh): ...@@ -227,7 +230,7 @@ def test_fpgadataflow_fclayer_npysim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
@pytest.mark.parametrize("mw", [4]) @pytest.mark.parametrize("mw", [4])
# HLS matrix height (output features) # HLS matrix height (output features)
@pytest.mark.parametrize("mh", [4]) @pytest.mark.parametrize("mh", [4])
def test_fpgadataflow_fclayer_rtlsim(idt, wdt, act, nf, sf, mw, mh): def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
if nf == -1: if nf == -1:
nf = mh nf = mh
if sf == -1: if sf == -1:
...@@ -264,6 +267,11 @@ def test_fpgadataflow_fclayer_rtlsim(idt, wdt, act, nf, sf, mw, mh): ...@@ -264,6 +267,11 @@ def test_fpgadataflow_fclayer_rtlsim(idt, wdt, act, nf, sf, mw, mh):
else: else:
tdt = DataType.INT32 tdt = DataType.INT32
model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt) model = make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T, tdt)
for node in model.graph.node:
# lookup op_type in registry of CustomOps
inst = getCustomOp(node)
inst.set_nodeattr("mem_mode", mem_mode)
# prepare input data # prepare input data
input_dict = prepare_inputs(x, idt, wdt) input_dict = prepare_inputs(x, idt, wdt)
if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR: if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment