diff --git a/finn-rtllib/checksum/checksum.cpp b/custom_hls/checksum.cpp similarity index 100% rename from finn-rtllib/checksum/checksum.cpp rename to custom_hls/checksum.cpp diff --git a/finn-rtllib/checksum/checksum.hpp b/custom_hls/checksum.hpp similarity index 99% rename from finn-rtllib/checksum/checksum.hpp rename to custom_hls/checksum.hpp index 35f6271d6e154508e17d68c410895056bc4409ae..bf580f31a6228ffd446221ff5c7cd5f29e439837 100644 --- a/finn-rtllib/checksum/checksum.hpp +++ b/custom_hls/checksum.hpp @@ -43,7 +43,7 @@ * The provided DefaultSubwordSlicer assumes an `ap_(u)int`-like word * type with a member `width` and a range-based slicing operator. It * further assumes a little-endian arrangement of subwords within words - * for the canonical subword stream order. + * for the canonical subword stream order. * - Subwords wider than 23 bits are folded using bitwise XOR across * slices of 23 bits starting from the LSB. * - The folded subword values are weighted according to their position diff --git a/finn-rtllib/checksum/checksum_tb.sv b/custom_hls/checksum_tb.sv similarity index 100% rename from finn-rtllib/checksum/checksum_tb.sv rename to custom_hls/checksum_tb.sv diff --git a/src/finn/custom_op/fpgadataflow/checksum.py b/src/finn/custom_op/fpgadataflow/checksum.py index 0c264fcce6a6bdaaf70ff8ed8d0daa2784dfd9f6..22f9a92bd8cd856561b21659b3f828135b0cd08f 100644 --- a/src/finn/custom_op/fpgadataflow/checksum.py +++ b/src/finn/custom_op/fpgadataflow/checksum.py @@ -124,6 +124,9 @@ class checksum(HLSCustomOp): return normal_ishape + def get_ap_int_max_w(self): + return max(super().get_ap_int_max_w(), 32) + def get_normal_output_shape(self): # same shape as input return self.get_normal_input_shape() @@ -132,11 +135,17 @@ class checksum(HLSCustomOp): folded_oshape = self.get_folded_output_shape() return np.prod(folded_oshape[:-1]) + def npy_to_dynamic_output(self, context): + super().npy_to_dynamic_output(context) + node = self.onnx_node + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + output_checksum = np.load("{}/output_checksum.npy".format(code_gen_dir)) + context[node.output[1]] = output_checksum + def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") node = self.onnx_node inp = context[node.input[0]] - exp_shape = self.get_normal_input_shape() # TODO ensure codegen dir exists if mode == "cppsim": @@ -152,9 +161,9 @@ class checksum(HLSCustomOp): ) if mode == "cppsim": - output = inp - output = np.asarray([output], dtype=np.float32).reshape(*exp_shape) - context[node.output[0]] = output + self.dynamic_input_to_npy(context, 1) + self.exec_precompiled_singlenode_model() + self.npy_to_dynamic_output(context) elif mode == "rtlsim": # create a npy file for the input of the node assert ( @@ -221,10 +230,30 @@ class checksum(HLSCustomOp): self.code_gen_dict["$DEFINES$"] = my_defines def read_npy_data(self): - pass + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_input_datatype() + elem_bits = dtype.bitwidth() + packed_bits = self.get_instream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"] = [] + # note: the innermost dim is reversed for the input + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);' + % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + ) def strm_decl(self): - pass + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append("ap_uint<32> chk;") def docompute(self): self.code_gen_dict["$DOCOMPUTE$"] = [ @@ -232,10 +261,39 @@ class checksum(HLSCustomOp): ] def dataoutstrm(self): - pass + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + dtype = self.get_output_datatype() + if dtype == DataType["BIPOLAR"]: + # use binary for bipolar storage + dtype = DataType["BINARY"] + elem_bits = dtype.bitwidth() + packed_bits = self.get_outstream_width() + packed_hls_type = "ap_uint<%d>" % packed_bits + elem_hls_type = dtype.get_hls_datatype_str() + npy_type = "float" + npy_out = "%s/output.npy" % code_gen_dir + shape = tuple(self.get_folded_output_shape()) + shape_cpp_str = str(shape).replace("(", "{").replace(")", "}") + + # note: the innermost dim is not reversed for the output + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + shape_cpp_str, + npy_out, + ), + "std::vector<unsigned int> checksum(1);", + "checksum[0] = chk;", + 'cnpy::npy_save("%s/output_checksum.npy",&checksum[0],{1},"w");' + % code_gen_dir, + ] def save_as_npy(self): - pass + self.code_gen_dict["$SAVEASCNPY$"] = [] def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py index ed12d2f1af7fcbba019d8896bedfb67ef03847b0..030d1834ffbb59bda1f9473af60aeb1a181af4dc 100644 --- a/src/finn/custom_op/fpgadataflow/hlscustomop.py +++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py @@ -32,6 +32,7 @@ import os import subprocess from abc import abstractmethod +from finn.core.datatype import DataType from finn.custom_op.base import CustomOp from finn.util.basic import ( CppBuilder, @@ -433,10 +434,22 @@ Found no codegen dir for this node, did you run the prepare_cppsim transformatio # assuming dynamic inputs start from 0 for in_ind in range(count): current_input_name = node.input[in_ind] - # make copy before saving array - input_array = context[current_input_name].copy() + input_array = context[current_input_name] + if in_ind == 0: + expected_inp_shape = self.get_folded_input_shape() + idt = self.get_input_datatype() + else: + expected_inp_shape = self.get_folded_input_shape(in_ind) + idt = self.get_input_datatype(in_ind) + reshaped_input = input_array.reshape(expected_inp_shape) + if idt == DataType["BIPOLAR"]: + # store bipolar activations as binary + reshaped_input = (reshaped_input + 1) / 2 + # make copy before saving the array + reshaped_input = reshaped_input.copy() np.save( - os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), input_array + os.path.join(code_gen_dir, "input_{}.npy".format(in_ind)), + reshaped_input, ) def npy_to_dynamic_output(self, context): @@ -445,7 +458,8 @@ Found no codegen dir for this node, did you run the prepare_cppsim transformatio node = self.onnx_node code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") output = np.load("{}/output.npy".format(code_gen_dir)) - context[node.output[0]] = output + exp_shape = self.get_normal_output_shape() + context[node.output[0]] = output.reshape(exp_shape) def npy_to_dynamic_outputs(self, context, npy_list): """Reads the output from .npy files generated from cppsim and places @@ -456,7 +470,11 @@ Found no codegen dir for this node, did you run the prepare_cppsim transformatio code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") for i in range(len(npy_list)): output = np.load("{}/{}".format(code_gen_dir, npy_list[i])) - context[node.output[i]] = output + if i == 0: + exp_shape = self.get_normal_output_shape() + else: + exp_shape = self.get_normal_output_shape(i) + context[node.output[i]] = output.reshape(exp_shape) def exec_precompiled_singlenode_model(self): """Executes precompiled executable.""" diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 975da7b6d2a85bd58879a92ac5a8dc1efb4dabe6..e73fa9bb2872d4a5023afb0c4e6953b4e6866b8d 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -90,13 +90,11 @@ set config_bnnlibdir "$::env(FINN_ROOT)/deps/finn-hlslib" puts "finn-hlslib dir: $config_bnnlibdir" set config_customhlsdir "$::env(FINN_ROOT)/custom_hls" puts "custom HLS dir: $config_customhlsdir" -set config_customrtldir "$::env(FINN_ROOT)/finn-rtllib/checksum" -puts "custom RTL dir: $config_customrtldir" set config_toplevelfxn "$TOPFXN$" set config_clkperiod $CLKPERIOD$ open_project $config_proj_name -add_files $config_hwsrcdir/top_$TOPFXN$.cpp -cflags "-std=c++14 -I$config_bnnlibdir -I$config_customhlsdir -I$config_customrtldir" +add_files $config_hwsrcdir/top_$TOPFXN$.cpp -cflags "-std=c++14 -I$config_bnnlibdir -I$config_customhlsdir" set_top $config_toplevelfxn open_solution sol1 diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py index 707289d393e2486780aed2c4af336dd3bafd37a6..3acfc7d8b004733131ee997f69aa4ac2aac88577 100644 --- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py @@ -545,12 +545,10 @@ class Thresholding_Batch(HLSCustomOp): out = context[node.output[0]] out = 2 * out - 1 context[node.output[0]] = out + oshape = self.get_normal_output_shape() assert ( - context[node.output[0]].shape == self.get_folded_output_shape() + context[node.output[0]].shape == oshape ), """Output shape is not as expected""" - # reshape output to have expected shape - oshape = self.get_normal_output_shape() - context[node.output[0]] = context[node.output[0]].reshape(*oshape) elif mode == "rtlsim": sim = self.get_rtlsim() nbits = self.get_instream_width() @@ -691,9 +689,12 @@ class Thresholding_Batch(HLSCustomOp): ) ] elif mem_mode == "decoupled": + # note that numReps is set to 1 in the invocation below, since + # - for cppsim the repetition comes from the threshold stream reader+input + # - for synth the unit runs continuously anyway (ap_ctrl_none) self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<{}, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1> - (in0, out, weights, numReps);""".format( + (in0, out, weights, 1);""".format( "Thresholding_Stream_Batch", total_spatial_size, tmpl_args["TSrcI"], diff --git a/src/finn/transformation/fpgadataflow/insert_hook.py b/src/finn/transformation/fpgadataflow/insert_hook.py index 22050c008b2991671b8483404e1a6e8772de691e..c1fce40c574eb58b67e728b78d31454f0c709b78 100644 --- a/src/finn/transformation/fpgadataflow/insert_hook.py +++ b/src/finn/transformation/fpgadataflow/insert_hook.py @@ -84,6 +84,9 @@ class InsertHook(Transformation): ) # insert checksum node graph.node.insert(node_ind + 1, chk_node) + # insert newly-created tensors + graph.value_info.append(chk_otensor) + graph.value_info.append(chk_result) # set chk output tensor as new input tensor of second node if len(consumers) == 1: