diff --git a/requirements.txt b/requirements.txt index a9e691fea28c0fc8f633a68905dbbe74cdd79a7a..e3f74c23f9701280812d26b9caa720d037bc26ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ bitstring==3.1.7 clize==4.1.1 dataclasses-json==0.5.7 gspread==3.6.0 +ipython==8.12.2 numpy==1.24.1 onnx==1.13.0 onnxoptimizer diff --git a/src/finn/custom_op/fpgadataflow/addstreams_batch.py b/src/finn/custom_op/fpgadataflow/addstreams_batch.py index af106d9c0698d2d49bbd8f8998f57cad0b2e781e..8fbdf9c452ab356e2623ef7d3f00f077682961e2 100644 --- a/src/finn/custom_op/fpgadataflow/addstreams_batch.py +++ b/src/finn/custom_op/fpgadataflow/addstreams_batch.py @@ -268,37 +268,60 @@ class AddStreams_Batch(HLSCustomOp): self.code_gen_dict["$READNPYDATA$"] = [] npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) npy_in = "%s/input_1.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in1);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in1_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in1 ("in1");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in1_{} ("in1_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): node = self.onnx_node self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<{}, {}, {}, {}, {}> (in0, in1, out, 1);""".format( + """{}<{}, {}, {}, {}, {}> (in0_{}, in1_{}, out_{}, 1);""".format( node.op_type, self.get_nodeattr("PE"), self.get_input_datatype().get_hls_datatype_str(), self.get_input_datatype().get_hls_datatype_str(), self.get_output_datatype().get_hls_datatype_str(), self.get_number_output_values(), + self.hls_sname(), + self.hls_sname(), + self.hls_sname(), ) ] @@ -315,12 +338,13 @@ class AddStreams_Batch(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -331,24 +355,27 @@ class AddStreams_Batch(HLSCustomOp): def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<{}>> &in0, hls::stream<ap_uint<{}>> &in1, - hls::stream<ap_uint<{}>> &out)""".format( + """void {}(hls::stream<ap_uint<{}>> &in0_{}, hls::stream<ap_uint<{}>> &in1_{}, + hls::stream<ap_uint<{}>> &out_{})""".format( self.onnx_node.name, self.get_nodeattr("PE") * self.get_input_datatype().bitwidth(), + self.hls_sname(), self.get_nodeattr("PE") * self.get_input_datatype().bitwidth(), + self.hls_sname(), self.get_nodeattr("PE") * self.get_output_datatype().bitwidth(), + self.hls_sname(), ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=in1 name=in1_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in1_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py index 7791647abf1c5a51c0425ae835471ec9ceb7c21c..71fc37b184a27ac707bc4dfacd860f2c5256c60c 100644 --- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py +++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py @@ -486,17 +486,28 @@ class ChannelwiseOp_Batch(HLSCustomOp): self.code_gen_dict["$READNPYDATA$"] = [] # note: the innermost dim is reversed for the input self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s, false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): @@ -512,10 +523,12 @@ class ChannelwiseOp_Batch(HLSCustomOp): raise Exception("""Unexpeted input shape""") self.code_gen_dict["$DOCOMPUTE$"] = [ """Thresholding_Batch<{}, NumChannels1, PE1, {}, {}> - (in0, out, threshs, numReps);""".format( + (in0_{}, out_{}, threshs, numReps);""".format( spatial_dim, tmpl_args["TSrcI"], tmpl_args["TDstI"], + self.hls_sname(), + self.hls_sname(), ) ] @@ -536,12 +549,13 @@ class ChannelwiseOp_Batch(HLSCustomOp): # note: the innermost dim is not reversed for the output self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s", false);' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), shape_cpp_str, npy_out, ) @@ -552,21 +566,23 @@ class ChannelwiseOp_Batch(HLSCustomOp): def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<{}>> &in0, - hls::stream<ap_uint<{}>> &out + """void {}(hls::stream<ap_uint<{}>> &in0_{}, + hls::stream<ap_uint<{}>> &out_{} )""".format( self.onnx_node.name, self.get_instream_width(), + self.hls_sname(), self.get_outstream_width(), + self.hls_sname(), ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/checksum.py b/src/finn/custom_op/fpgadataflow/checksum.py index 99646274fa1bc5b710b23ea42a25d0fed0da529c..c9d16c0011b65e63b8d7965083e5be9f1c4415c1 100644 --- a/src/finn/custom_op/fpgadataflow/checksum.py +++ b/src/finn/custom_op/fpgadataflow/checksum.py @@ -241,17 +241,28 @@ class CheckSum(HLSCustomOp): self.code_gen_dict["$READNPYDATA$"] = [] # note: the innermost dim is reversed for the input self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s, false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append("ap_uint<32> chk;") # set drain = false for cppsim @@ -259,7 +270,8 @@ class CheckSum(HLSCustomOp): def docompute(self): self.code_gen_dict["$DOCOMPUTE$"] = [ - """checksum<WORDS_PER_FRAME, ITEMS_PER_WORD>(in0, out, chk, drain);""" + """checksum<WORDS_PER_FRAME, ITEMS_PER_WORD>(in0_%s, out_%s, chk, drain);""" + % (self.hls_sname(), self.hls_sname()) ] def dataoutstrm(self): @@ -279,12 +291,13 @@ class CheckSum(HLSCustomOp): # note: the innermost dim is not reversed for the output self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s", false);' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), shape_cpp_str, npy_out, ), @@ -299,18 +312,18 @@ class CheckSum(HLSCustomOp): def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """using T = ap_uint<WORD_SIZE>;\n void {}(hls::stream<T> &in0, - hls::stream<T> &out, ap_uint<32> &chk, ap_uint<1> &drain)""".format( - self.onnx_node.name + """using T = ap_uint<WORD_SIZE>;\n void {}(hls::stream<T> &in0_{}, + hls::stream<T> &out_{}, ap_uint<32> &chk, ap_uint<1> &drain)""".format( + self.onnx_node.name, self.hls_sname(), self.hls_sname() ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS interface axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS interface axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS interface axis port=out name=out_" + self.hls_sname() + "#pragma HLS interface axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS interface s_axilite port=chk bundle=checksum" diff --git a/src/finn/custom_op/fpgadataflow/concat.py b/src/finn/custom_op/fpgadataflow/concat.py index 8b655b570d0396e253a1c98231702f816072da20..c43e88d59df4f80765680bd09025198ce3ea9adb 100644 --- a/src/finn/custom_op/fpgadataflow/concat.py +++ b/src/finn/custom_op/fpgadataflow/concat.py @@ -278,8 +278,16 @@ class StreamingConcat(HLSCustomOp): packed_hls_type = "ap_uint<%d>" % packed_bits npy_in = "%s/input_%d.npy" % (code_gen_dir, i) self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in%d);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in, i) + 'npy2apintstream<%s, %s, %d, %s>("%s", in%d_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + i, + self.hls_sname(), + ) ) def strm_decl(self): @@ -288,21 +296,28 @@ class StreamingConcat(HLSCustomOp): for i in range(n_inputs): packed_bits = self.get_instream_width(i) packed_hls_type = "ap_uint<%d>" % packed_bits - stream_name = "in%d" % i + stream_name = "in%d_%s" % (i, self.hls_sname()) self.code_gen_dict["$STREAMDECLARATIONS$"].append( 'hls::stream<%s> %s ("%s");' % (packed_hls_type, stream_name, stream_name) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): self.code_gen_dict["$DOCOMPUTE$"] = [] n_inputs = self.get_n_inputs() - in_stream_names = ["in%d" % x for x in range(n_inputs)] - in_stream_names = ",".join(in_stream_names) - comp_call = "StreamingConcat(%s, out, NumReps);" % (in_stream_names) + in_streams = [] + for i in range(n_inputs): + in_streams.append("in%d_%s" % (i, self.hls_sname())) + in_stream_names = ",".join(in_streams) + comp_call = "StreamingConcat(%s, out_%s, NumReps);" % ( + in_stream_names, + self.hls_sname(), + ) self.code_gen_dict["$DOCOMPUTE$"] = [comp_call] def dataoutstrm(self): @@ -318,12 +333,13 @@ class StreamingConcat(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -337,10 +353,15 @@ class StreamingConcat(HLSCustomOp): in_streams = [] for i in range(n_inputs): iwidth = self.get_instream_width(i) - in_streams.append("hls::stream<ap_uint<%d>> &in%d" % (iwidth, i)) + in_streams.append( + "hls::stream<ap_uint<%d>> &in%d_%s" % (iwidth, i, self.hls_sname()) + ) in_streams = ",".join(in_streams) total_width = self.get_input_datatype().bitwidth() * self.get_total_elems() - out_stream = "hls::stream<ap_uint<%d>> &out" % (total_width) + out_stream = "hls::stream<ap_uint<%d>> &out_%s" % ( + total_width, + self.hls_sname(), + ) blackbox_hls = "void %s(%s, %s)" % (self.onnx_node.name, in_streams, out_stream) self.code_gen_dict["$BLACKBOXFUNCTION$"] = [blackbox_hls] @@ -349,12 +370,11 @@ class StreamingConcat(HLSCustomOp): pragmas = [] for i in range(n_inputs): pragmas.append( - "#pragma HLS INTERFACE axis port=in%d name=in%d_%s" - % (i, i, self.hls_sname()) + "#pragma HLS INTERFACE axis port=in%d_%s" % (i, self.hls_sname()) ) self.code_gen_dict["$PRAGMAS$"] = pragmas self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py index 6cc9208bb81ff68fe941c8d8d006c65b635eb437..c80f79a8c970ce7f55024d1ad7cb57001dc7ec22 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py @@ -401,17 +401,28 @@ class ConvolutionInputGenerator(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): @@ -436,15 +447,15 @@ class ConvolutionInputGenerator(HLSCustomOp): if self.get_nodeattr("depthwise") == 1: self.code_gen_dict["$DOCOMPUTE$"] = [ """{}_dws<ConvKernelDim1, IFMChannels1, Input_precision1, IFMDim1, - OFMDim1, SIMD1, Stride1> (in0, out, numReps, {});""".format( - hls_call, hls_ram_style + OFMDim1, SIMD1, Stride1> (in0_{}, out_{}, numReps, {});""".format( + hls_call, self.hls_sname(), self.hls_sname(), hls_ram_style ) ] else: self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ConvKernelDim1, IFMChannels1, Input_precision1, IFMDim1, - OFMDim1, SIMD1, Stride1> (in0, out, numReps, {});""".format( - hls_call, hls_ram_style + OFMDim1, SIMD1, Stride1> (in0_{}, out_{}, numReps, {});""".format( + hls_call, self.hls_sname(), self.hls_sname(), hls_ram_style ) ] @@ -464,12 +475,13 @@ class ConvolutionInputGenerator(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -480,18 +492,18 @@ class ConvolutionInputGenerator(HLSCustomOp): def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0, - hls::stream<ap_uint<SIMD1*Input_precision1>> &out)""".format( - self.onnx_node.name + """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0_{}, + hls::stream<ap_uint<SIMD1*Input_precision1>> &out_{})""".format( + self.onnx_node.name, self.hls_sname(), self.hls_sname() ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py index 6e792ca585718ff9690b0a2430fc09ba46e0a2ba..43e8df17b45c8f7f2c804b1257a01ef717929b12 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py @@ -601,17 +601,28 @@ class ConvolutionInputGenerator1D(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): @@ -630,40 +641,40 @@ class ConvolutionInputGenerator1D(HLSCustomOp): self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, IFMDim1_x, OFMDim1_x, Stride1_x, SIMD1> - (in0, out, numReps, {});""".format( - swu_variant, hls_ram_style + (in0_{}, out_{}, numReps, {});""".format( + swu_variant, self.hls_sname(), self.hls_sname(), hls_ram_style ) ] if swu_variant == "ConvolutionInputGenerator_1D": self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, IFMDim1_x, OFMDim1_x, Stride1_x, SIMD1> - (in0, out, numReps, {});""".format( - swu_variant, hls_ram_style + (in0_{}, out_{}, numReps, {});""".format( + swu_variant, self.hls_sname(), self.hls_sname(), hls_ram_style ) ] if swu_variant == "ConvolutionInputGenerator_1D_dws": self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, IFMDim1_x, OFMDim1_x, SIMD1> - (in0, out, numReps, {});""".format( - swu_variant, hls_ram_style + (in0_{}, out_{}, numReps, {});""".format( + swu_variant, self.hls_sname(), self.hls_sname(), hls_ram_style ) ] if swu_variant == "ConvolutionInputGenerator_1D_dws_stride": self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, IFMDim1_x, OFMDim1_x, Stride1_x, SIMD1> - (in0, out, numReps, {});""".format( - swu_variant, hls_ram_style + (in0_{}, out_{}, numReps, {});""".format( + swu_variant, self.hls_sname(), self.hls_sname(), hls_ram_style ) ] if swu_variant == "ConvolutionInputGenerator_1D_dws_naive": self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ConvKernelDim1_x, IFMChannels1, Input_precision1, IFMDim1_x, OFMDim1_x, Stride1_x, Dilation1_x, SIMD1> - (in0, out, numReps, {});""".format( - swu_variant, hls_ram_style + (in0_{}, out_{}, numReps, {});""".format( + swu_variant, self.hls_sname(), self.hls_sname(), hls_ram_style ) ] @@ -690,12 +701,13 @@ class ConvolutionInputGenerator1D(HLSCustomOp): multi_pixel_out = 1 self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", true, 1, %d);' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s", true, 1, %d);' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, multi_pixel_out, @@ -708,26 +720,26 @@ class ConvolutionInputGenerator1D(HLSCustomOp): def blackboxfunction(self): if self.use_parallel_window_output(): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0, + """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0_{}, hls::stream<ap_uint<ConvKernelDim1_x*SIMD1*Input_precision1>> - &out)""".format( - self.onnx_node.name + &out_{})""".format( + self.onnx_node.name, self.hls_sname(), self.hls_sname() ) ] else: self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0, - hls::stream<ap_uint<SIMD1*Input_precision1>> &out)""".format( - self.onnx_node.name + """void {}(hls::stream<ap_uint<SIMD1*Input_precision1>> &in0_{}, + hls::stream<ap_uint<SIMD1*Input_precision1>> &out_{})""".format( + self.onnx_node.name, self.hls_sname(), self.hls_sname() ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/downsampler.py b/src/finn/custom_op/fpgadataflow/downsampler.py index 255606ee7f1998586c2b357904bd32b9a5590c96..d42a076c3004290c398da5888c2b178ec9f761f9 100644 --- a/src/finn/custom_op/fpgadataflow/downsampler.py +++ b/src/finn/custom_op/fpgadataflow/downsampler.py @@ -212,24 +212,36 @@ class DownSampler(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): dim_var = "1D" if (self.get_nodeattr("is1D") == 1) else "2D" + sname = self.hls_sname() self.code_gen_dict["$DOCOMPUTE$"] = [ f"""ConvolutionInputGenerator_{dim_var}_kernel1<IFMChannels, Input_precision, - IFMDim, SIMD,Stride> (in0, out, numReps);""" + IFMDim, SIMD,Stride> (in0_{sname}, out_{sname}, numReps);""" ] def dataoutstrm(self): @@ -248,12 +260,13 @@ class DownSampler(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -266,16 +279,22 @@ class DownSampler(HLSCustomOp): packed_bits = self.get_instream_width() packed_hls_type = "ap_uint<%d>" % packed_bits self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" - % (self.onnx_node.name, packed_hls_type, packed_hls_type) + "void %s(hls::stream<%s > &in0_%s, hls::stream<%s > &out_%s)" + % ( + self.onnx_node.name, + packed_hls_type, + self.hls_sname(), + packed_hls_type, + self.hls_sname(), + ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py index 312f5e7e4a799d75aa0b9b7cd82b83c1b0e51dd9..0d5d806dc5e6df4bb71c0298c4305d45558ac586 100644 --- a/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py +++ b/src/finn/custom_op/fpgadataflow/duplicatestreams_batch.py @@ -309,18 +309,27 @@ class DuplicateStreams_Batch(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): n_outputs = self.get_num_output_streams() self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) for i in range(n_outputs): - out_name = "out%d" % i + out_name = "out%d_%s" % (i, self.hls_sname()) self.code_gen_dict["$STREAMDECLARATIONS$"].append( 'hls::stream<ap_uint<%d>> %s ("%s");' % (self.get_outstream_width(), out_name, out_name) @@ -328,8 +337,13 @@ class DuplicateStreams_Batch(HLSCustomOp): def docompute(self): n_outputs = self.get_num_output_streams() - ostreams = ["out%d" % x for x in range(n_outputs)] - dc = "DuplicateStreamsCustom(in0, %s);" % (",".join(ostreams)) + ostreams = [] + for i in range(n_outputs): + ostreams.append("out%d_%s" % (i, self.hls_sname())) + dc = "DuplicateStreamsCustom(in0_%s, %s);" % ( + self.hls_sname(), + ",".join(ostreams), + ) self.code_gen_dict["$DOCOMPUTE$"] = [dc] def dataoutstrm(self): @@ -346,7 +360,7 @@ class DuplicateStreams_Batch(HLSCustomOp): outstrm_code = [] for i in range(n_outputs): - out_name = "out%d" % i + out_name = "out%d_%s" % (i, self.hls_sname()) npy_out = "%s/output%d.npy" % (code_gen_dir, i) outstrm_code.append( 'apintstream2npy<%s, %s, %d, %s>(%s, %s, "%s");' @@ -371,10 +385,14 @@ class DuplicateStreams_Batch(HLSCustomOp): inp_streams = [] o_stream_w = self.get_outstream_width() i_stream_w = self.get_instream_width() - in_stream = "hls::stream<ap_uint<%d> > &in0" % (i_stream_w) + in_stream = "hls::stream<ap_uint<%d> > &in0_%s" % (i_stream_w, self.hls_sname()) inp_streams.append(in_stream) for i in range(n_outputs): - out_stream = "hls::stream<ap_uint<%d> > &out%d" % (o_stream_w, i) + out_stream = "hls::stream<ap_uint<%d> > &out%d_%s" % ( + o_stream_w, + i, + self.hls_sname(), + ) inp_streams.append(out_stream) self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ @@ -387,12 +405,11 @@ class DuplicateStreams_Batch(HLSCustomOp): def pragmas(self): n_outputs = self.get_num_output_streams() self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] for i in range(n_outputs): self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out%d name=out%d_%s" - % (i, i, self.hls_sname()) + "#pragma HLS INTERFACE axis port=out%d_%s" % (i, self.hls_sname()) ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/eltwise.py b/src/finn/custom_op/fpgadataflow/eltwise.py index c96f12f06bb1104152cecc6f5c6cdf5c0cc215f1..348e3147928631d5d8abc8586641900ad653e20e 100644 --- a/src/finn/custom_op/fpgadataflow/eltwise.py +++ b/src/finn/custom_op/fpgadataflow/eltwise.py @@ -354,25 +354,45 @@ class StreamingEltwise(HLSCustomOp): self.code_gen_dict["$READNPYDATA$"] = [] npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type_0, elem_hls_type_0, elem_bits_0, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type_0, + elem_hls_type_0, + elem_bits_0, + npy_type, + npy_in, + self.hls_sname(), + ) ) npy_in = "%s/input_1.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in1);' - % (packed_hls_type_1, elem_hls_type_1, elem_bits_1, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in1_%s);' + % ( + packed_hls_type_1, + elem_hls_type_1, + elem_bits_1, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width(0)) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(0), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in1 ("in1");'.format(self.get_instream_width(1)) + 'hls::stream<ap_uint<{}>> in1_{} ("in1_{}");'.format( + self.get_instream_width(1), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): @@ -394,7 +414,7 @@ class StreamingEltwise(HLSCustomOp): out_hls_type, ) self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<{}, {}, {}, {}, {}, {}>(in0, in1, out, {});""".format( + """{}<{}, {}, {}, {}, {}, {}>(in0_{}, in1_{}, out_{}, {});""".format( "StreamingEltwise", self.get_nodeattr("NumChannels"), self.get_nodeattr("PE"), @@ -402,6 +422,9 @@ class StreamingEltwise(HLSCustomOp): slice_in0, slice_in1, slice_out, + self.hls_sname(), + self.hls_sname(), + self.hls_sname(), eltwise_op_str, ) ] @@ -419,12 +442,13 @@ class StreamingEltwise(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -435,24 +459,27 @@ class StreamingEltwise(HLSCustomOp): def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<{}>> &in0, hls::stream<ap_uint<{}>> &in1, - hls::stream<ap_uint<{}>> &out)""".format( + """void {}(hls::stream<ap_uint<{}>> &in0_{}, hls::stream<ap_uint<{}>> &in1_{}, + hls::stream<ap_uint<{}>> &out_{})""".format( self.onnx_node.name, self.get_nodeattr("PE") * self.get_input_datatype(0).bitwidth(), + self.hls_sname(), self.get_nodeattr("PE") * self.get_input_datatype(1).bitwidth(), + self.hls_sname(), self.get_nodeattr("PE") * self.get_output_datatype().bitwidth(), + self.hls_sname(), ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=in1 name=in1_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in1_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py index bdb5775c3eea84b09297025501f0116438b09ae7..ea9028d925c82efa24238d9e64a3b4556fb90e02 100644 --- a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py +++ b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py @@ -228,17 +228,28 @@ class FMPadding_Batch(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): @@ -256,8 +267,8 @@ class FMPadding_Batch(HLSCustomOp): hls_call = node.op_type self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ImgDim1, OutputDim1, PaddingBefore1, PaddingBehind1, NumChannels1, SIMD1, - {}> (in0, out, numReps);""".format( - hls_call, in_t + {}> (in0_{}, out_{}, numReps);""".format( + hls_call, in_t, self.hls_sname(), self.hls_sname() ) ] else: @@ -265,8 +276,8 @@ class FMPadding_Batch(HLSCustomOp): self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<OutputDim1_x, OutputDim1_y, PaddingLeft1, PaddingRight1, PaddingTop1, PaddingBottom1, NumChannels1, - SIMD1, {}> (in0, out, numReps);""".format( - hls_call, in_t + SIMD1, {}> (in0_{}, out_{}, numReps);""".format( + hls_call, in_t, self.hls_sname(), self.hls_sname() ) ] @@ -286,12 +297,13 @@ class FMPadding_Batch(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -304,16 +316,22 @@ class FMPadding_Batch(HLSCustomOp): packed_bits = self.get_instream_width() packed_hls_type = "ap_uint<%d>" % packed_bits self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" - % (self.onnx_node.name, packed_hls_type, packed_hls_type) + "void %s(hls::stream<%s > &in0_%s, hls::stream<%s > &out_%s)" + % ( + self.onnx_node.name, + packed_hls_type, + self.hls_sname(), + packed_hls_type, + self.hls_sname(), + ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py index 220856922c1ed805ccfa60213dc0cf32f45573a1..e5185070342e81af17d7fd49b0bbc5a4df495738 100644 --- a/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/globalaccpool_batch.py @@ -267,27 +267,40 @@ class GlobalAccPool_Batch(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): self.code_gen_dict["$DOCOMPUTE$"] = [ - """AccPool_Batch<{}, {}, {}, {}, {}> (in0, out, 1);""".format( + """AccPool_Batch<{}, {}, {}, {}, {}> (in0_{}, out_{}, 1);""".format( self.get_normal_input_shape()[1], self.get_nodeattr("NumChannels"), self.get_input_datatype().get_hls_datatype_str(), self.get_nodeattr("PE"), self.get_output_datatype().get_hls_datatype_str(), + self.hls_sname(), + self.hls_sname(), ) ] @@ -304,12 +317,13 @@ class GlobalAccPool_Batch(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -320,20 +334,22 @@ class GlobalAccPool_Batch(HLSCustomOp): def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<{}>> &in0, - hls::stream<ap_uint<{}>> &out)""".format( + """void {}(hls::stream<ap_uint<{}>> &in0_{}, + hls::stream<ap_uint<{}>> &out_{})""".format( self.onnx_node.name, self.get_instream_width(), + self.hls_sname(), self.get_outstream_width(), + self.hls_sname(), ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py index 8a756b630ddbd25d5740f0e46297a4ae6f686d2b..4b4ad28defc5e9832316143702f34d82735abf38 100644 --- a/src/finn/custom_op/fpgadataflow/iodma.py +++ b/src/finn/custom_op/fpgadataflow/iodma.py @@ -47,7 +47,7 @@ from finn.custom_op.fpgadataflow.hlscustomop import HLSCustomOp # Interfaces # - AXI-MM name specified by intfName unless this is set to "" (empty, the default) -# in which case output AXI-MM are named "out" and input AXI-MM are named "in0" +# in which case output AXI-MM are named "out_V" and input AXI-MM are named "in0_V" # - AXI-MM interface width (in bits) is specified by intfWidth # - AXI-Stream interface width (in bits) is specified by streamWidth # - If inftWidth and streamWidth are not equal, the DMA core performs @@ -254,15 +254,24 @@ class IODMA(HLSCustomOp): # DWCs depend on AXI MM and out interface width if strmw == intfw: # case 0: AXI MM width = out width, no DWCs needed - self.code_gen_dict["$DOCOMPUTE$"] = [dma_inst_template % ("in0", "out")] + self.code_gen_dict["$DOCOMPUTE$"] = [ + dma_inst_template + % ("in0_" + self.hls_sname(), "out_" + self.hls_sname()) + ] elif (strmw % intfw == 0) or (intfw % strmw == 0): # case 1: AXI MM width divisible by out width or vice versa # single DWC + single extra stream needed self.code_gen_dict["$DOCOMPUTE$"] = [ "hls::stream<ap_uint<%d> > dma2dwc;" % intfw, - dma_inst_template % ("in0", "dma2dwc"), + dma_inst_template % ("in0_" + self.hls_sname(), "dma2dwc"), dwc_inst_template - % (intfw, strmw, total_bits // intfw, "dma2dwc", "out"), + % ( + intfw, + strmw, + total_bits // intfw, + "dma2dwc", + "out_" + self.hls_sname(), + ), ] else: # case 2: AXI MM width not divisible by out width or vice versa @@ -271,26 +280,41 @@ class IODMA(HLSCustomOp): self.code_gen_dict["$DOCOMPUTE$"] = [ "hls::stream<ap_uint<%d> > dma2lcm;" % intfw, "hls::stream<ap_uint<%d> > lcm2out;" % width_lcm, - dma_inst_template % ("in0", "dma2lcm"), + dma_inst_template % ("in0_" + self.hls_sname(), "dma2lcm"), dwc_inst_template % (intfw, width_lcm, total_bits // intfw, "dma2lcm", "lcm2out"), dwc_inst_template - % (width_lcm, strmw, total_bits // width_lcm, "lcm2out", "out"), + % ( + width_lcm, + strmw, + total_bits // width_lcm, + "lcm2out", + "out_" + self.hls_sname(), + ), ] elif direction == "out": # in0 -> (DWCs) -> IODMA -> AXI MM # DWCs depend on AXI MM and out interface width if strmw == intfw: # case 0: in width = AXI MM width, no DWCs needed - self.code_gen_dict["$DOCOMPUTE$"] = [dma_inst_template % ("in0", "out")] + self.code_gen_dict["$DOCOMPUTE$"] = [ + dma_inst_template + % ("in0_" + self.hls_sname(), "out_" + self.hls_sname()) + ] elif (strmw % intfw == 0) or (intfw % strmw == 0): # case 1: AXI MM width divisible by in width or vice versa # single DWC + single extra stream needed self.code_gen_dict["$DOCOMPUTE$"] = [ "hls::stream<ap_uint<%d> > dwc2dma;" % intfw, dwc_inst_template - % (strmw, intfw, total_bits // strmw, "in0", "dwc2dma"), - dma_inst_template % ("dwc2dma", "out"), + % ( + strmw, + intfw, + total_bits // strmw, + "in0_" + self.hls_sname(), + "dwc2dma", + ), + dma_inst_template % ("dwc2dma", "out_" + self.hls_sname()), ] else: # case 2: AXI MM width not divisible by out width or vice versa @@ -300,10 +324,16 @@ class IODMA(HLSCustomOp): "hls::stream<ap_uint<%d> > in2lcm;" % width_lcm, "hls::stream<ap_uint<%d> > lcm2dma;" % intfw, dwc_inst_template - % (strmw, width_lcm, total_bits // strmw, "in0", "in2lcm"), + % ( + strmw, + width_lcm, + total_bits // strmw, + "in0_" + self.hls_sname(), + "in2lcm", + ), dwc_inst_template % (width_lcm, intfw, total_bits // width_lcm, "in2lcm", "lcm2dma"), - dma_inst_template % ("lcm2dma", "out"), + dma_inst_template % ("lcm2dma", "out_" + self.hls_sname()), ] else: raise Exception("Unknown IODMA direction: %s" % direction) @@ -316,13 +346,25 @@ class IODMA(HLSCustomOp): direction = self.get_nodeattr("direction") if direction == "in": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(%s *in0, hls::stream<%s > &out, unsigned int numReps)" - % (self.onnx_node.name, packed_hls_type_in, packed_hls_type_out) + "void %s(%s *in0_%s, hls::stream<%s > &out_%s, unsigned int numReps)" + % ( + self.onnx_node.name, + packed_hls_type_in, + self.hls_sname(), + packed_hls_type_out, + self.hls_sname(), + ) ] elif direction == "out": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(hls::stream<%s > &in0, %s *out, unsigned int numReps)" - % (self.onnx_node.name, packed_hls_type_in, packed_hls_type_out) + "void %s(hls::stream<%s > &in0_%s, %s *out_%s, unsigned int numReps)" + % ( + self.onnx_node.name, + packed_hls_type_in, + self.hls_sname(), + packed_hls_type_out, + self.hls_sname(), + ) ] else: raise ValueError("Invalid IODMA direction, please set to in or out") @@ -339,32 +381,36 @@ class IODMA(HLSCustomOp): if direction == "in": if intfname == "": self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE m_axi offset=slave port=in0" + "#pragma HLS INTERFACE m_axi offset=slave port=in0_" + + self.hls_sname() ) else: self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE m_axi offset=slave port=%s" % (intfname) ) self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE s_axilite port=in0 bundle=control" + "#pragma HLS INTERFACE s_axilite port=in0_%s bundle=control" + % (self.hls_sname()) ) self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) elif direction == "out": self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ) if intfname == "": self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE m_axi offset=slave port=out" + "#pragma HLS INTERFACE m_axi offset=slave port=out_" + + self.hls_sname() ) else: self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE m_axi offset=slave port=%s" % (intfname) ) self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE s_axilite port=out bundle=control" + "#pragma HLS INTERFACE s_axilite port=out_%s bundle=control" + % (self.hls_sname()) ) else: raise ValueError("Invalid IODMA direction, please set to in or out") diff --git a/src/finn/custom_op/fpgadataflow/labelselect_batch.py b/src/finn/custom_op/fpgadataflow/labelselect_batch.py index 492cd0107321f3abbfe02d5e456ee3732da982d0..12a88dacd4dfb366780c5ef5e42b355a0ca51b59 100644 --- a/src/finn/custom_op/fpgadataflow/labelselect_batch.py +++ b/src/finn/custom_op/fpgadataflow/labelselect_batch.py @@ -275,29 +275,42 @@ class LabelSelect_Batch(HLSCustomOp): # Also notice that StreamingDataWidthConverter_Batch performs LE packing self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0,false);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s, false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): node = self.onnx_node self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<{}, {}, {}, {}, {} > (in0, out, 1);""".format( + """{}<{}, {}, {}, {}, {} > (in0_{}, out_{}, 1);""".format( node.op_type, self.get_nodeattr("Labels"), self.get_nodeattr("PE"), self.get_nodeattr("K"), self.get_input_datatype().get_hls_datatype_str(), self.get_output_datatype().get_hls_datatype_str(), + self.hls_sname(), + self.hls_sname(), ) ] @@ -314,12 +327,13 @@ class LabelSelect_Batch(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -330,21 +344,23 @@ class LabelSelect_Batch(HLSCustomOp): def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<{}*{}>> &in0, - hls::stream<ap_uint<{}> > &out)""".format( + """void {}(hls::stream<ap_uint<{}*{}>> &in0_{}, + hls::stream<ap_uint<{}> > &out_{})""".format( self.onnx_node.name, self.get_nodeattr("PE"), self.get_input_datatype().bitwidth(), + self.hls_sname(), self.get_output_datatype().bitwidth(), + self.hls_sname(), ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/lookup.py b/src/finn/custom_op/fpgadataflow/lookup.py index ed560ac962477965bae39d296287c09eb077eca0..ecf630ef7f10b32b4ccea319aaddda603ad43bec 100644 --- a/src/finn/custom_op/fpgadataflow/lookup.py +++ b/src/finn/custom_op/fpgadataflow/lookup.py @@ -206,8 +206,15 @@ class Lookup(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def dataoutstrm(self): @@ -226,12 +233,13 @@ class Lookup(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", %s);' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s", %s);' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, "false", @@ -244,10 +252,14 @@ class Lookup(HLSCustomOp): def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): @@ -255,12 +267,14 @@ class Lookup(HLSCustomOp): if mem_mode == "const": self.code_gen_dict["$DOCOMPUTE$"] = [ """StreamingLookup<NumEmbeddings, EmbeddingDim, NumInputs, - InputType, EmbeddingType >(in0, out, embeddings);""" + InputType, EmbeddingType >(in0_%s, out_%s, embeddings);""" + % (self.hls_sname(), self.hls_sname()) ] elif mem_mode == "external": self.code_gen_dict["$DOCOMPUTE$"] = [ - """StreamingLookup_ext<EmbeddingSize>(in0, out, mem, size, oob_count, + """StreamingLookup_ext<EmbeddingSize>(in0_%s, out_%s, mem, size, oob_count, oob_irq);""" + % (self.hls_sname(), self.hls_sname()) ] def blackboxfunction(self): @@ -271,26 +285,29 @@ class Lookup(HLSCustomOp): packed_output_hls_type = "ap_uint<%d>" % obits if mem_mode == "const": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" - % (self.onnx_node.name, packed_input_hls_type, packed_output_hls_type) + "void %s(hls::stream<%s > &in0_%s, hls::stream<%s > &out_%s)" + % ( + self.onnx_node.name, + packed_input_hls_type, + self.hls_sname(), + packed_output_hls_type, + self.hls_sname(), + ) ] elif mem_mode == "external": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ "void " + self.onnx_node.name - + "(hls::stream<T_SRC> &in0, hls::stream<T_DST> &out, " + + "(hls::stream<T_SRC> &in0_%s, hls::stream<T_DST> &out_%s, " + % (self.hls_sname(), self.hls_sname()) + "T_DST const *const mem, unsigned const size, " + "unsigned &oob_count, bool &oob_irq)" ] def pragmas(self): mem_mode = self.get_nodeattr("mem_mode") - my_pragmas = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() - ] - my_pragmas.append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() - ) + my_pragmas = ["#pragma HLS INTERFACE axis port=in0_" + self.hls_sname()] + my_pragmas.append("#pragma HLS INTERFACE axis port=out_" + self.hls_sname()) my_pragmas.append("#pragma HLS INTERFACE ap_ctrl_none port=return") if mem_mode == "const": my_pragmas.append( diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py index 899bce98d2dd9572d1adf2f20910a0463f9d5994..fae2d86d887ce3ae0523f3a99681315af12dd645 100644 --- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py @@ -1097,8 +1097,15 @@ class MatrixVectorActivation(HLSCustomOp): self.code_gen_dict["$READNPYDATA$"] = [] # note: the innermost dim is reversed for the input self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s, false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) mem_mode = self.get_nodeattr("mem_mode") @@ -1112,24 +1119,35 @@ class MatrixVectorActivation(HLSCustomOp): npy_in = "%s/weights.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", weights, false, numReps);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", weights_%s, false, numReps);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): mem_mode = self.get_nodeattr("mem_mode") self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) if mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> weights ("weights");'.format( - self.get_weightstream_width() + 'hls::stream<ap_uint<{}>> weights_{} ("weights_{}");'.format( + self.get_weightstream_width(), self.hls_sname(), self.hls_sname() ) ) @@ -1149,10 +1167,12 @@ class MatrixVectorActivation(HLSCustomOp): if mem_mode == "const": self.code_gen_dict["$DOCOMPUTE$"] = [ """Matrix_Vector_Activate_Batch<MW1, MH1, SIMD1, PE1, 1, {}, {}, {}> - (in0, out, weights, {}, numReps, {});""".format( + (in0_{}, out_{}, weights, {}, numReps, {});""".format( tmpl_args["TSrcI"], tmpl_args["TDstI"], tmpl_args["TWeightI"], + self.hls_sname(), + self.hls_sname(), threshs, map_to_hls_mult_style[self.get_nodeattr("resType")], ) @@ -1166,11 +1186,14 @@ class MatrixVectorActivation(HLSCustomOp): wdtype_hls_str = export_wdt.get_hls_datatype_str() self.code_gen_dict["$DOCOMPUTE$"] = [ """Matrix_Vector_Activate_Stream_Batch<MW1, MH1, SIMD1, PE1, {}, {}, {}, {} > - (in0, out, weights, {}, numReps, {});""".format( + (in0_{}, out_{}, weights_{}, {}, numReps, {});""".format( tmpl_args["TSrcI"], tmpl_args["TDstI"], tmpl_args["TWeightI"], wdtype_hls_str, + self.hls_sname(), + self.hls_sname(), + self.hls_sname(), threshs, map_to_hls_mult_style[self.get_nodeattr("resType")], ) @@ -1199,12 +1222,13 @@ class MatrixVectorActivation(HLSCustomOp): # note: the innermost dim is not reversed for the output self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s", false);' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), shape_cpp_str, npy_out, ) @@ -1217,25 +1241,30 @@ class MatrixVectorActivation(HLSCustomOp): mem_mode = self.get_nodeattr("mem_mode") if mem_mode == "const": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<{}>> &in0, - hls::stream<ap_uint<{}>> &out + """void {}(hls::stream<ap_uint<{}>> &in0_{}, + hls::stream<ap_uint<{}>> &out_{} )""".format( self.onnx_node.name, self.get_instream_width(), + self.hls_sname(), self.get_outstream_width(), + self.hls_sname(), ) ] elif mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ """void {}( - hls::stream<ap_uint<{}>> &in0, - hls::stream<ap_uint<{}>> &weights, - hls::stream<ap_uint<{}>> &out + hls::stream<ap_uint<{}>> &in0_{}, + hls::stream<ap_uint<{}>> &weights_{}, + hls::stream<ap_uint<{}>> &out_{} )""".format( self.onnx_node.name, self.get_instream_width(), + self.hls_sname(), self.get_weightstream_width(), + self.hls_sname(), self.get_outstream_width(), + self.hls_sname(), ) ] @@ -1249,10 +1278,10 @@ class MatrixVectorActivation(HLSCustomOp): mem_mode = self.get_nodeattr("mem_mode") ram_style_thresholds = self.get_nodeattr("ram_style_thresholds") self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" @@ -1270,11 +1299,10 @@ class MatrixVectorActivation(HLSCustomOp): ) elif mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=weights name=weights_" - + self.hls_sname() + "#pragma HLS INTERFACE axis port=weights_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS stream depth=8 variable=weights" + "#pragma HLS stream depth=8 variable=weights_" + self.hls_sname() ) else: diff --git a/src/finn/custom_op/fpgadataflow/pool_batch.py b/src/finn/custom_op/fpgadataflow/pool_batch.py index 813f13e504eae181f4398eccbe40ad66b6e3bf16..8ccfce78209d82a77aa0d018e43ed4c4e80d7ebc 100644 --- a/src/finn/custom_op/fpgadataflow/pool_batch.py +++ b/src/finn/custom_op/fpgadataflow/pool_batch.py @@ -239,17 +239,28 @@ class Pool_Batch(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0,false);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s, false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): @@ -281,8 +292,8 @@ class Pool_Batch(HLSCustomOp): self.code_gen_dict["$DOCOMPUTE$"] += [ """Pool_batch<Channels, PE, KernelSize,Slice<{} >, Slice< {} > > - (in0,out, pool_fxn, OFMDimTotal*numReps);""".format( - i_hls_dt, o_hls_dt + (in0_{}, out_{}, pool_fxn, OFMDimTotal*numReps);""".format( + i_hls_dt, o_hls_dt, self.hls_sname(), self.hls_sname() ) ] @@ -302,12 +313,13 @@ class Pool_Batch(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s",false);' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s", false);' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -323,16 +335,22 @@ class Pool_Batch(HLSCustomOp): packed_obits = self.get_outstream_width() packed_out_hls_type = "ap_uint<%d>" % packed_obits self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" - % (self.onnx_node.name, packed_in_hls_type, packed_out_hls_type) + "void %s(hls::stream<%s > &in0_%s, hls::stream<%s > &out_%s)" + % ( + self.onnx_node.name, + packed_in_hls_type, + self.hls_sname(), + packed_out_hls_type, + self.hls_sname(), + ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py index a80d2bbefac96e8ec2a48e04179d3d285e78cef7..dc905658b199e3d0d13db3cea2d24ab7f5aed92c 100644 --- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py @@ -236,14 +236,23 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) if self.needs_lcm(): self.code_gen_dict["$STREAMDECLARATIONS$"].append( @@ -252,7 +261,9 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): @@ -263,13 +274,15 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): 'hls::stream<ap_uint<{}>> intermediate ("intermediate");'.format( self.get_iowidth_lcm() ), - "%s<InWidth, LCMWidth, NumInWords>(in0, intermediate, numReps);" % (op), - "%s<LCMWidth, OutWidth, NumLCMToOut>(intermediate, out, numReps);" - % (op), + "%s<InWidth, LCMWidth, NumInWords>(in0_%s, intermediate, numReps);" + % (op, self.hls_sname()), + "%s<LCMWidth, OutWidth, NumLCMToOut>(intermediate, out_%s, numReps);" + % (op, self.hls_sname()), ] else: self.code_gen_dict["$DOCOMPUTE$"] = [ - "%s<InWidth, OutWidth, NumInWords>(in0, out, numReps);" % (op) + "%s<InWidth, OutWidth, NumInWords>(in0_%s, out_%s, numReps);" + % (op, self.hls_sname(), self.hls_sname()) ] def dataoutstrm(self): @@ -288,12 +301,13 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -308,16 +322,22 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp): out_packed_bits = self.get_outstream_width() out_packed_hls_type = "ap_uint<%d>" % out_packed_bits self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" - % (self.onnx_node.name, in_packed_hls_type, out_packed_hls_type) + "void %s(hls::stream<%s > &in0_%s, hls::stream<%s > &out_%s)" + % ( + self.onnx_node.name, + in_packed_hls_type, + self.hls_sname(), + out_packed_hls_type, + self.hls_sname(), + ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py index a0e60931edd8590aaebc0560c4bd28d61d62e8ea..78f4095cbeaad0987fe44f2fcdfbd7f5652eb173 100755 --- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py +++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py @@ -254,17 +254,28 @@ class StreamingMaxPool_Batch(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): @@ -275,7 +286,8 @@ class StreamingMaxPool_Batch(HLSCustomOp): else: op = "StreamingMaxPool" self.code_gen_dict["$DOCOMPUTE$"] = [ - "%s<ImgDim, PoolDim, NumChannels>(in0, out);" % (op) + "%s<ImgDim, PoolDim, NumChannels>(in0_%s, out_%s);" + % (op, self.hls_sname(), self.hls_sname()) ] else: dtype = self.get_input_datatype() @@ -285,14 +297,14 @@ class StreamingMaxPool_Batch(HLSCustomOp): op = "StreamingMaxPool_Precision_1d" self.code_gen_dict["$DOCOMPUTE$"] = [ """%s<ImgDim, PoolDim, NumChannels, PE, - OutputSize, %s, %s>(in0, out);""" - % (op, dtype_hls, minval_str) + OutputSize, %s, %s>(in0_%s, out_%s);""" + % (op, dtype_hls, minval_str, self.hls_sname(), self.hls_sname()) ] else: op = "StreamingMaxPool_Precision" self.code_gen_dict["$DOCOMPUTE$"] = [ - "%s<ImgDim, PoolDim, NumChannels, %s, %s>(in0, out);" - % (op, dtype_hls, minval_str) + "%s<ImgDim, PoolDim, NumChannels, %s, %s>(in0_%s, out_%s);" + % (op, dtype_hls, minval_str, self.hls_sname(), self.hls_sname()) ] def dataoutstrm(self): @@ -311,12 +323,13 @@ class StreamingMaxPool_Batch(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -329,16 +342,22 @@ class StreamingMaxPool_Batch(HLSCustomOp): packed_bits = self.get_instream_width() packed_hls_type = "ap_uint<%d>" % packed_bits self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" - % (self.onnx_node.name, packed_hls_type, packed_hls_type) + "void %s(hls::stream<%s > &in0_%s, hls::stream<%s > &out_%s)" + % ( + self.onnx_node.name, + packed_hls_type, + self.hls_sname(), + packed_hls_type, + self.hls_sname(), + ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py index 12e635b3d612235f6b464e6f54d0f24011c7c907..fc5aa61d6669fc566b2454b1ea18d3a0a42f7d9c 100644 --- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py @@ -613,8 +613,15 @@ class Thresholding_Batch(HLSCustomOp): self.code_gen_dict["$READNPYDATA$"] = [] # note: the innermost dim is reversed for the input self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s, false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) mem_mode = self.get_nodeattr("mem_mode") if mem_mode == "decoupled": @@ -627,23 +634,34 @@ class Thresholding_Batch(HLSCustomOp): npy_in = "%s/thresholds.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", weights, false, ImgDim1);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", weights_%s, false, ImgDim1);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) mem_mode = self.get_nodeattr("mem_mode") if mem_mode == "decoupled": self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> weights ("weights");'.format( - self.get_weightstream_width() + 'hls::stream<ap_uint<{}>> weights_{} ("weights_{}");'.format( + self.get_weightstream_width(), self.hls_sname(), self.hls_sname() ) ) @@ -654,10 +672,12 @@ class Thresholding_Batch(HLSCustomOp): if mem_mode == "const": self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ImgDim1, NumChannels1, PE1, {}, {}> - (in0, out, threshs, numReps);""".format( + (in0_{}, out_{}, threshs, numReps);""".format( node.op_type, tmpl_args["TSrcI"], tmpl_args["TDstI"], + self.hls_sname(), + self.hls_sname(), ) ] elif mem_mode == "decoupled": @@ -666,10 +686,13 @@ class Thresholding_Batch(HLSCustomOp): # - for synth the unit runs continuously anyway (ap_ctrl_none) self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<ImgDim1, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1> - (in0, out, weights, numReps);""".format( + (in0_{}, out_{}, weights_{}, numReps);""".format( "Thresholding_Stream_Batch", tmpl_args["TSrcI"], tmpl_args["TDstI"], + self.hls_sname(), + self.hls_sname(), + self.hls_sname(), ) ] else: @@ -692,12 +715,13 @@ class Thresholding_Batch(HLSCustomOp): # note: the innermost dim is not reversed for the output self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s", false);' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), shape_cpp_str, npy_out, ) @@ -709,24 +733,29 @@ class Thresholding_Batch(HLSCustomOp): def blackboxfunction(self): if self.get_nodeattr("mem_mode") == "const": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<{}>> &in0, - hls::stream<ap_uint<{}>> &out + """void {}(hls::stream<ap_uint<{}>> &in0_{}, + hls::stream<ap_uint<{}>> &out_{} )""".format( self.onnx_node.name, self.get_instream_width(), + self.hls_sname(), self.get_outstream_width(), + self.hls_sname(), ) ] elif self.get_nodeattr("mem_mode") == "decoupled": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<{}>> &in0, - hls::stream<ap_uint<{}>> &weights, - hls::stream<ap_uint<{}>> &out + """void {}(hls::stream<ap_uint<{}>> &in0_{}, + hls::stream<ap_uint<{}>> &weights_{}, + hls::stream<ap_uint<{}>> &out_{} )""".format( self.onnx_node.name, self.get_instream_width(), + self.hls_sname(), self.get_weightstream_width(), + self.hls_sname(), self.get_outstream_width(), + self.hls_sname(), ) ] else: @@ -734,10 +763,10 @@ class Thresholding_Batch(HLSCustomOp): def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" @@ -789,8 +818,7 @@ class Thresholding_Batch(HLSCustomOp): ) elif self.get_nodeattr("mem_mode") == "decoupled": self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=weights name=weights_" - + self.hls_sname() + "#pragma HLS INTERFACE axis port=weights_" + self.hls_sname() ) def code_generation_ipi(self): diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py index 895a2eedab51cee6322c7307ea1944d49a0dade5..6eaf03ab16d323129dcbf1ddba0fd133b105b476 100644 --- a/src/finn/custom_op/fpgadataflow/tlastmarker.py +++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py @@ -130,9 +130,11 @@ class TLastMarker(HLSCustomOp): self.code_gen_dict["$DOCOMPUTE$"] = [ "for(unsigned int i=0; i<NumItersPerImg; i++) {", "#pragma HLS PIPELINE II=1", - "out.write(in0.read().get_data());" + "out_%s.write(in0_%s.read().get_data());" + % (self.hls_sname(), self.hls_sname()) if use_qdma_axis - else "out.write(in0.read().data);", + else "out_%s.write(in0_%s.read().data);" + % (self.hls_sname(), self.hls_sname()), "}", ] @@ -146,17 +148,21 @@ class TLastMarker(HLSCustomOp): "#pragma HLS protocol fixed", "// do a first read from stream before we decide on numIters", "// giving software a chance to set up the numIters prior to startup", - "t.set_data(in0.read());" if use_qdma_axis else "t.data = in0.read();", + "t.set_data(in0_%s.read());" % self.hls_sname() + if use_qdma_axis + else "t.data = in0_%s.read();" % self.hls_sname(), "n = (numIters == 0 ? NumItersPerImg : numIters);", "t.set_last(n==1);" if use_qdma_axis else "t.last = (n==1);", - "out.write(t);", + "out_%s.write(t);" % self.hls_sname(), "} // end of cycle accurate region", "// do one less iteration than spec since we already did one", "for(unsigned int i=1; i<n; i++) {", "#pragma HLS PIPELINE II=1", - "t.set_data(in0.read());" if use_qdma_axis else "t.data = in0.read();", + "t.set_data(in0_%s.read());" % self.hls_sname() + if use_qdma_axis + else "t.data = in0_%s.read();" % self.hls_sname(), "t.set_last(i==(n-1));" if use_qdma_axis else "t.last = (i==(n-1));", - "out.write(t);", + "out_%s.write(t);" % self.hls_sname(), "}", ] @@ -168,11 +174,13 @@ class TLastMarker(HLSCustomOp): "t.set_keep(-1);" if use_qdma_axis else "t.keep = -1;", "for(unsigned int i=0; i<NumItersPerImg; i++) {", "#pragma HLS PIPELINE II=1", - "t.set_data(in0.read());" if use_qdma_axis else "t.data = in0.read();", + "t.set_data(in0_%s.read());" % self.hls_sname() + if use_qdma_axis + else "t.data = in0_%s.read();" % self.hls_sname(), "t.set_last(i==(NumItersPerImg-1));" if use_qdma_axis else "t.last = (i==(NumItersPerImg-1));", - "out.write(t);", + "out_%s.write(t);" % self.hls_sname(), "}", ] @@ -187,22 +195,23 @@ class TLastMarker(HLSCustomOp): if dyn_iters == 1: self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void %s(hls::stream<InDType> &in0, - hls::stream<OutDType> &out, unsigned int numIters)""" - % self.onnx_node.name + """void %s(hls::stream<InDType> &in0_%s, + hls::stream<OutDType> &out_%s, unsigned int numIters)""" + % (self.onnx_node.name, self.hls_sname(), self.hls_sname()) ] else: self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void %s(hls::stream<InDType> &in0, hls::stream<OutDType> &out)""" - % self.onnx_node.name + """void %s(hls::stream<InDType> &in0_%s, + hls::stream<OutDType> &out_%s)""" + % (self.onnx_node.name, self.hls_sname(), self.hls_sname()) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) dyn_iters = self.get_nodeattr("DynIters") @@ -239,10 +248,12 @@ class TLastMarker(HLSCustomOp): def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<InDType> in0 ("in0");' + 'hls::stream<InDType> in0_%s ("in0_%s");' + % (self.hls_sname(), self.hls_sname()) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<OutDType> out ("out");' + 'hls::stream<OutDType> out_%s ("out_%s");' + % (self.hls_sname(), self.hls_sname()) ) def get_verilog_top_module_intf_names(self): diff --git a/src/finn/custom_op/fpgadataflow/upsampler.py b/src/finn/custom_op/fpgadataflow/upsampler.py index b653b9386e940dd2220fa1fb0d198e63b81a356d..ab5a734e7c20a17603583a62bdb2650b11319307 100644 --- a/src/finn/custom_op/fpgadataflow/upsampler.py +++ b/src/finn/custom_op/fpgadataflow/upsampler.py @@ -187,17 +187,28 @@ class UpsampleNearestNeighbour_Batch(HLSCustomOp): npy_in = "%s/input_0.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"] = [] self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) def docompute(self): @@ -206,13 +217,15 @@ class UpsampleNearestNeighbour_Batch(HLSCustomOp): if is_2d: self.code_gen_dict["$DOCOMPUTE$"] = [ """UpsampleNearestNeighbour_Batch<OFMDim, IFMDim, IFMChannels, - ap_uint<Input_precision> > (in0, out, numReps);""" + ap_uint<Input_precision> > (in0_%s, out_%s, numReps);""" + % (self.hls_sname(), self.hls_sname()) ] else: assert batch == 1, "1D upsampler currently needs numReps=1" self.code_gen_dict["$DOCOMPUTE$"] = [ """UpsampleNearestNeighbour_1D<OFMDim, IFMDim, IFMChannels, - ap_uint<Input_precision> > (in0, out);""" + ap_uint<Input_precision> > (in0_%s, out_%s);""" + % (self.hls_sname(), self.hls_sname()) ] def dataoutstrm(self): @@ -231,12 +244,13 @@ class UpsampleNearestNeighbour_Batch(HLSCustomOp): oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s");' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s");' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), oshape_cpp_str, npy_out, ) @@ -249,16 +263,22 @@ class UpsampleNearestNeighbour_Batch(HLSCustomOp): packed_bits = self.get_instream_width() packed_hls_type = "ap_uint<%d>" % packed_bits self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)" - % (self.onnx_node.name, packed_hls_type, packed_hls_type) + "void %s(hls::stream<%s > &in0_%s, hls::stream<%s > &out_%s)" + % ( + self.onnx_node.name, + packed_hls_type, + self.hls_sname(), + packed_hls_type, + self.hls_sname(), + ) ] def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py index ede572f1a4c5fcf60476aa84beba8244eb9ec5a8..64fb5dcbe17adf726e6e982dd2203683bb86d1a8 100644 --- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py @@ -802,8 +802,15 @@ class VectorVectorActivation(HLSCustomOp): self.code_gen_dict["$READNPYDATA$"] = [] # note: the innermost dim is reversed for the input self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in0, false);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", in0_%s, false);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) mem_mode = self.get_nodeattr("mem_mode") @@ -817,23 +824,34 @@ class VectorVectorActivation(HLSCustomOp): npy_in = "%s/weights.npy" % code_gen_dir self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", weights, false, numReps);' - % (packed_hls_type, elem_hls_type, elem_bits, npy_type, npy_in) + 'npy2apintstream<%s, %s, %d, %s>("%s", weights_%s, false, numReps);' + % ( + packed_hls_type, + elem_hls_type, + elem_bits, + npy_type, + npy_in, + self.hls_sname(), + ) ) def strm_decl(self): mem_mode = self.get_nodeattr("mem_mode") self.code_gen_dict["$STREAMDECLARATIONS$"] = [] self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width()) + 'hls::stream<ap_uint<{}>> in0_{} ("in0_{}");'.format( + self.get_instream_width(), self.hls_sname(), self.hls_sname() + ) ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width()) + 'hls::stream<ap_uint<{}>> out_{} ("out_{}");'.format( + self.get_outstream_width(), self.hls_sname(), self.hls_sname() + ) ) if mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<ap_uint<{}>> weights ("weights");'.format( - self.get_weightstream_width() + 'hls::stream<ap_uint<{}>> weights_{} ("weights_{}");'.format( + self.get_weightstream_width(), self.hls_sname(), self.hls_sname() ) ) @@ -854,10 +872,12 @@ class VectorVectorActivation(HLSCustomOp): if mem_mode == "const": self.code_gen_dict["$DOCOMPUTE$"] = [ """Vector_Vector_Activate_Batch<Channels1, InnerProdDim, SIMD1, PE1, 1, {}, {}, {}> - (in0, out, weights, {}, numReps, {});""".format( + (in0_{}, out_{}, weights, {}, numReps, {});""".format( tmpl_args["TSrcI"], tmpl_args["TDstI"], tmpl_args["TWeightI"], + self.hls_sname(), + self.hls_sname(), threshs, map_to_hls_mult_style[self.get_nodeattr("resType")], ) @@ -871,12 +891,15 @@ class VectorVectorActivation(HLSCustomOp): wdtype_hls_str = export_wdt.get_hls_datatype_str() self.code_gen_dict["$DOCOMPUTE$"] = [ """{}<Channels1, InnerProdDim, SIMD1, PE1, 1, {}, {}, {}, {}> - (in0, out, weights, {}, numReps, {});""".format( + (in0_{}, out_{}, weights_{}, {}, numReps, {});""".format( "Vector_Vector_Activate_Stream_Batch", tmpl_args["TSrcI"], tmpl_args["TDstI"], tmpl_args["TWeightI"], wdtype_hls_str, + self.hls_sname(), + self.hls_sname(), + self.hls_sname(), threshs, map_to_hls_mult_style[self.get_nodeattr("resType")], ) @@ -904,12 +927,13 @@ class VectorVectorActivation(HLSCustomOp): # note: the innermost dim is not reversed for the output self.code_gen_dict["$DATAOUTSTREAM$"] = [ - 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", false);' + 'apintstream2npy<%s, %s, %d, %s>(out_%s, %s, "%s", false);' % ( packed_hls_type, elem_hls_type, elem_bits, npy_type, + self.hls_sname(), shape_cpp_str, npy_out, ) @@ -922,25 +946,30 @@ class VectorVectorActivation(HLSCustomOp): mem_mode = self.get_nodeattr("mem_mode") if mem_mode == "const": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ - """void {}(hls::stream<ap_uint<{}>> &in0, - hls::stream<ap_uint<{}>> &out + """void {}(hls::stream<ap_uint<{}>> &in0_{}, + hls::stream<ap_uint<{}>> &out_{} )""".format( self.onnx_node.name, self.get_instream_width(), + self.hls_sname(), self.get_outstream_width(), + self.hls_sname(), ) ] elif mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ """void {}( - hls::stream<ap_uint<{}>> &in0, - hls::stream<ap_uint<{}>> &weights, - hls::stream<ap_uint<{}>> &out + hls::stream<ap_uint<{}>> &in0_{}, + hls::stream<ap_uint<{}>> &weights_{}, + hls::stream<ap_uint<{}>> &out_{} )""".format( self.onnx_node.name, self.get_instream_width(), + self.hls_sname(), self.get_weightstream_width(), + self.hls_sname(), self.get_outstream_width(), + self.hls_sname(), ) ] else: @@ -952,10 +981,10 @@ class VectorVectorActivation(HLSCustomOp): def pragmas(self): mem_mode = self.get_nodeattr("mem_mode") self.code_gen_dict["$PRAGMAS$"] = [ - "#pragma HLS INTERFACE axis port=in0 name=in0_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=in0_" + self.hls_sname() ] self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=out name=out_" + self.hls_sname() + "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE ap_ctrl_none port=return" @@ -973,11 +1002,10 @@ class VectorVectorActivation(HLSCustomOp): ) elif mem_mode == "decoupled" or mem_mode == "external": self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS INTERFACE axis port=weights name=weights_" - + self.hls_sname() + "#pragma HLS INTERFACE axis port=weights_" + self.hls_sname() ) self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS stream depth=8 variable=weights" + "#pragma HLS stream depth=8 variable=weights_" + self.hls_sname() ) else: raise Exception(