From a976e5bc8a9d360485f8a7e2077dc39a58368c73 Mon Sep 17 00:00:00 2001 From: Lucian Petrica <lucianp@xilinx.com> Date: Fri, 23 Jul 2021 11:58:47 +0100 Subject: [PATCH] Removed template for streaming thresholds (moved to hlslib); added h/w dimensions for imdim (non-square) --- src/finn/custom_op/fpgadataflow/templates.py | 53 ------------------- .../fpgadataflow/thresholding_batch.py | 22 ++++---- 2 files changed, 11 insertions(+), 64 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 40221ce3b..7f8dbc787 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -355,56 +355,3 @@ $LAYER_NAME$ endmodule """ - -decoupled_thresholding_template = """ -template < - unsigned ImgDim, unsigned NumChannels, unsigned PE, - typename TSrcI = Identity, typename TDstI = Identity, - int ActVal=0, typename TT, unsigned int NumSteps, - typename TI, typename TO> -void Thresholding_Stream_Batch(hls::stream<TI> &in, - hls::stream<TO> &out, - hls::stream<ap_uint<PE*NumSteps*TT::width>> &weight, - int const reps) -{ - - // how many different rows each neuron will compute - // alternatively: number of vertical matrix chunks - unsigned const NF = NumChannels / PE; - - ThresholdsActivation<1, PE, NumSteps, TT, TO, ActVal, comp::less_equal<TT>> internal_thr; - #pragma HLS ARRAY_PARTITION variable=internal_thr.m_thresholds complete dim=0 - - // everything merged into a common iteration space (one "big" loop instead - // of smaller nested loops) to get the pipelinening the way we want - for (unsigned i = 0; i < reps * ImgDim * ImgDim * NF; i++) - { - #pragma HLS PIPELINE II=1 - - ap_uint<PE*NumSteps*TT::width> packed_thr; - packed_thr = weight.read(); - // slicer to get 1 PE's worth of thresholds - auto const pe_slicer = Slice<ap_uint<NumSteps*TT::width>>()(packed_thr); - - TI inElem; - inElem = in.read(); - auto outElem = TDstI().template operator()<TO>(); - - for (unsigned pe = 0; pe < PE; pe++) - { -#pragma HLS UNROLL - // slicer to get individual thresholds - auto const thr_slicer = Slice<TT>()(pe_slicer(pe, 0)); - for (unsigned nt = 0; nt < NumSteps; nt++) - { - #pragma HLS UNROLL - internal_thr.m_thresholds[pe][0][nt] = thr_slicer(nt, 0); - } - - auto const act = TSrcI()(inElem); - outElem(pe,0,1) = internal_thr.activate(0, pe, act(pe,0)); - } - out.write(outElem); - } -} -""" diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py index 2944aeaa3..b4dc32943 100644 --- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py +++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py @@ -621,10 +621,6 @@ class Thresholding_Batch(HLSCustomOp): self.code_gen_dict["$DEFINES$"].append( "#define NumSteps1 %d" % self.get_nodeattr("numSteps") ) - # TODO remove once Thresholding_Stream_Batch is in hlslib: - self.code_gen_dict["$DEFINES$"].append( - templates.decoupled_thresholding_template - ) def read_npy_data(self): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -679,28 +675,32 @@ class Thresholding_Batch(HLSCustomOp): node = self.onnx_node ishape = self.get_folded_input_shape() if len(ishape) == 3: - imgdim = 1 + imgdimh = 1 + imgdimw = 1 elif len(ishape) == 5: - imgdim = ishape[1] + imgdimh = ishape[1] + imgdimw = ishape[2] else: - raise Exception("""Unexpeted input shape""") + raise Exception("""Unexpected input shape""") mem_mode = self.get_nodeattr("mem_mode") if mem_mode == "const": self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<{}, NumChannels1, PE1, {}, {}> + """{}<{}, {}, NumChannels1, PE1, {}, {}> (in0, out, threshs, numReps);""".format( node.op_type, - imgdim, + imgdimh, + imgdimw, tmpl_args["TSrcI"], tmpl_args["TDstI"], ) ] elif mem_mode == "decoupled": self.code_gen_dict["$DOCOMPUTE$"] = [ - """{}<{}, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1> + """{}<{}, {}, NumChannels1, PE1, {}, {}, ActVal1, ThresType1, NumSteps1> (in0, out, weights, numReps);""".format( "Thresholding_Stream_Batch", - imgdim, + imgdimh, + imgdimw, tmpl_args["TSrcI"], tmpl_args["TDstI"], ) -- GitLab