diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py index 3088ceaeb057c61040390d007a1cfee878978a9e..e43d73b1cd3ec7902fc743bfdf4d2fcad1c01dfe 100644 --- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py +++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator1d.py @@ -608,9 +608,9 @@ class ConvolutionInputGenerator1D(HLSCustomOp): # pass the number of pixels in the folded output to apintstream2npy, needed # to unpack the ouput correctly and reverse only the inner SIMD dimension k_h, k_w = self.get_nodeattr("ConvKernelDim") - mmv = k_h * k_w + multi_pixel_out = k_h * k_w else: - mmv = 1 + multi_pixel_out = 1 self.code_gen_dict["$DATAOUTSTREAM$"] = [ 'apintstream2npy<%s, %s, %d, %s>(out, %s, "%s", true, 1, %d);' @@ -621,7 +621,7 @@ class ConvolutionInputGenerator1D(HLSCustomOp): npy_type, oshape_cpp_str, npy_out, - mmv, + multi_pixel_out, ) ] diff --git a/src/finn/qnn-data/cpp/npy2apintstream.hpp b/src/finn/qnn-data/cpp/npy2apintstream.hpp index 33a3913ec9e3941670ef9fe6f693edc1cff524e0..6aade3a2bbe2ba9914728802a8a6a448ef2d9fb2 100644 --- a/src/finn/qnn-data/cpp/npy2apintstream.hpp +++ b/src/finn/qnn-data/cpp/npy2apintstream.hpp @@ -45,24 +45,24 @@ void npy2apintstream(const char * npy_path, hls::stream<PackedT> & out_stream, b } template <typename PackedT, typename ElemT, int ElemBits, typename NpyT> -void apintstream2npy(hls::stream<PackedT> & in_stream, const std::vector<size_t> & shape, const char * npy_path, bool reverse_inner = true, size_t numReps = 1, size_t mmv = 1) { +void apintstream2npy(hls::stream<PackedT> & in_stream, const std::vector<size_t> & shape, const char * npy_path, bool reverse_inner = true, size_t numReps = 1, size_t multi_pixel_out = 1) { for(size_t rep = 0; rep < numReps; rep++) { std::vector<NpyT> data_to_save; size_t outer_dim_elems = 1; for(size_t dim = 0; dim < shape.size()-1; dim++) { outer_dim_elems *= shape[dim]; } - size_t inner_dim_elems = shape[shape.size()-1] / mmv; - DEBUG_APINTSTREAM2NPY("n_outer " << outer_dim_elems << " n_inner " << inner_dim_elems << " n_MMV " << mmv) + size_t inner_dim_elems = shape[shape.size()-1] / multi_pixel_out; + DEBUG_APINTSTREAM2NPY("n_outer " << outer_dim_elems << " n_inner " << inner_dim_elems << " n_multi_pixel_out " << multi_pixel_out) for(size_t outer_elem = 0; outer_elem < outer_dim_elems; outer_elem++) { PackedT packed_elem; in_stream >> packed_elem; DEBUG_APINTSTREAM2NPY("packed hls elem " << std::hex << packed_elem << std::dec) - for(size_t ii_mmv = 0; ii_mmv < mmv; ii_mmv++) { - // loop over MMV blocks of inner_dim_elems separately, - // so that reverse_inner is not applied across MMV dimension (e.g. multiple pixels) + for(size_t ii_multi_pixel_out = 0; ii_multi_pixel_out < multi_pixel_out; ii_multi_pixel_out++) { + // loop over multi_pixel_out blocks of inner_dim_elems separately, + // so that reverse_inner is not applied across multiple pixels for(size_t ii = 0; ii < inner_dim_elems; ii++) { - size_t i = ii_mmv*inner_dim_elems; + size_t i = ii_multi_pixel_out*inner_dim_elems; i += reverse_inner ? inner_dim_elems-ii-1 : ii; ap_uint<ElemBits> tmp_elem = packed_elem((i+1)*ElemBits-1, i*ElemBits); // important: don't init elem = reinterpret_cast.. directly here