Merge branch 'dev' into feature/datawidthconverter

282a658e · auphelia · 981f64d3 · e02dc5bd · 282a658e · 282a658e
Commit 282a658e authored 5 years ago by auphelia
--- a/docs/finn/source_code/finn.analysis.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.analysis.fpgadataflow.rst
-***********************
 Analysis - fpgadataflow 
 ***********************


--- a/docs/finn/source_code/finn.analysis.rst
+++ b/docs/finn/source_code/finn.analysis.rst
@@ -31,4 +31,3 @@ finn.analysis.verify\_custom\_nodes
   :undoc-members:
   :show-inheritance:

-
--- a/docs/finn/source_code/finn.core.rst
+++ b/docs/finn/source_code/finn.core.rst
@@ -54,4 +54,3 @@ finn.core.rtlsim\_exec
   :members:
   :undoc-members:
   :show-inheritance:
-
--- a/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.custom_op.fpgadataflow.rst
@@ -13,7 +13,6 @@ Base Class
   :undoc-members:
   :show-inheritance:

-
 finn.custom\_op.fpgadataflow.convolutioninputgenerator
 ------------------------------------------------------

@@ -22,6 +21,14 @@ finn.custom\_op.fpgadataflow.convolutioninputgenerator
   :undoc-members:
   :show-inheritance:

+finn.custom\_op.fpgadataflow.streamingdatawidthconverter\_batch
+---------------------------------------------------------------
+
+.. automodule:: finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.custom\_op.fpgadataflow.streamingfclayer\_batch
 ----------------------------------------------------


--- a/docs/finn/source_code/finn.custom_op.rst
+++ b/docs/finn/source_code/finn.custom_op.rst
@@ -21,6 +21,23 @@ Base Class
   :undoc-members:
   :show-inheritance:

+
+finn.custom\_op.im2col
+----------------------
+
+.. automodule:: finn.custom_op.im2col
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
+finn.custom\_op.maxpoolnhwc
+---------------------------
+
+.. automodule:: finn.custom_op.maxpoolnhwc
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.custom\_op.multithreshold
 ------------------------------

@@ -53,3 +70,4 @@ finn.custom\_op.xnorpopcount
   :undoc-members:
   :show-inheritance:

+
--- a/docs/finn/source_code/finn.rst
+++ b/docs/finn/source_code/finn.rst
@@ -14,3 +14,4 @@ Modules
   finn.custom_op
   finn.transformation
   finn.util
+
--- a/docs/finn/source_code/finn.transformation.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.transformation.fpgadataflow.rst
@@ -69,6 +69,14 @@ finn.transformation.fpgadataflow.hlssynth\_ipgen
   :undoc-members:
   :show-inheritance:

+finn.transformation.fpgadataflow.insert\_dwc
+--------------------------------------------
+
+.. automodule:: finn.transformation.fpgadataflow.insert_dwc
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.transformation.fpgadataflow.insert\_tlastmarker
 ----------------------------------------------------

@@ -132,3 +140,4 @@ finn.transformation.fpgadataflow.templates
   :members:
   :undoc-members:
   :show-inheritance:
+
--- a/docs/finn/source_code/finn.transformation.rst
+++ b/docs/finn/source_code/finn.transformation.rst
@@ -32,6 +32,14 @@ finn.transformation.bipolar\_to\_xnor
   :undoc-members:
   :show-inheritance:

+finn.transformation.double\_to\_single\_float
+---------------------------------------------
+
+.. automodule:: finn.transformation.double_to_single_float
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 finn.transformation.fold\_constants
 -----------------------------------

@@ -63,3 +71,12 @@ finn.transformation.infer\_shapes
   :members:
   :undoc-members:
   :show-inheritance:
+
+finn.transformation.lower\_convs\_to\_matmul
+--------------------------------------------
+
+.. automodule:: finn.transformation.lower_convs_to_matmul
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
--- a/docs/finn/source_code/finn.util.rst
+++ b/docs/finn/source_code/finn.util.rst
@@ -44,3 +44,4 @@ finn.util.test
   :members:
   :undoc-members:
   :show-inheritance:
+
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -138,7 +138,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
        """Returns FINN DataType of output."""
        return DataType[self.get_nodeattr("outputDataType")]

-    def get_stream_width(self):
+    def get_instream_width(self):
        """Returns stream width, input and output stream width are equal for
        the sliding window function"""
        ibits = self.get_input_datatype().bitwidth()
@@ -147,6 +147,12 @@ class ConvolutionInputGenerator(HLSCustomOp):
        assert simd == ifm_ch, "SWG currently requires SIMD=IFM"
        return simd * ibits

+    def get_outstream_width(self):
+        """Returns output stream width. Input and output stream widths are
+        equal for the sliding window function, so the function to determine
+        the input stream width can be reused."""
+        return self.get_instream_width()
+
    def get_number_output_values(self):
        folded_oshape = self.get_folded_output_shape()
        num_output_elems = np.prod(folded_oshape[:-1])
@@ -206,7 +212,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
                code_gen_dir, node.name, prefixed_top_name
            )
            if os.path.isfile(verilog_file):
-                nbits = self.get_stream_width()
+                nbits = self.get_instream_width()
                rtlsim_inp = npy_to_rtlsim_input(
                    "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
                )
@@ -223,7 +229,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
                rtlsim_output = self.rtlsim(sim, rtlsim_inp)
                odt = export_idt
                target_bits = odt.bitwidth()
-                packed_bits = self.get_stream_width()
+                packed_bits = self.get_outstream_width()
                out_npy_path = "{}/output.npy".format(code_gen_dir)
                out_shape = self.get_folded_output_shape()
                rtlsim_output_to_npy(
@@ -287,7 +293,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
            # use binary for bipolar storage
            dtype = DataType.BINARY
        elem_bits = dtype.bitwidth()
-        packed_bits = self.get_stream_width()
+        packed_bits = self.get_instream_width()
        packed_hls_type = "ap_uint<%d>" % packed_bits
        elem_hls_type = dtype.get_hls_datatype_str()
        npy_type = "float"
@@ -301,10 +307,10 @@ class ConvolutionInputGenerator(HLSCustomOp):
    def strm_decl(self):
        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
-            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_stream_width())
+            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
        )
        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
-            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_stream_width())
+            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
        )

    def docompute(self):
@@ -323,7 +329,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
            # use binary for bipolar storage
            dtype = DataType.BINARY
        elem_bits = dtype.bitwidth()
-        packed_bits = self.get_stream_width()
+        packed_bits = self.get_outstream_width()
        packed_hls_type = "ap_uint<%d>" % packed_bits
        elem_hls_type = dtype.get_hls_datatype_str()
        npy_type = "float"

--- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
@@ -150,10 +150,10 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
        folded_ishape = self.get_folded_input_shape()
        return np.prod(folded_ishape[:-1])

-    def get_in_stream_width(self):
+    def get_instream_width(self):
        return self.get_nodeattr("inWidth")

-    def get_out_stream_width(self):
+    def get_outstream_width(self):
        return self.get_nodeattr("outWidth")

    def make_shape_compatible_op(self, model):
@@ -236,7 +236,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
            # use binary for bipolar storage
            dtype = DataType.BINARY
        elem_bits = dtype.bitwidth()
-        packed_bits = self.get_in_stream_width()
+        packed_bits = self.get_instream_width()
        packed_hls_type = "ap_uint<%d>" % packed_bits
        elem_hls_type = dtype.get_hls_datatype_str()
        npy_type = "float"
@@ -250,10 +250,10 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
    def strm_decl(self):
        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
-            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_in_stream_width())
+            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
        )
        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
-            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_out_stream_width())
+            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
        )

    def docompute(self):
@@ -270,7 +270,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
            # use binary for bipolar storage
            dtype = DataType.BINARY
        elem_bits = dtype.bitwidth()
-        packed_bits = self.get_out_stream_width()
+        packed_bits = self.get_outstream_width()
        packed_hls_type = "ap_uint<%d>" % packed_bits
        elem_hls_type = dtype.get_hls_datatype_str()
        npy_type = "float"
@@ -294,9 +294,9 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
        self.code_gen_dict["$SAVEASCNPY$"] = []

    def blackboxfunction(self):
-        in_packed_bits = self.get_in_stream_width()
+        in_packed_bits = self.get_instream_width()
        in_packed_hls_type = "ap_uint<%d>" % in_packed_bits
-        out_packed_bits = self.get_out_stream_width()
+        out_packed_bits = self.get_outstream_width()
        out_packed_hls_type = "ap_uint<%d>" % out_packed_bits
        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
            "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)"
@@ -359,7 +359,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
                code_gen_dir, node.name, prefixed_top_name
            )
            if os.path.isfile(verilog_file):
-                nbits = self.get_in_stream_width()
+                nbits = self.get_instream_width()
                rtlsim_inp = npy_to_rtlsim_input(
                    "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
                )
@@ -376,7 +376,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
                rtlsim_output = self.rtlsim(sim, rtlsim_inp)
                odt = export_idt
                target_bits = odt.bitwidth()
-                packed_bits = self.get_out_stream_width()
+                packed_bits = self.get_outstream_width()
                out_npy_path = "{}/output.npy".format(code_gen_dir)
                out_shape = self.get_folded_output_shape()
                rtlsim_output_to_npy(

--- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
@@ -84,11 +84,15 @@ class StreamingMaxPool_Batch(HLSCustomOp):
        folded_oshape = self.get_folded_output_shape()
        return np.prod(folded_oshape[:-1])

-    def get_stream_width(self):
+    def get_instream_width(self):
        dt_bits = self.get_input_datatype().bitwidth()
        ifm_ch = self.get_nodeattr("NumChannels")
        return int(dt_bits * ifm_ch)

+    def get_outstream_width(self):
+        """For streaming maxpool out stream width is the same as in stream width"""
+        return self.get_instream_width()
+
    def make_shape_compatible_op(self, model):
        exp_ishape = self.get_normal_input_shape()
        oshape = self.get_normal_output_shape()
@@ -167,7 +171,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
            # use binary for bipolar storage
            dtype = DataType.BINARY
        elem_bits = dtype.bitwidth()
-        packed_bits = self.get_stream_width()
+        packed_bits = self.get_instream_width()
        packed_hls_type = "ap_uint<%d>" % packed_bits
        elem_hls_type = dtype.get_hls_datatype_str()
        npy_type = "float"
@@ -181,10 +185,10 @@ class StreamingMaxPool_Batch(HLSCustomOp):
    def strm_decl(self):
        self.code_gen_dict["$STREAMDECLARATIONS$"] = []
        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
-            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_stream_width())
+            'hls::stream<ap_uint<{}>> in0 ("in0");'.format(self.get_instream_width())
        )
        self.code_gen_dict["$STREAMDECLARATIONS$"].append(
-            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_stream_width())
+            'hls::stream<ap_uint<{}>> out ("out");'.format(self.get_outstream_width())
        )

    def docompute(self):
@@ -211,7 +215,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
            # use binary for bipolar storage
            dtype = DataType.BINARY
        elem_bits = dtype.bitwidth()
-        packed_bits = self.get_stream_width()
+        packed_bits = self.get_outstream_width()
        packed_hls_type = "ap_uint<%d>" % packed_bits
        elem_hls_type = dtype.get_hls_datatype_str()
        npy_type = "float"
@@ -235,7 +239,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
        self.code_gen_dict["$SAVEASCNPY$"] = []

    def blackboxfunction(self):
-        packed_bits = self.get_stream_width()
+        packed_bits = self.get_instream_width()
        packed_hls_type = "ap_uint<%d>" % packed_bits
        self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
            "void %s(hls::stream<%s > &in0, hls::stream<%s > &out)"
@@ -303,7 +307,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
                code_gen_dir, node.name, prefixed_top_name
            )
            if os.path.isfile(verilog_file):
-                nbits = self.get_stream_width()
+                nbits = self.get_instream_width()
                rtlsim_inp = npy_to_rtlsim_input(
                    "{}/input_0.npy".format(code_gen_dir), export_idt, nbits
                )
@@ -320,7 +324,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
                rtlsim_output = self.rtlsim(sim, rtlsim_inp)
                odt = export_idt
                target_bits = odt.bitwidth()
-                packed_bits = self.get_stream_width()
+                packed_bits = self.get_outstream_width()
                out_npy_path = "{}/output.npy".format(code_gen_dir)
                out_shape = self.get_folded_output_shape()
                rtlsim_output_to_npy(

--- a/src/finn/transformation/fpgadataflow/insert_dwc.py
+++ b/src/finn/transformation/fpgadataflow/insert_dwc.py
@@ -62,15 +62,9 @@ class InsertDWC(Transformation):
                    if n0_out_shape[-1] != n1_in_shape[-1]:
                        graph_modified = True
                        # determine dwc inwidth
-                        if hasattr(n0, "get_outstream_width"):
-                            dwc_in_width = n0.get_outstream_width()
-                        else:
-                            dwc_in_width = n0.get_stream_width()
+                        dwc_in_width = n0.get_outstream_width()
                        # determine dwc outwidth
-                        if hasattr(n1, "get_instream_width"):
-                            dwc_out_width = n1.get_instream_width()
-                        else:
-                            dwc_out_width = n1.get_stream_width()
+                        dwc_out_width = n1.get_instream_width()

                        # determine shape for dwc
                        dwc_shape = n0.get_normal_output_shape()