diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index 332f0263859504230c91f6d11147c28aa4e0d617..0aea65fdd7999b56989239685f6606a8e1b2e618 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -12,8 +12,8 @@ gecho () {
 
 # checkout the correct dependency repo commits
 # the repos themselves are cloned in the Dockerfile
-FINN_BASE_COMMIT=8b33862ea6955234e59cc52888f268cc690acf90
-BREVITAS_COMMIT=d579814b62ab33af0cd24fef49a6a34dc7e2f9b3
+FINN_BASE_COMMIT=f2e5f0582ef2b7cbc134168993816c337ca8d3a6
+BREVITAS_COMMIT=b75e0408d9759ed519296e3af29b9c16fb94b0b8
 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
 HLSLIB_COMMIT=cfafe11a93b79ab1af7529d68f08886913a6466e
 PYVERILATOR_COMMIT=06c29ecf3ba0361e3d0a75c98f6918ba67bf0e27
diff --git a/src/finn/analysis/fpgadataflow/res_estimation.py b/src/finn/analysis/fpgadataflow/res_estimation.py
index 4d1a0bfa76d01f10706748b0200ac7fd3d312db7..2c714b1f12b75e9789f1865d6737422f4d9d9a97 100644
--- a/src/finn/analysis/fpgadataflow/res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/res_estimation.py
@@ -45,3 +45,41 @@ def res_estimation(model):
             res_dict[node.name] = inst.node_res_estimation()
 
     return res_dict
+
+
+def res_estimation_complete(model):
+    """Estimates the resources needed for the given model and all values for
+    resource-related switches.
+    Ensure that all nodes have unique names (by calling the GiveUniqueNodeNames
+    transformation) prior to calling this analysis pass to ensure all nodes are
+    visible in the results.
+
+    Returns {node name : [resource estimation(s)]}."""
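+
+    Example (assuming a hypothetical ModelWrapper instance `model`):
+        model = model.transform(GiveUniqueNodeNames())
+        res_dict = model.analysis(res_estimation_complete)
+    """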
+
+    res_dict = {}
+    for node in model.graph.node:
+        if is_fpgadataflow_node(node) is True:
+            op_type = node.op_type
+            inst = registry.getCustomOp(node)
+            if op_type in ["StreamingFCLayer_Batch", "Vector_Vector_Activate_Batch"]:
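+                # sweep resType over "dsp" and "lut", restoring the original
+                # attribute value afterwards so the pass has no side effects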
+                orig_restype = inst.get_nodeattr("resType")
+                res_dict[node.name] = []
+                inst.set_nodeattr("resType", "dsp")
+                res_dict[node.name].append(inst.node_res_estimation())
+                inst.set_nodeattr("resType", "lut")
+                res_dict[node.name].append(inst.node_res_estimation())
+                inst.set_nodeattr("resType", orig_restype)
+            elif op_type == "ConvolutionInputGenerator":
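+                # likewise sweep ram_style over all supported memory types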
+                orig_ramstyle = inst.get_nodeattr("ram_style")
+                res_dict[node.name] = []
+                inst.set_nodeattr("ram_style", "block")
+                res_dict[node.name].append(inst.node_res_estimation())
+                inst.set_nodeattr("ram_style", "distributed")
+                res_dict[node.name].append(inst.node_res_estimation())
+                inst.set_nodeattr("ram_style", "ultra")
+                res_dict[node.name].append(inst.node_res_estimation())
+                inst.set_nodeattr("ram_style", orig_ramstyle)
+            else:
+                res_dict[node.name] = [inst.node_res_estimation()]
+
+    return res_dict
diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
index ddd431cb2a9ba5ae64d21d12ed0313c2379ee497..635f37d5695a56d7c22f2287030ccb7331ab347b 100644
--- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
+++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
@@ -95,11 +95,11 @@ class ChannelwiseOp_Batch(HLSCustomOp):
         my_attrs = {
             # channelwise "map" function to apply:
             # one of cmp_le, cmp_ge, add, mul
-            "Func": ("s", False, "cmp_le"),
+            "Func": ("s", False, "cmp_le", {"cmp_le", "cmp_ge", "add", "mul"}),
             "PE": ("i", True, 0),
             "NumChannels": ("i", True, 0),
             # string defining memory resource type for parameters
-            "ram_style": ("s", False, "distributed"),
+            "ram_style": ("s", False, "distributed", {"distributed", "block"}),
             # FINN DataTypes for inputs, weights, outputs
             "inputDataType": ("s", True, ""),
             "paramDataType": ("s", True, ""),
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
index 3044ed46f9524e035b95661eed5b7043c84dfdc6..3f400053df8de6ec1e53e39fb5a3edee15f3ab30 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -28,6 +28,7 @@
 
 import os
 
+import math
 import numpy as np
 
 from finn.core.datatype import DataType
@@ -69,13 +70,18 @@ class ConvolutionInputGenerator(HLSCustomOp):
             # FINN DataTypes for inputs, weights, outputs
             "inputDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
-            "depthwise": ("i", False, 0),
+            "depthwise": ("i", False, 0, {0, 1}),
             # FPGA resource type for ConvolutionInputGenerator input buffer
             # auto -- let Vivado HLS decide
             # block -- use BRAM
             # distributed -- use LUTRAM
             # ultra -- use URAM
-            "ram_style": ("s", False, "distributed"),
+            "ram_style": (
+                "s",
+                False,
+                "distributed",
+                {"auto", "block", "distributed", "ultra"},
+            ),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
@@ -194,6 +200,75 @@ class ConvolutionInputGenerator(HLSCustomOp):
 
         return int(exp_cycles)
 
+    def bram_estimation(self):
+        simd = self.get_nodeattr("SIMD")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ifm_dim = self.get_nodeattr("IFMDim")
+        k = self.get_nodeattr("ConvKernelDim")
+        stride = self.get_nodeattr("Stride")
+        ram_style = self.get_nodeattr("ram_style")
+        if ram_style == "block" or ram_style == "auto":
+            ram_depth = ifm_dim * ifm_ch / simd
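+            # pick the widest RAMB18 aspect ratio that still covers the
+            # required buffer depth (512x36 down to 16384x1, see UG573)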
+            if ram_depth <= 512:
+                ram_width = 36
+            elif ram_depth <= 1024:
+                ram_width = 18
+            elif ram_depth <= 2048:
+                ram_width = 9
+            elif ram_depth <= 4096:
+                ram_width = 4
+            elif ram_depth <= 8192:
+                ram_width = 2
+            else:
+                ram_width = 1
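+            # (k + stride) line buffers, each built from a grid of BRAMs
+            # covering the stream width (simd * input bitwidth) and depth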
+            return int(
+                (k + stride)
+                * (
+                    math.ceil(simd * self.get_input_datatype().bitwidth() / ram_width)
+                    * math.ceil(ifm_dim * ifm_ch / simd / ram_depth)
+                )
+            )
+        else:
+            return 0
+
+    def lut_estimation(self):
+        simd = self.get_nodeattr("SIMD")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ifm_dim = self.get_nodeattr("IFMDim")
+        k = self.get_nodeattr("ConvKernelDim")
+        stride = self.get_nodeattr("Stride")
+        ram_style = self.get_nodeattr("ram_style")
+        if ram_style == "distributed":
+            ram_luts = int(
+                (k + stride)
+                * (
+                    simd
+                    * self.get_input_datatype().bitwidth()
+                    * math.ceil(ifm_dim * ifm_ch / simd / 64)
+                )
+            )
+        else:
+            ram_luts = 0
+        return 300 + ram_luts
+
+    def uram_estimation(self):
+        simd = self.get_nodeattr("SIMD")
+        ifm_ch = self.get_nodeattr("IFMChannels")
+        ifm_dim = self.get_nodeattr("IFMDim")
+        k = self.get_nodeattr("ConvKernelDim")
+        stride = self.get_nodeattr("Stride")
+        ram_style = self.get_nodeattr("ram_style")
+        if ram_style == "ultra":
+            return int(
+                (k + stride)
+                * (
+                    math.ceil(simd * self.get_input_datatype().bitwidth() / 64)
+                    * math.ceil(ifm_dim * ifm_ch / simd / 4096)
+                )
+            )
+        else:
+            return 0
+
     def execute_node(self, context, graph):
         mode = self.get_nodeattr("exec_mode")
         node = self.onnx_node
diff --git a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
index f4f2b89f076d5c181fe57bf030d1a59706e301db..e8efa3abb4e75830bf31cd88c8cb21f517e0a9f7 100644
--- a/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/fmpadding_batch.py
@@ -28,7 +28,7 @@ class FMPadding_Batch(HLSCustomOp):
             # controls distribution of padded pixels
             # in case of uneven padding -- see FMPadding fxn
             # in hlslib
-            "PaddingStyle": ("i", False, 2),
+            "PaddingStyle": ("i", False, 2, {2, 1}),
             # shape describing input vecs per execution
             "numInputVectors": ("i", False, 1),
         }
diff --git a/src/finn/custom_op/fpgadataflow/hlscustomop.py b/src/finn/custom_op/fpgadataflow/hlscustomop.py
index 436384c51a2e629af051354619744928c1187feb..06cc2d253d577fe14ae965e07868dea4e656d927 100644
--- a/src/finn/custom_op/fpgadataflow/hlscustomop.py
+++ b/src/finn/custom_op/fpgadataflow/hlscustomop.py
@@ -82,7 +82,7 @@ class HLSCustomOp(CustomOp):
             "ipgen_path": ("s", False, ""),
             "ip_path": ("s", False, ""),
             "ip_vlnv": ("s", False, ""),
-            "exec_mode": ("s", False, ""),
+            "exec_mode": ("s", False, "", {"", "rtlsim", "cppsim"}),
             "cycles_rtlsim": ("i", False, 0),
             "cycles_estimate": ("i", False, 0),
             "rtlsim_trace": ("s", False, ""),
@@ -194,6 +194,8 @@ class HLSCustomOp(CustomOp):
         ret["BRAM_18K"] = self.bram_estimation()
         ret["BRAM_efficiency"] = self.bram_efficiency_estimation()
         ret["LUT"] = self.lut_estimation()
+        ret["URAM"] = self.uram_estimation()
+        ret["DSP"] = self.dsp_estimation()
         return ret
 
     def bram_efficiency_estimation(self):
@@ -206,11 +208,21 @@ class HLSCustomOp(CustomOp):
         HLSCustomOp class but has to be filled by every node"""
         return 0
 
+    def uram_estimation(self):
+        """Function for UltraRAM resource estimation, is member function of
+        HLSCustomOp class but has to be filled by every node"""
+        return 0
+
     def lut_estimation(self):
         """Function for LUT resource estimation, is member function of
         HLSCustomOp class but has to be filled by every node"""
         return 0
 
+    def dsp_estimation(self):
+        """Function for DSP resource estimation, is member function of
+        HLSCustomOp class but has to be filled by every node"""
+        return 0
+
     def get_exp_cycles(self):
         """Function for estimation of expected cycles for set folding,
         is member function of HLSCustomOp class but has to be filled
diff --git a/src/finn/custom_op/fpgadataflow/iodma.py b/src/finn/custom_op/fpgadataflow/iodma.py
index 2976dae223420ae17e8d92562866d08cda890a64..dc5c37619dae26ceedbcede0032caa930c16f9dd 100644
--- a/src/finn/custom_op/fpgadataflow/iodma.py
+++ b/src/finn/custom_op/fpgadataflow/iodma.py
@@ -87,8 +87,8 @@ class IODMA(HLSCustomOp):
             "streamWidth": ("i", False, 32),
             # DMA-specific parameters
             "intfWidth": ("i", False, 32),
-            "burstMode": ("s", False, "increment"),
-            "direction": ("s", False, "in"),
+            "burstMode": ("s", False, "increment", {"wrap", "increment"}),
+            "direction": ("s", False, "in", {"in", "out"}),
             # shape describing input vecs per execution
             "numInputVectors": ("ints", False, [1]),
             # name of axi-mm interface
diff --git a/src/finn/custom_op/fpgadataflow/pool_batch.py b/src/finn/custom_op/fpgadataflow/pool_batch.py
index b76e5432dc51efc604fc69b31c10f08442e9600d..edba084b5258de37198520257e438f90f8cc65e3 100644
--- a/src/finn/custom_op/fpgadataflow/pool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/pool_batch.py
@@ -60,9 +60,9 @@ class Pool_Batch(HLSCustomOp):
             "KernelSize": ("i", True, 0),
             # Function:
             #  - MaxPool
-            #  - AvgPool (not yet supported, but HLSLIB does)
-            #  - AccPool (not yet supported, but HLSLIB does)
-            "Function": ("s", True, ""),
+            #  - QuantAvgPool
+            # TODO add support for AvgPool and AccPool
+            "Function": ("s", True, "", {"MaxPool", "QuantAvgPool"}),
             "OutImgDim": ("i", True, 0),
             # FINN DataTypes for inputs/outputs
             "InputDataType": ("s", True, ""),
diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
index 1bc20fb3febe3ff411056664c1f1d0c439d9cda1..16ec6587861c7de6829f812fb539d6fc40c2ece4 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
@@ -55,7 +55,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
             # Toggle between hls or IPI implementation
             # hls - use the hls generated IP during stitching
             # vivado - use the AXI Infrastructure DWC
-            "impl_style": ("s", False, "hls"),
+            "impl_style": ("s", False, "hls", {"hls", "vivado"}),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 0d7fa341d0e8ecd465731a26403a95b97de8cd98..b4f85c29bd5233e65b40b2bd580b33c714baf378 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -68,7 +68,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             "SIMD": ("i", True, 0),
             "MW": ("i", True, 0),
             "MH": ("i", True, 0),
-            "resType": ("s", True, ""),
+            "resType": ("s", False, "lut", {"auto", "lut", "dsp"}),
             "ActVal": ("i", False, 0),
             # FINN DataTypes for inputs, weights, outputs
             "inputDataType": ("s", True, ""),
@@ -78,9 +78,9 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             "accDataType": ("s", False, "INT32"),
             # use xnor-popcount for binary weights/inputs, thus treating them
             # as bipolar
-            "binaryXnorMode": ("i", False, 0),
+            "binaryXnorMode": ("i", False, 0, {0, 1}),
             # no-activation mode (produce accumulators)
-            "noActivation": ("i", False, 0),
+            "noActivation": ("i", False, 0, {0, 1}),
             # number of input vectors, examples:
             # [1] is a single vector (like a FC layer with batch=1)
             # [4] is four vectors (like a FC layer with batch=4)
@@ -90,13 +90,13 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             # const -- embedded weights, default, long compile/synth times
             # decoupled -- streaming weights with weight streamer packaged inside IP
             # external -- streaming weights with external streamer
-            "mem_mode": ("s", False, "const"),
+            "mem_mode": ("s", False, "const", {"const", "decoupled", "external"}),
             # FPGA resource type for memories in decoupled mode
             # auto -- let Vivado decide
             # block -- use BRAM
             # distributed -- use LUTRAM
             # see also https://www.xilinx.com/support/answers/38070.html
-            "ram_style": ("s", False, "auto"),
+            "ram_style": ("s", False, "auto", {"auto", "block", "distributed"}),
             # (mem_mode = decoupled only) whether weights will be writable through
             # an AXI-lite interface during runtime
             # 1 for enabled, 0 for disabled.
@@ -106,7 +106,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             # always "flush" the accelerator by first passing a dummy input
             # vector through the accelerator. This will get rid of any old
             # weight data from the weight FIFOs.
-            "runtime_writeable_weights": ("i", False, 0),
+            "runtime_writeable_weights": ("i", False, 0, {0, 1}),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
@@ -231,7 +231,27 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         D_in = self.get_nodeattr("MW")
         D_out = self.get_nodeattr("MH")
         omega = (D_in * D_out) / (Q * P)
-        return P * (math.ceil(omega / 512)) * (math.ceil((Q * W) / 36))
+        mem_width = Q * W * P
+        mmode = self.get_nodeattr("mem_mode")
+        mstyle = self.get_nodeattr("ram_style")
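+        # distributed-style decoupled weights and small const memories are
+        # mapped to LUTRAM rather than BRAM, so no BRAMs are needed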
+        if (mmode == "decoupled" and mstyle == "distributed") or (
+            mmode == "const" and self.calc_wmem() <= 128
+        ):
+            return 0
+        # assuming SDP mode RAMB18s (see UG573 Table 1-10)
+        # assuming decoupled (RTL) memory, which is more efficient than const (HLS)
+        if mem_width == 1:
+            return math.ceil(omega / 16384)
+        elif mem_width == 2:
+            return math.ceil(omega / 8192)
+        elif mem_width <= 4:
+            return (math.ceil(omega / 4096)) * (math.ceil(mem_width / 4))
+        elif mem_width <= 9:
+            return (math.ceil(omega / 2048)) * (math.ceil(mem_width / 9))
+        elif mem_width <= 18 or omega > 512:
+            return (math.ceil(omega / 1024)) * (math.ceil(mem_width / 18))
+        else:
+            return (math.ceil(omega / 512)) * (math.ceil(mem_width / 36))
 
     def bram_efficiency_estimation(self):
         wdt = self.get_weight_datatype()
@@ -239,6 +259,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         D_in = self.get_nodeattr("MW")
         D_out = self.get_nodeattr("MH")
         bram16_est = self.bram_estimation()
+        if bram16_est == 0:
+            return 1
         wbits = W * D_in * D_out
         bram16_est_capacity = bram16_est * 36 * 512
         return wbits / bram16_est_capacity
@@ -254,6 +276,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         # TODO add in/out FIFO contributions
         P = self.get_nodeattr("PE")
         Q = self.get_nodeattr("SIMD")
+        MW = self.get_nodeattr("MW")
         wdt = self.get_weight_datatype()
         W = wdt.bitwidth()
         # determine tdt with input and weight data types
@@ -262,8 +285,55 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         # parameters from experiments in paper mentioned above
         c0 = 300
         c1 = 1.1
+        c2 = 0
+        mmode = self.get_nodeattr("mem_mode")
+        mstyle = self.get_nodeattr("ram_style")
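+        # c2 models LUTRAM weight storage (assuming 64 weight bits per LUT)
+        # for the cases that bram_estimation maps to LUTRAM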
+        if (mmode == "decoupled" and mstyle == "distributed") or (
+            mmode == "const" and self.calc_wmem() <= 128
+        ):
+            c2 = (P * Q * W) * math.ceil(self.calc_wmem() / 64)
+
+        # multiplication
+        res_type = self.get_nodeattr("resType")
+        if res_type == "dsp":
+            mult_luts = 0
+        else:
+            mult_luts = Q * (2 * math.ceil((W + A) / 6) - 1) * (W + A)
+        # adder tree
+        addertree_luts = (W + A) * (2 * Q - 1)
+        # accumulator
+        acc_bits = W + A + math.ceil(math.log(MW, 2))
+        acc_luts = acc_bits
+        # thresholds and threshold comparators
+        thr_luts = 0
+        comp_luts = 0
+        noact = self.get_nodeattr("noActivation")
+        if noact == 0:
+            odt = self.get_output_datatype()
+            B = odt.bitwidth()
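+            # (2^B - 1) thresholds, each acc_bits wide; storage is modeled
+            # at 64 threshold bits per LUT, plus one acc_bits-wide
+            # comparator per threshold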
+            thr_luts = (2 ** B - 1) * acc_bits * math.ceil(self.calc_tmem() / 64)
+            comp_luts = (2 ** B - 1) * acc_bits
+
+        return int(
+            c0
+            + c1 * (P * (mult_luts + addertree_luts + acc_luts + thr_luts + comp_luts))
+            + c2
+        )
 
-        return c0 + c1 * (P * Q) * (W * A)
+    def dsp_estimation(self):
+        # multiplication
+        P = self.get_nodeattr("PE")
+        res_type = self.get_nodeattr("resType")
+        Q = self.get_nodeattr("SIMD")
+        wdt = self.get_weight_datatype()
+        W = wdt.bitwidth()
+        idt = self.get_input_datatype()
+        A = idt.bitwidth()
+        if res_type == "dsp":
+            mult_dsp = P * Q * np.ceil((W + A) / 48)  # TODO: more accurate modelling
+        else:
+            mult_dsp = 0
+        return int(mult_dsp)
 
     def get_exp_cycles(self):
         pe = self.get_nodeattr("PE")
@@ -915,6 +985,11 @@ class StreamingFCLayer_Batch(HLSCustomOp):
 
     def docompute(self):
         mem_mode = self.get_nodeattr("mem_mode")
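+        # translate the resType attribute into hlslib resource selector types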
+        map_to_hls_mult_style = {
+            "auto": "ap_resource_dflt()",
+            "lut": "ap_resource_lut()",
+            "dsp": "ap_resource_dsp()",
+        }
         tmpl_args = self.get_template_param_values()
         if self.calc_tmem() == 0:
             odtype_hls_str = self.get_output_datatype().get_hls_datatype_str()
@@ -931,7 +1006,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                     tmpl_args["TDstI"],
                     tmpl_args["TWeightI"],
                     threshs,
-                    self.get_nodeattr("resType"),
+                    map_to_hls_mult_style[self.get_nodeattr("resType")],
                 )
             ]
         elif mem_mode == "decoupled" or mem_mode == "external":
@@ -949,7 +1024,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                     tmpl_args["TWeightI"],
                     wdtype_hls_str,
                     threshs,
-                    self.get_nodeattr("resType"),
+                    map_to_hls_mult_style[self.get_nodeattr("resType")],
                 )
             ]
 
diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py
index 358e2231483453a9e38a9e6e1d96c88ebef514d5..21534f9ab0b7d571c8c492115930ecd05e098856 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfifo.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py
@@ -56,13 +56,18 @@ class StreamingFIFO(HLSCustomOp):
             # Toggle between hls or IPI implementation
             # rtl - use the hls generated IP during stitching
             # vivado - use the AXI Infrastructure FIFO
-            "impl_style": ("s", False, "rtl"),
+            "impl_style": ("s", False, "rtl", {"rtl", "vivado"}),
             # FPGA resource type for FIFOs when impl_style is vivado
             # auto -- let Vivado decide
             # block -- use BRAM
             # distributed -- use LUTRAM
             # ultra -- use URAM (on UltraScale+)
-            "ram_style": ("s", False, "auto"),
+            "ram_style": (
+                "s",
+                False,
+                "auto",
+                {"auto", "block", "distributed", "ultra"},
+            ),
         }
         my_attrs.update(super().get_nodeattr_types())
 
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index d896c530baed6c9ab175961f359dde1fbc70c303..8a944fe77dc938db4154bb0a2ffcff8fdaefbd72 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -70,7 +70,7 @@ class Thresholding_Batch(HLSCustomOp):
             # number of steps in thresholding function
             "numSteps": ("i", True, 1),
             # string defining memory type
-            "ram_style": ("s", False, "distributed"),
+            "ram_style": ("s", False, "distributed", {"distributed", "block"}),
             # FINN DataTypes for inputs, outputs
             "inputDataType": ("s", True, ""),
             "weightDataType": ("s", True, ""),
@@ -88,7 +88,7 @@ class Thresholding_Batch(HLSCustomOp):
             # memory mode for the thresholds
             # const -- embedded thresholds, default
             # decoupled -- streaming thresholds with  streamer packaged inside IP
-            "mem_mode": ("s", False, "const"),
+            "mem_mode": ("s", False, "const", {"const", "decoupled"}),
             # (mem_mode = decoupled only) whether weights (thresholds) will be
             # writable through an AXI-lite interface during runtime
             # 1 for enabled, 0 for disabled.
@@ -98,7 +98,7 @@ class Thresholding_Batch(HLSCustomOp):
             # always "flush" the accelerator by first passing a dummy input
             # vector through the accelerator. This will get rid of any old
             # weight data from the weight FIFOs.
-            "runtime_writeable_weights": ("i", False, 0),
+            "runtime_writeable_weights": ("i", False, 0, {0, 1}),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
diff --git a/src/finn/custom_op/fpgadataflow/tlastmarker.py b/src/finn/custom_op/fpgadataflow/tlastmarker.py
index bdc64c9d2aa894f416bf5d99213908160970ea4b..bedaf0984c39ef7603e6829961d7a3efb6ff489f 100644
--- a/src/finn/custom_op/fpgadataflow/tlastmarker.py
+++ b/src/finn/custom_op/fpgadataflow/tlastmarker.py
@@ -47,14 +47,14 @@ class TLastMarker(HLSCustomOp):
             # whether static or dynamic (from AXI lite) number of iterations are used
             "DynIters": ("i", False, 1),
             # direction: whether to insert or remove TLAST
-            "Direction": ("s", False, "out"),
+            "Direction": ("s", False, "out", {"out", "in"}),
             # width of input-output data streams, in bits
             "StreamWidth": ("i", True, 0),
             # width of individual element in stream, in bits
             "ElemWidth": ("i", True, 0),
             # Protocol: external or internal
             # Vitis docs recommend using qdma_axis for external, ap_axiu for internal
-            "Protocol": ("s", False, "external"),
+            "Protocol": ("s", False, "external", {"external", "internal"}),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
diff --git a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
index b50a56419bce5b9db21da930451a7b3db11e5a0c..333884f361983e2a465715f3f4119c9c6384558e 100644
--- a/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
+++ b/src/finn/custom_op/fpgadataflow/vector_vector_activate_batch.py
@@ -1,5 +1,6 @@
 import os
 import numpy as np
+import math
 
 from onnx import TensorProto, helper
 from finn.core.datatype import DataType
@@ -24,14 +25,14 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
             "Dim": ("i", True, 0),
             "Channels": ("i", True, 0),
             "Kernel": ("i", True, 0),
-            "resType": ("s", True, ""),
+            "resType": ("s", False, "auto", {"auto", "lut", "dsp"}),
             "ActVal": ("i", False, 0),
             # FINN DataTypes for inputs, weights, outputs
             "inputDataType": ("s", True, ""),
             "weightDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
             # no-activation mode (produce accumulators)
-            "noActivation": ("i", False, 0),
+            "noActivation": ("i", False, 0, {0, 1}),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
@@ -408,6 +409,11 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
         )
 
     def docompute(self):
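+        # translate the resType attribute into hlslib resource selector types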
+        map_to_hls_mult_style = {
+            "auto": "ap_resource_dflt()",
+            "lut": "ap_resource_lut()",
+            "dsp": "ap_resource_dsp()",
+        }
         tmpl_args = self.get_template_param_values()
         if self.calc_tmem() == 0:
             odtype_hls_str = self.get_output_datatype().get_hls_datatype_str()
@@ -423,7 +429,7 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
                 tmpl_args["TDstI"],
                 tmpl_args["TWeightI"],
                 threshs,
-                self.get_nodeattr("resType"),
+                map_to_hls_mult_style[self.get_nodeattr("resType")],
             )
         ]
 
@@ -504,3 +510,99 @@ class Vector_Vector_Activate_Batch(HLSCustomOp):
                     "complete dim=3"
                 )
             )
+
+    def bram_estimation(self):
+        """Calculates resource estimation for BRAM"""
+        # TODO add in/out FIFO contributions
+        P = self.get_nodeattr("PE")
+        wdt = self.get_weight_datatype()
+        W = wdt.bitwidth()
+        omega = self.calc_wmem()
+        # assuming SDP mode RAMB18s (see UG573 Table 1-10)
+        # since this is HLS memory, not using the full width of a BRAM
+        # assuming memories up to 128 deep get implemented in LUTs
+        if self.calc_wmem() <= 128:
+            return 0
+
+        if W == 1:
+            return math.ceil(omega / 16384) * P
+        elif W == 2:
+            return math.ceil(omega / 8192) * P
+        elif W <= 4:
+            return (math.ceil(omega / 4096)) * (math.ceil(W / 4)) * P
+        elif W <= 9:
+            return (math.ceil(omega / 2048)) * (math.ceil(W / 9)) * P
+        elif W <= 18 or omega > 512:
+            return (math.ceil(omega / 1024)) * (math.ceil(W / 18)) * P
+        else:
+            return (math.ceil(omega / 512)) * (math.ceil(W / 36)) * P
+
+    def bram_efficiency_estimation(self):
+        P = self.get_nodeattr("PE")
+        wdt = self.get_weight_datatype()
+        W = wdt.bitwidth()
+        omega = self.calc_wmem()
+        bram16_est = self.bram_estimation()
+        if bram16_est == 0:
+            return 1
+        wbits = W * P * omega
+        bram16_est_capacity = bram16_est * 36 * 512
+        return wbits / bram16_est_capacity
+
+    def lut_estimation(self):
+        """Calculates resource estimations for LUTs based on:
+        - FINN-R: An End-to-End Deep-Learning Framework for Fast
+        Exploration of Quantized Neural Networks
+        - M. Blott, T. B. Preusser, N. J. Fraser, G. Gambardella, K. O'Brien,
+        Y. Umuroglu, M. Leeser and K. Vissers
+        - 12. Sep 2018
+        """
+        # TODO add in/out FIFO contributions
+        P = self.get_nodeattr("PE")
+        wdt = self.get_weight_datatype()
+        W = wdt.bitwidth()
+        # determine tdt with input and weight data types
+        idt = self.get_input_datatype()
+        A = idt.bitwidth()
+        # parameters from experiments in paper mentioned above
+        c0 = 300
+        c1 = 1.1
+        c2 = 0
+        if self.calc_wmem() <= 128:
+            c2 = P * W * math.ceil(self.calc_wmem() / 64)
+
+        # multiplication
+        res_type = self.get_nodeattr("resType")
+        if res_type == "dsp":
+            mult_luts = 0
+        else:
+            mult_luts = (2 * math.ceil((W + A) / 6) - 1) * (W + A)
+        # accumulator
+        k = self.get_nodeattr("Kernel")
+        acc_bits = W + A + math.ceil(math.log(k * k, 2))
+        acc_luts = acc_bits
+        # thresholds and threshold comparators
+        thr_luts = 0
+        comp_luts = 0
+        noact = self.get_nodeattr("noActivation")
+        if noact == 0:
+            odt = self.get_output_datatype()
+            B = odt.bitwidth()
+            thr_luts = (2 ** B - 1) * acc_bits * math.ceil(self.calc_tmem() / 64)
+            comp_luts = (2 ** B - 1) * acc_bits
+
+        return int(c0 + c1 * (P * (mult_luts + acc_luts + thr_luts + comp_luts)) + c2)
+
+    def dsp_estimation(self):
+        # multiplication
+        P = self.get_nodeattr("PE")
+        res_type = self.get_nodeattr("resType")
+        wdt = self.get_weight_datatype()
+        W = wdt.bitwidth()
+        idt = self.get_input_datatype()
+        A = idt.bitwidth()
+        if res_type == "dsp":
+            mult_dsp = P * np.ceil((W + A) / 48)  # TODO: more accurate modelling
+        else:
+            mult_dsp = 0
+        return int(mult_dsp)
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index 215cdc9a0b3cba172961a010eda24afa50687373..749cf6c91a975a2ffaffedefa77b2f3fcb793e32 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -442,7 +442,6 @@ class InferBinaryStreamingFCLayer(Transformation):
                         [mt_output],
                         domain="finn.custom_op.fpgadataflow",
                         backend="fpgadataflow",
-                        resType="ap_resource_lut()",
                         MW=mw,
                         MH=mh,
                         SIMD=simd,
@@ -473,7 +472,6 @@ class InferBinaryStreamingFCLayer(Transformation):
                         [mm_output],
                         domain="finn.custom_op.fpgadataflow",
                         backend="fpgadataflow",
-                        resType="ap_resource_lut()",
                         MW=mw,
                         MH=mh,
                         SIMD=simd,
@@ -577,7 +575,6 @@ class InferQuantizedStreamingFCLayer(Transformation):
                             [mt_output],
                             domain="finn.custom_op.fpgadataflow",
                             backend="fpgadataflow",
-                            resType="ap_resource_lut()",
                             MW=mw,
                             MH=mh,
                             SIMD=simd,
@@ -608,7 +605,6 @@ class InferQuantizedStreamingFCLayer(Transformation):
                             [mm_output],
                             domain="finn.custom_op.fpgadataflow",
                             backend="fpgadataflow",
-                            resType="ap_resource_lut()",
                             MW=mw,
                             MH=mh,
                             SIMD=simd,
@@ -728,7 +724,7 @@ class InferVVAU(Transformation):
                             [mt_output],
                             domain="finn.custom_op.fpgadataflow",
                             backend="fpgadataflow",
-                            resType="ap_resource_lut()",
+                            resType="lut",
                             PE=pe,
                             Dim=mm_in_shape[1],
                             Channels=channels,
@@ -756,7 +752,7 @@ class InferVVAU(Transformation):
                             [mm_output],
                             domain="finn.custom_op.fpgadataflow",
                             backend="fpgadataflow",
-                            resType="ap_resource_lut()",
+                            resType="lut",
                             PE=pe,
                             Dim=mm_in_shape[1],
                             Channels=channels,
diff --git a/src/finn/util/create.py b/src/finn/util/create.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9c5d7b1b59916edfc8730992535f3ddb57c4d60
--- /dev/null
+++ b/src/finn/util/create.py
@@ -0,0 +1,178 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+from onnx import TensorProto, helper
+
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.util.basic import calculate_signed_dot_prod_range, gen_finn_dt_tensor
+
+
+def hls_random_mlp_maker(layer_spec):
+    """Create an MLP of given specification using HLSCustomOp instances.
+    Generate random weights/thresholds of appropriate size."""
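+
+    Example layer_spec entry (hypothetical values):
+        {"mw": 100, "mh": 100, "simd": 10, "pe": 10,
+         "idt": DataType.INT2, "wdt": DataType.INT2, "act": DataType.INT2}
+    """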
+    ret = []
+    for lyr in layer_spec:
+        idt = lyr["idt"]
+        wdt = lyr["wdt"]
+        mw = lyr["mw"]
+        mh = lyr["mh"]
+        act = lyr["act"]
+        lyr["W"] = gen_finn_dt_tensor(wdt, (mw, mh))
+        if act is None:
+            # no activation, produce accumulators
+            T = None
+            tdt = None
+            if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+                odt = DataType.UINT32
+            else:
+                odt = DataType.INT32
+        else:
+            odt = act
+            (min_val, max_val) = calculate_signed_dot_prod_range(idt, wdt, mw)
+            n_steps = act.get_num_possible_values() - 1
+            T = np.random.randint(
+                min_val, max_val - 1, (mh, n_steps)
+            ).astype(np.float32)
+            # provide non-decreasing thresholds
+            T = np.sort(T, axis=1)
+            # generate thresholds for activation
+            if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+                tdt = DataType.UINT32
+                # bias thresholds to be positive
+                T = np.ceil((T + mw) / 2)
+                assert (T >= 0).all()
+            else:
+                tdt = DataType.INT32
+        lyr["T"] = T
+        lyr["tdt"] = tdt
+        lyr["odt"] = odt
+        ret.append(lyr)
+
+    return hls_mlp_maker(ret)
+
+
+def hls_mlp_maker(layer_spec):
+    """Create an MLP of given specification using HLSCustomOp instances."""
+
+    current_in_name = ""
+    current_out_name = ""
+    i = 0
+
+    graph = helper.make_graph(nodes=[], name="mlp", inputs=[], outputs=[])
+
+    model = helper.make_model(graph, producer_name="finn")
+    model = ModelWrapper(model)
+
+    for lyr in layer_spec:
+        current_W_name = "W_%d" % i
+        current_T_name = "T_%d" % i
+        current_in_name = "act_%d" % i
+        current_out_name = "act_%d" % (i + 1)
+
+        W = lyr["W"]
+        (mw, mh) = W.shape
+        T = lyr["T"]
+        pe = lyr["pe"]
+        simd = lyr["simd"]
+        wdt = lyr["wdt"]
+        idt = lyr["idt"]
+        tdt = lyr["tdt"]
+        odt = lyr["odt"]
+
+        if i == 0:
+            global_in = helper.make_tensor_value_info(
+                current_in_name, TensorProto.FLOAT, [1, mw]
+            )
+            model.graph.input.append(global_in)
+
+        if i == len(layer_spec) - 1:
+            global_out = helper.make_tensor_value_info(
+                current_out_name, TensorProto.FLOAT, [1, mh]
+            )
+            model.graph.output.append(global_out)
+
+        # there are two ways to implement bipolar weights and inputs for
+        # StreamingFC:
+        # - specify their datatypes as such
+        # - specify their datatypes as BINARY and use binaryXnorMode
+        if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
+            # we'll internally convert weights/inputs to binary and specify the
+            # datatypes as such, and also set the binaryXnorMode attribute to 1
+            export_wdt = DataType.BINARY
+            export_idt = DataType.BINARY
+            binary_xnor_mode = 1
+        else:
+            export_wdt = wdt
+            export_idt = idt
+            binary_xnor_mode = 0
+
+        if T is not None:
+            no_act = 0
+            node_inp_list = [current_in_name, current_W_name, current_T_name]
+            if odt == DataType.BIPOLAR:
+                actval = 0
+            else:
+                actval = odt.min()
+        else:
+            # no thresholds
+            node_inp_list = [current_in_name, current_W_name]
+            actval = 0
+            no_act = 1
+        FCLayer_node = helper.make_node(
+            "StreamingFCLayer_Batch",
+            node_inp_list,
+            [current_out_name],
+            domain="finn.custom_op.fpgadataflow",
+            backend="fpgadataflow",
+            MW=mw,
+            MH=mh,
+            SIMD=simd,
+            PE=pe,
+            inputDataType=export_idt.name,
+            weightDataType=export_wdt.name,
+            outputDataType=odt.name,
+            ActVal=actval,
+            binaryXnorMode=binary_xnor_mode,
+            noActivation=no_act,
+        )
+
+        model.graph.node.append(FCLayer_node)
+        model.set_tensor_datatype(current_in_name, idt)
+        model.set_tensor_datatype(current_out_name, odt)
+        model.set_tensor_datatype(current_W_name, wdt)
+        if binary_xnor_mode:
+            # convert bipolar to binary
+            model.set_initializer(current_W_name, (W + 1) / 2)
+        else:
+            model.set_initializer(current_W_name, W)
+        if T is not None:
+            model.set_tensor_datatype(current_T_name, tdt)
+            model.set_initializer(current_T_name, T)
+        i += 1
+
+    return model
diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py
index 287cb83752f8b1f935373dd2492fc9859cadd354..cf3e064804216e192909eae75f01880554f03d9f 100644
--- a/tests/fpgadataflow/test_code_gen_trafo.py
+++ b/tests/fpgadataflow/test_code_gen_trafo.py
@@ -55,7 +55,6 @@ def test_code_gen_trafo():
         backend="fpgadataflow",
         code_gen_dir="",
         executable_path="",
-        resType="ap_resource_lut()",
         MW=mw,
         MH=mh,
         SIMD=simd,
diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py
index 811b2741c83a1354cfe9a44384e1c2fbbe3f4e3b..a12c69285b7b335f075d8ffd7ba27e039ebc6f8c 100644
--- a/tests/fpgadataflow/test_compilation_trafo.py
+++ b/tests/fpgadataflow/test_compilation_trafo.py
@@ -57,7 +57,6 @@ def test_compilation_trafo():
         backend="fpgadataflow",
         code_gen_dir="",
         executable_path="",
-        resType="ap_resource_lut()",
         MW=mw,
         MH=mh,
         SIMD=simd,
diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
index 436d67231266faa4699ef2b10916fec13e875f2c..00f1ba5d59288b1a463fadbd684ff872269d6970 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
@@ -90,7 +90,6 @@ def make_single_fclayer_modelwrapper(W, pe, simd, wdt, idt, odt, T=None, tdt=Non
         ["outp"],
         domain="finn.custom_op.fpgadataflow",
         backend="fpgadataflow",
-        resType="ap_resource_lut()",
         MW=mw,
         MH=mh,
         SIMD=simd,
diff --git a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
index a1ca0365669111e60b7302d86d03d3d6469af783..306844c7ef3828d8483d3b0006491864f1525e21 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ipstitch.py
@@ -90,7 +90,6 @@ def create_one_fc_model(mem_mode="const"):
         ["outp"],
         domain="finn.custom_op.fpgadataflow",
         backend="fpgadataflow",
-        resType="ap_resource_lut()",
         MW=m,
         MH=m,
         SIMD=simd,
@@ -145,7 +144,6 @@ def create_two_fc_model(mem_mode="decoupled"):
         ["mid"],
         domain="finn.custom_op.fpgadataflow",
         backend="fpgadataflow",
-        resType="ap_resource_lut()",
         MW=m,
         MH=m,
         SIMD=simd,
@@ -165,7 +163,6 @@ def create_two_fc_model(mem_mode="decoupled"):
         ["outp"],
         domain="finn.custom_op.fpgadataflow",
         backend="fpgadataflow",
-        resType="ap_resource_lut()",
         MW=m,
         MH=m,
         SIMD=simd,
diff --git a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
index 61b83f12758593cb8832f412de5c3aaf93053fd8..06ebd90000e7466b2781d3284c5a0a0e56733dea 100644
--- a/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
+++ b/tests/fpgadataflow/test_fpgadataflow_res_estimate.py
@@ -28,7 +28,10 @@
 
 from onnx import TensorProto, helper
 
-from finn.analysis.fpgadataflow.res_estimation import res_estimation
+from finn.analysis.fpgadataflow.res_estimation import (
+    res_estimation,
+    res_estimation_complete,
+)
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.general import GiveUniqueNodeNames
@@ -53,7 +56,7 @@ def test_res_estimate():
     pe = 1
     idt = DataType.INT2
     wdt = DataType.INT2
-    odt = DataType.INT32
+    odt = DataType.INT2
     actval = odt.min()
 
     inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, mw])
@@ -66,7 +69,6 @@ def test_res_estimate():
         ["outp"],
         domain="finn.custom_op.fpgadataflow",
         backend="fpgadataflow",
-        resType="ap_resource_lut()",
         MW=mw,
         MH=mh,
         SIMD=simd,
@@ -93,13 +95,28 @@ def test_res_estimate():
     prod_resource_estimation = model.analysis(res_estimation)
     expect_resource_estimation = {
         "StreamingFCLayer_Batch_0": {
-            "BRAM_18K": 1,
-            "BRAM_efficiency": 0.001736111111111111,
-            "LUT": 304.4,
+            "BRAM_18K": 0,
+            "BRAM_efficiency": 1,
+            "LUT": 357,
+            "DSP": 0,
+            "URAM": 0,
         }
     }
 
     assert check_two_dict_for_equality(
         prod_resource_estimation, expect_resource_estimation
     ), """The produced output of
-    the resource estimation analysis pass is not equal to the expected one"""
+    the res_estimation analysis pass is not equal to the expected one"""
+
+    prod_resource_estimation = model.analysis(res_estimation_complete)
+    expect_resource_estimation = {
+        "StreamingFCLayer_Batch_0": [
+            {"BRAM_18K": 0, "BRAM_efficiency": 1, "LUT": 352, "DSP": 1, "URAM": 0},
+            {"BRAM_18K": 0, "BRAM_efficiency": 1, "LUT": 357, "DSP": 0, "URAM": 0},
+        ]
+    }
+
+    assert check_two_dict_for_equality(
+        prod_resource_estimation, expect_resource_estimation
+    ), """The produced output of
+    the res_estimation_complete analysis pass is not equal to the expected one"""
diff --git a/tests/util/test_create.py b/tests/util/test_create.py
new file mode 100644
index 0000000000000000000000000000000000000000..42a288b74ecda9746296519b1b86563c75b2752e
--- /dev/null
+++ b/tests/util/test_create.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2020 Xilinx, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import finn.util.create as create
+from finn.core.datatype import DataType
+
+
+@pytest.mark.parametrize("bitwidth", [DataType.BIPOLAR, DataType.INT2, DataType.INT4])
+def test_hls_random_mlp_maker(bitwidth):
+    w = bitwidth
+    a = bitwidth
+    layer_spec = [
+        {
+            "mw": 185,
+            "mh": 100,
+            "simd": 185,
+            "pe": 100,
+            "idt": DataType.BIPOLAR,
+            "wdt": w,
+            "act": a,
+        },
+        {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a},
+        {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a},
+        {"mw": 100, "mh": 100, "simd": 100, "pe": 100, "idt": a, "wdt": w, "act": a},
+        {
+            "mw": 100,
+            "mh": 1,
+            "simd": 100,
+            "pe": 1,
+            "idt": a,
+            "wdt": w,
+            "act": DataType.BIPOLAR,
+        },
+    ]
+
+    ret = create.hls_random_mlp_maker(layer_spec)
+    assert len(ret.graph.node) == 5
+    # ret.save("mlp-%s.onnx" % str(bitwidth))