diff --git a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
index c7db5b1d9d22ea89740f4c82633c96746a6fa5ee..7b4ca37cd78c6299fa824ecfc16d79ae013bab37 100644
--- a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
@@ -30,7 +30,7 @@ import os
 import xml.etree.ElementTree as ET
 
 import finn.custom_op.registry as registry
-import finn.util.basic as util
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 def hls_synth_res_estimation(model):
@@ -40,36 +40,30 @@ def hls_synth_res_estimation(model):
 
     res_dict = {}
     for node in model.graph.node:
-        if node.domain == "finn":
-            backend_attribute = util.get_by_name(node.attribute, "backend")
-            if backend_attribute is None:
-                continue
-            backend_value = backend_attribute.s.decode("UTF-8")
-            if backend_value == "fpgadataflow":
-                op_type = node.op_type
-                inst = registry.custom_op[op_type](node)
-                code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen")
-                if code_gen_dir == "":
-                    raise Exception(
-                        """Please run "CodeGen_ipgen" transformation and
-                            "HLSSynth_IPGen" first to generate the report files"""
-                    )
+        if is_fpgadataflow_node(node) is True:
+            op_type = node.op_type
+            inst = registry.custom_op[op_type](node)
+            code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen")
+            if code_gen_dir == "":
+                raise Exception(
+                    """Please run "CodeGen_ipgen" transformation and
+                        "HLSSynth_IPGen" first to generate the report files"""
+                )
+            else:
+                xmlfile = "{}/project_{}/sol1/syn/report/{}_csynth.xml".format(
+                    code_gen_dir, node.name, node.name
+                )
+
+                if os.path.isfile(xmlfile):
+                    res_dict[node.name] = dict()
+                    tree = ET.parse(xmlfile)
+                    root = tree.getroot()
+                    for item in root.findall("AreaEstimates/Resources"):
+                        for child in item:
+                            res_dict[node.name][child.tag] = child.text
                 else:
-                    xmlfile = "{}/project_{}/sol1/syn/report/{}_csynth.xml".format(
-                        code_gen_dir, node.name, node.name
+                    raise Exception(
+                        """Please run "HLSSynth_IPGen" first
+                            to generate the report files"""
                     )
-
-                    if os.path.isfile(xmlfile):
-                        res_dict[node.name] = dict()
-                        tree = ET.parse(xmlfile)
-                        root = tree.getroot()
-                        for item in root.findall("AreaEstimates/Resources"):
-                            for child in item:
-                                res_dict[node.name][child.tag] = child.text
-                    else:
-                        raise Exception(
-                            """Please run "HLSSynth_IPGen" first
-                                to generate the report files"""
-                        )
-
     return res_dict
diff --git a/src/finn/analysis/fpgadataflow/res_estimation.py b/src/finn/analysis/fpgadataflow/res_estimation.py
index 3585868906fb2c66aef045f49f0da919f933d012..c190059eceb0cc111477c84f843f4a9f9bf2f393 100644
--- a/src/finn/analysis/fpgadataflow/res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/res_estimation.py
@@ -27,7 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import finn.custom_op.registry as registry
-import finn.util.basic as util
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 def res_estimation(model):
@@ -37,14 +37,9 @@ def res_estimation(model):
 
     res_dict = {}
     for node in model.graph.node:
-        if node.domain == "finn":
-            backend_attribute = util.get_by_name(node.attribute, "backend")
-            if backend_attribute is None:
-                continue
-            backend_value = backend_attribute.s.decode("UTF-8")
-            if backend_value == "fpgadataflow":
-                op_type = node.op_type
-                inst = registry.custom_op[op_type](node)
-                res_dict[node.name] = inst.node_res_estimation()
+        if is_fpgadataflow_node(node) is True:
+            op_type = node.op_type
+            inst = registry.custom_op[op_type](node)
+            res_dict[node.name] = inst.node_res_estimation()
 
     return res_dict
diff --git a/src/finn/core/modelwrapper.py b/src/finn/core/modelwrapper.py
index 3ddcaa03c3d62daaf1ffd9f5ae6c3857460994fc..e99a6ef4cd40d6323d77354d3c9b4be341d7649c 100644
--- a/src/finn/core/modelwrapper.py
+++ b/src/finn/core/modelwrapper.py
@@ -253,14 +253,12 @@ class ModelWrapper:
             return None
 
     def find_producer(self, tensor_name):
-        """Finds and returns the node that produces the tensor with given name.
-        Currently only works for linear graphs."""
-        all_outputs = [x.output[0] for x in self._model_proto.graph.node]
-        try:
-            producer_ind = all_outputs.index(tensor_name)
-            return self._model_proto.graph.node[producer_ind]
-        except ValueError:
-            return None
+        """Finds and returns the node that produces the tensor with given name."""
+        ret = None
+        for x in self._model_proto.graph.node:
+            if tensor_name in x.output:
+                ret = x
+        return ret
 
     def find_upstream(self, tensor_name, finder_fxn):
         """Follow the producer chain upstream, calling finder_fxn on each upstream
diff --git a/src/finn/core/onnx_exec.py b/src/finn/core/onnx_exec.py
index 0f47a9104e3d2ef3ee06ef908e302344d78e0b17..172ba25b223fd087df134add460a42d0a9935e0e 100644
--- a/src/finn/core/onnx_exec.py
+++ b/src/finn/core/onnx_exec.py
@@ -61,6 +61,10 @@ def execute_node(node, context, graph):
             # onnxruntime unfortunately does not implement run_node as defined by ONNX,
             # it can only execute entire models -- so we create a model which solely
             # consists of our current node.
+            # note: ensure that the same ValueInfo does not appear in both
+            # graph.value_info and graph.output (or graph.input); for nodes
+            # with multiple outputs that mix value_info and graph
+            # inputs/outputs, the outputs may be reordered below
             node_inputs = list(filter(lambda x: x.name in node.input, graph.input))
             node_inputs += list(
                 filter(lambda x: x.name in node.input, graph.value_info)
@@ -84,17 +88,25 @@ def execute_node(node, context, graph):
             output_list = sess.run(None, input_dict)
 
             for output_ind in range(len(node.output)):
+                # get the name of the target buffer from node.output
                 outp = node.output[output_ind]
-                if output_list[output_ind].shape != context[outp].shape:
+
+                # retrieve the index of that name in node_outputs
+                for i in range(len(node_outputs)):
+                    if outp == node_outputs[i].name:
+                        list_ind = i
+
+                # use that index to index output_list
+                if output_list[list_ind].shape != context[outp].shape:
                     raise Exception(
                         """Output shapes disagree after node execution:
                         found %s vs expected %s"""
                         % (
-                            str(output_list[output_ind].shape.shape),
+                            str(output_list[list_ind].shape.shape),
                             str(context[outp].shape),
                         )
                     )
-                context[outp] = output_list[output_ind]
+                context[outp] = output_list[list_ind]
 
 
 def execute_onnx(model, input_dict, return_full_exec_context=False):
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 6d63f3b0888c2b17e9bf5c766ce382649fa6f2be..eab3decc696cb86622bbdd8f22f015515ea936d5 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -97,7 +97,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             # block -- use BRAM
             # distributed -- use LUTRAM
             # see also https://www.xilinx.com/support/answers/38070.html
-            "decoupled_ram_style": ("s", False, "auto"),
+            "ram_style": ("s", False, "auto"),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
@@ -989,9 +989,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             self.code_gen_dict["$MEM_DEPTH$"] = [
                 str(roundup_to_integer_multiple(self.calc_wmem(), 1024))
             ]
-            self.code_gen_dict["$RAM_STYLE$"] = [
-                self.get_nodeattr("decoupled_ram_style")
-            ]
+            self.code_gen_dict["$RAM_STYLE$"] = [self.get_nodeattr("ram_style")]
 
             template = self.decoupled_wrapper
 
diff --git a/src/finn/transformation/fpgadataflow/cleanup.py b/src/finn/transformation/fpgadataflow/cleanup.py
index e1bf53f7ef53c986fffe3dcc507e6886660eb611..a31cbfa7dd30eff37ceb2d7bf3c162093a5a3a1c 100644
--- a/src/finn/transformation/fpgadataflow/cleanup.py
+++ b/src/finn/transformation/fpgadataflow/cleanup.py
@@ -30,7 +30,7 @@ import os
 import shutil
 
 import finn.custom_op.registry as registry
-import finn.util.basic as util
+from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation import Transformation
 
 
@@ -53,36 +53,33 @@ class CleanUp(Transformation):
         model.set_metadata_prop("vivado_stitch_proj", "")
         for node in model.graph.node:
             op_type = node.op_type
-            if node.domain == "finn":
-                backend_attribute = util.get_by_name(node.attribute, "backend")
-                backend_value = backend_attribute.s.decode("UTF-8")
-                if backend_value == "fpgadataflow":
-                    try:
-                        # lookup op_type in registry of CustomOps
-                        inst = registry.custom_op[op_type](node)
-                        # delete code_gen_dir from npysim
-                        code_gen_dir = inst.get_nodeattr("code_gen_dir_npysim")
-                        if os.path.isdir(code_gen_dir):
-                            shutil.rmtree(code_gen_dir)
-                        inst.set_nodeattr("code_gen_dir_npysim", "")
-                        inst.set_nodeattr("executable_path", "")
-                        # delete code_gen_dir from ipgen and project folder
-                        code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen")
-                        ipgen_path = inst.get_nodeattr("ipgen_path")
-                        if os.path.isdir(code_gen_dir):
-                            shutil.rmtree(code_gen_dir)
-                        if os.path.isdir(ipgen_path):
-                            shutil.rmtree(ipgen_path)
-                        inst.set_nodeattr("code_gen_dir_ipgen", "")
-                        inst.set_nodeattr("ipgen_path", "")
-                        # delete Java HotSpot Performance data log
-                        for d_name in os.listdir("/tmp/"):
-                            if "hsperfdata" in d_name:
-                                shutil.rmtree("/tmp/" + str(d_name))
+            if is_fpgadataflow_node(node) is True:
+                try:
+                    # lookup op_type in registry of CustomOps
+                    inst = registry.custom_op[op_type](node)
+                    # delete code_gen_dir from npysim
+                    code_gen_dir = inst.get_nodeattr("code_gen_dir_npysim")
+                    if os.path.isdir(code_gen_dir):
+                        shutil.rmtree(code_gen_dir)
+                    inst.set_nodeattr("code_gen_dir_npysim", "")
+                    inst.set_nodeattr("executable_path", "")
+                    # delete code_gen_dir from ipgen and project folder
+                    code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen")
+                    ipgen_path = inst.get_nodeattr("ipgen_path")
+                    if os.path.isdir(code_gen_dir):
+                        shutil.rmtree(code_gen_dir)
+                    if os.path.isdir(ipgen_path):
+                        shutil.rmtree(ipgen_path)
+                    inst.set_nodeattr("code_gen_dir_ipgen", "")
+                    inst.set_nodeattr("ipgen_path", "")
+                    # delete Java HotSpot Performance data log
+                    for d_name in os.listdir("/tmp/"):
+                        if "hsperfdata" in d_name:
+                            shutil.rmtree("/tmp/" + str(d_name))
 
-                    except KeyError:
-                        # exception if op_type is not supported
-                        raise Exception(
-                            "Custom op_type %s is currently not supported." % op_type
-                        )
+                except KeyError:
+                    # exception if op_type is not supported
+                    raise Exception(
+                        "Custom op_type %s is currently not supported." % op_type
+                    )
         return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/codegen_ipgen.py b/src/finn/transformation/fpgadataflow/codegen_ipgen.py
index 45db9db04f17325cafe04aad1016580054daf554..fa7725ae1fa03cc204aa58969d6fbc6cf71e7d97 100644
--- a/src/finn/transformation/fpgadataflow/codegen_ipgen.py
+++ b/src/finn/transformation/fpgadataflow/codegen_ipgen.py
@@ -30,7 +30,8 @@ import os
 
 import finn.custom_op.registry as registry
 from finn.transformation import Transformation
-from finn.util.basic import get_by_name, make_build_dir
+from finn.util.basic import make_build_dir
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 def _codegen_single_node(node, model, fpgapart, clk):
@@ -77,11 +78,6 @@ class CodeGen_ipgen(Transformation):
 
     def apply(self, model):
         for node in model.graph.node:
-            if node.domain == "finn":
-                backend_attribute = get_by_name(node.attribute, "backend")
-                if backend_attribute is None:
-                    continue
-                backend_value = backend_attribute.s.decode("UTF-8")
-                if backend_value == "fpgadataflow":
-                    _codegen_single_node(node, model, self.fpgapart, self.clk)
+            if is_fpgadataflow_node(node) is True:
+                _codegen_single_node(node, model, self.fpgapart, self.clk)
         return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/codegen_npysim.py b/src/finn/transformation/fpgadataflow/codegen_npysim.py
index fe758ec28d67bd2f46edc864574d2edddfe6e3a3..02200e76db3f9c8207605bb93c4b07f0ebc76cab 100644
--- a/src/finn/transformation/fpgadataflow/codegen_npysim.py
+++ b/src/finn/transformation/fpgadataflow/codegen_npysim.py
@@ -30,7 +30,8 @@ import os
 
 import finn.custom_op.registry as registry
 from finn.transformation import Transformation
-from finn.util.basic import get_by_name, make_build_dir
+from finn.util.basic import make_build_dir
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 def _codegen_single_node(node, model):
@@ -67,11 +68,6 @@ class CodeGen_npysim(Transformation):
 
     def apply(self, model):
         for node in model.graph.node:
-            if node.domain == "finn":
-                backend_attribute = get_by_name(node.attribute, "backend")
-                if backend_attribute is None:
-                    continue
-                backend_value = backend_attribute.s.decode("UTF-8")
-                if backend_value == "fpgadataflow":
-                    _codegen_single_node(node, model)
+            if is_fpgadataflow_node(node) is True:
+                _codegen_single_node(node, model)
         return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/compile.py b/src/finn/transformation/fpgadataflow/compile.py
index a76ab683209bbb1219517075ff29a75540dc7bfc..40c7da8f77efeaa655459402699a401b642b776c 100644
--- a/src/finn/transformation/fpgadataflow/compile.py
+++ b/src/finn/transformation/fpgadataflow/compile.py
@@ -27,7 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import finn.custom_op.registry as registry
-import finn.util.basic as util
+from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation import NodeLocalTransformation
 
 
@@ -49,31 +49,27 @@ class Compile(NodeLocalTransformation):
 
     def applyNodeLocal(self, node):
         op_type = node.op_type
-        if node.domain == "finn":
-            backend_attribute = util.get_by_name(node.attribute, "backend")
-            if backend_attribute is not None:
-                backend_value = backend_attribute.s.decode("UTF-8")
-                if backend_value == "fpgadataflow":
-                    try:
-                        # lookup op_type in registry of CustomOps
-                        inst = registry.custom_op[op_type](node)
-                        # ensure that code is generated
-                        assert (
-                            inst.get_nodeattr("code_gen_dir_npysim") != ""
-                        ), """Node
-                        attribute "code_gen_dir_npysim" is not set. Please run
-                        Transformation CodeGen_npysim first."""
-                        # call the compilation function for this node
-                        inst.compile_singlenode_code()
-                        # ensure that executable path is now set
-                        assert (
-                            inst.get_nodeattr("executable_path") != ""
-                        ), """Transformation
-                        compile was not successful, there is no path to executables set
-                        in node attribute "executable_path"."""
-                    except KeyError:
-                        # exception if op_type is not supported
-                        raise Exception(
-                            "Custom op_type %s is currently not supported." % op_type
-                        )
+        if is_fpgadataflow_node(node) is True:
+            try:
+                # lookup op_type in registry of CustomOps
+                inst = registry.custom_op[op_type](node)
+                # ensure that code is generated
+                assert (
+                    inst.get_nodeattr("code_gen_dir_npysim") != ""
+                ), """Node
+                attribute "code_gen_dir_npysim" is not set. Please run
+                Transformation CodeGen_npysim first."""
+                # call the compilation function for this node
+                inst.compile_singlenode_code()
+                # ensure that executable path is now set
+                assert (
+                    inst.get_nodeattr("executable_path") != ""
+                ), """Transformation
+                compile was not successful, there is no path to executables set
+                in node attribute "executable_path"."""
+            except KeyError:
+                # exception if op_type is not supported
+                raise Exception(
+                    "Custom op_type %s is currently not supported." % op_type
+                )
         return (node, False)
diff --git a/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py b/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py
index 2a40b3c2302a432937d45e807515e795f02e0365..2af623818fe0e830883ef5065e5e7c9c7364ef1e 100644
--- a/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py
+++ b/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py
@@ -27,7 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import finn.custom_op.registry as registry
-import finn.util.basic as util
+from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation import NodeLocalTransformation
 
 
@@ -49,33 +49,27 @@ class HLSSynth_IPGen(NodeLocalTransformation):
 
     def applyNodeLocal(self, node):
         op_type = node.op_type
-        if node.domain == "finn":
-            backend_attribute = util.get_by_name(node.attribute, "backend")
-            if backend_attribute is None:
-                return (node, False)
-            backend_value = backend_attribute.s.decode("UTF-8")
-            if backend_value == "fpgadataflow":
-                try:
-                    # lookup op_type in registry of CustomOps
-                    inst = registry.custom_op[op_type](node)
-                    # ensure that code is generated
-                    assert (
-                        inst.get_nodeattr("code_gen_dir_ipgen") != ""
-                    ), """Node
-                    attribute "code_gen_dir_ipgen" is empty. Please run
-                    transformation CodeGen_ipgen first."""
-                    # call the compilation function for this node
-                    inst.ipgen_singlenode_code()
-                    # ensure that executable path is now set
-                    assert (
-                        inst.get_nodeattr("ipgen_path") != ""
-                    ), """Transformation
-                    HLSSynth_IPGen was not successful. Node attribute "ipgen_path"
-                    is empty."""
-                except KeyError:
-                    # exception if op_type is not supported
-                    raise Exception(
-                        "Custom op_type %s is currently not supported." % op_type
-                    )
-
+        if is_fpgadataflow_node(node) is True:
+            try:
+                # lookup op_type in registry of CustomOps
+                inst = registry.custom_op[op_type](node)
+                # ensure that code is generated
+                assert (
+                    inst.get_nodeattr("code_gen_dir_ipgen") != ""
+                ), """Node
+                attribute "code_gen_dir_ipgen" is empty. Please run
+                transformation CodeGen_ipgen first."""
+                # call the compilation function for this node
+                inst.ipgen_singlenode_code()
+                # ensure that executable path is now set
+                assert (
+                    inst.get_nodeattr("ipgen_path") != ""
+                ), """Transformation
+                HLSSynth_IPGen was not successful. Node attribute "ipgen_path"
+                is empty."""
+            except KeyError:
+                # exception if op_type is not supported
+                raise Exception(
+                    "Custom op_type %s is currently not supported." % op_type
+                )
         return (node, False)
diff --git a/src/finn/transformation/fpgadataflow/insert_dwc.py b/src/finn/transformation/fpgadataflow/insert_dwc.py
index 7121434dedef428cbd20c324d39193469e9b1a04..3fe60292e8f54a8cdf394b5e09f8a3d2bca7605c 100644
--- a/src/finn/transformation/fpgadataflow/insert_dwc.py
+++ b/src/finn/transformation/fpgadataflow/insert_dwc.py
@@ -3,7 +3,7 @@ from onnx import helper as oh
 
 from finn.custom_op.registry import getCustomOp
 from finn.transformation import Transformation
-from finn.util.basic import get_by_name
+from finn.util.fpgadataflow import is_fpgadataflow_node
 
 
 def _is_dwc_node(node):
@@ -13,21 +13,9 @@ def _is_dwc_node(node):
         return False
 
 
-def _is_fpgadataflow_node(node):
-    if node.domain == "finn":
-        n_backend = get_by_name(node.attribute, "backend")
-        if n_backend is None:
-            return False
-        backend_value = n_backend.s.decode("UTF-8")
-        if backend_value == "fpgadataflow":
-            return True
-    else:
-        return False
-
-
 def _suitable_node(node):
     if node is not None:
-        if _is_fpgadataflow_node(node) is True:
+        if is_fpgadataflow_node(node) is True:
             if _is_dwc_node(node) is False:
                 return True
             else:
diff --git a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
index 4474831381425268d2a59e7de835bba31c55a733..baac3e7bdf5ac936a963ac8346f01638657b042c 100644
--- a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
+++ b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
@@ -27,7 +27,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import finn.custom_op.registry as registry
-import finn.util.basic as util
+from finn.util.fpgadataflow import is_fpgadataflow_node
+
 from finn.transformation import NodeLocalTransformation
 
 try:
@@ -55,22 +56,18 @@ class PrepareRTLSim(NodeLocalTransformation):
 
     def applyNodeLocal(self, node):
         op_type = node.op_type
-        if node.domain == "finn":
-            backend_attribute = util.get_by_name(node.attribute, "backend")
-            if backend_attribute is not None:
-                backend_value = backend_attribute.s.decode("UTF-8")
-                if backend_value == "fpgadataflow":
-                    try:
-                        # lookup op_type in registry of CustomOps
-                        inst = registry.custom_op[op_type](node)
-                        inst.prepare_rtlsim()
-                        # ensure that executable path is now set
-                        assert (
-                            inst.get_nodeattr("rtlsim_so") != ""
-                        ), "Failed to prepare RTLSim, no rtlsim_so attribute found."
-                    except KeyError:
-                        # exception if op_type is not supported
-                        raise Exception(
-                            "Custom op_type %s is currently not supported." % op_type
-                        )
+        if is_fpgadataflow_node(node) is True:
+            try:
+                # lookup op_type in registry of CustomOps
+                inst = registry.custom_op[op_type](node)
+                inst.prepare_rtlsim()
+                # ensure that the rtlsim shared library path is now set
+                assert (
+                    inst.get_nodeattr("rtlsim_so") != ""
+                ), "Failed to prepare RTLSim, no rtlsim_so attribute found."
+            except KeyError:
+                # exception if op_type is not supported
+                raise Exception(
+                    "Custom op_type %s is currently not supported." % op_type
+                )
         return (node, False)
diff --git a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
index dce62c20b99097feee7208cbf57aa8921ddb3566..e63ae4e0203188d9664f432f75e36994e8a71ac5 100644
--- a/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
+++ b/src/finn/transformation/fpgadataflow/replace_verilog_relpaths.py
@@ -29,7 +29,7 @@
 import os
 
 import finn.custom_op.registry as registry
-import finn.util.basic as util
+from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation import Transformation
 
 
@@ -42,32 +42,27 @@ class ReplaceVerilogRelPaths(Transformation):
     def apply(self, model):
         for node in model.graph.node:
             op_type = node.op_type
-            if node.domain == "finn":
-                backend_attribute = util.get_by_name(node.attribute, "backend")
-                if backend_attribute is None:
-                    continue
-                backend_value = backend_attribute.s.decode("UTF-8")
-                if backend_value == "fpgadataflow":
-                    try:
-                        # lookup op_type in registry of CustomOps
-                        inst = registry.custom_op[op_type](node)
-                        # find the IP gen dir
-                        ipgen_path = inst.get_nodeattr("ipgen_path")
-                        if ipgen_path is not None and os.path.isdir(ipgen_path):
-                            for dname, dirs, files in os.walk(ipgen_path):
-                                for fname in files:
-                                    if fname.endswith(".v"):
-                                        fpath = os.path.join(dname, fname)
-                                        with open(fpath, "r") as f:
-                                            s = f.read()
-                                        old = '$readmemh(".'
-                                        new = '$readmemh("%s' % dname
-                                        s = s.replace(old, new)
-                                        old = '"./'
-                                        new = '"%s/' % dname
-                                        s = s.replace(old, new)
-                                        with open(fpath, "w") as f:
-                                            f.write(s)
-                    except KeyError:
-                        pass
+            if is_fpgadataflow_node(node) is True:
+                try:
+                    # lookup op_type in registry of CustomOps
+                    inst = registry.custom_op[op_type](node)
+                    # find the IP gen dir
+                    ipgen_path = inst.get_nodeattr("ipgen_path")
+                    if ipgen_path is not None and os.path.isdir(ipgen_path):
+                        for dname, dirs, files in os.walk(ipgen_path):
+                            for fname in files:
+                                if fname.endswith(".v"):
+                                    fpath = os.path.join(dname, fname)
+                                    with open(fpath, "r") as f:
+                                        s = f.read()
+                                    old = '$readmemh(".'
+                                    new = '$readmemh("%s' % dname
+                                    s = s.replace(old, new)
+                                    old = '"./'
+                                    new = '"%s/' % dname
+                                    s = s.replace(old, new)
+                                    with open(fpath, "w") as f:
+                                        f.write(s)
+                except KeyError:
+                    pass
         return (model, False)
diff --git a/src/finn/transformation/fpgadataflow/set_exec_mode.py b/src/finn/transformation/fpgadataflow/set_exec_mode.py
index 1f9c4c42189950e456da2dda77dee98fda49d522..83dda7ceccfd26fa1c43ab517ade2e19ccae4a61 100644
--- a/src/finn/transformation/fpgadataflow/set_exec_mode.py
+++ b/src/finn/transformation/fpgadataflow/set_exec_mode.py
@@ -27,7 +27,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import finn.custom_op.registry as registry
-import finn.util.basic as util
+from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation import Transformation
 
 
@@ -42,25 +42,20 @@ class SetExecMode(Transformation):
     def apply(self, model):
         for node in model.graph.node:
             op_type = node.op_type
-            if node.domain == "finn":
-                backend_attribute = util.get_by_name(node.attribute, "backend")
-                if backend_attribute is None:
-                    continue
-                backend_value = backend_attribute.s.decode("UTF-8")
-                if backend_value == "fpgadataflow":
-                    try:
-                        # lookup op_type in registry of CustomOps
-                        inst = registry.custom_op[op_type](node)
-                        # set sim_mode accordingly to argument mode
-                        inst.set_nodeattr("exec_mode", self.mode)
-                        # ensure that sim_mode is now set
-                        assert (
-                            inst.get_nodeattr("exec_mode") != ""
-                        ), """Transformation
+            if is_fpgadataflow_node(node) is True:
+                try:
+                    # lookup op_type in registry of CustomOps
+                    inst = registry.custom_op[op_type](node)
+                    # set sim_mode accordingly to argument mode
+                    inst.set_nodeattr("exec_mode", self.mode)
+                    # ensure that sim_mode is now set
+                    assert (
+                        inst.get_nodeattr("exec_mode") != ""
+                    ), """Transformation
                         was not successful. Node attribute "exec_mode" is not set"""
-                    except KeyError:
-                        # exception if op_type is not supported
-                        raise Exception(
-                            "Custom op_type %s is currently not supported." % op_type
-                        )
+                except KeyError:
+                    # exception if op_type is not supported
+                    raise Exception(
+                        "Custom op_type %s is currently not supported." % op_type
+                    )
         return (model, False)
diff --git a/src/finn/transformation/insert_topk.py b/src/finn/transformation/insert_topk.py
new file mode 100644
index 0000000000000000000000000000000000000000..213d2cedf92c0276e33fcf2b50e6966aeee8c847
--- /dev/null
+++ b/src/finn/transformation/insert_topk.py
@@ -0,0 +1,96 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+
+from onnx import TensorProto
+from onnx import helper as oh
+
+from finn.transformation import Transformation
+from finn.core.datatype import DataType
+
+
+class InsertTopK(Transformation):
+    """Add TopK node at the network output and replace the graph output with
+    the TopK indices."""
+
+    def __init__(self, k=5, axis=-1, largest=1, sorted=1):
+        super().__init__()
+        self.k = k
+        self.axis = axis
+        self.largest = largest
+        self.sorted = sorted
+
+    def apply(self, model):
+        # get name of output tensor
+        graph_out_name = model.graph.output[0].name
+        # find final node
+        final_node = model.find_producer(graph_out_name)
+        # if a top-select op is already present, do nothing
+        if final_node is not None and final_node.op_type == "TopK":
+            return (model, False)
+        else:
+            out_shape = model.get_tensor_shape(graph_out_name)
+            out_dtype = model.get_tensor_datatype(graph_out_name)
+            # adjust shape
+            out_shape[self.axis] = self.k
+            # make new buffer
+            k_tensor = np.array([self.k]).astype(np.int64)
+            k_value = oh.make_tensor_value_info(
+                model.make_new_valueinfo_name(), TensorProto.INT64, [1]
+            )
+            topk_values = oh.make_tensor_value_info(
+                model.make_new_valueinfo_name(), TensorProto.FLOAT, out_shape
+            )
+            topk_indices = oh.make_tensor_value_info(
+                model.make_new_valueinfo_name(), TensorProto.INT64, out_shape
+            )
+            model.graph.value_info.append(k_value)
+            # TODO: use an INT64 FINN DataType once one exists; out_dtype is a placeholder
+            model.set_tensor_datatype(k_value.name, out_dtype)
+            model.graph.value_info.append(topk_values)
+            model.set_tensor_datatype(topk_values.name, out_dtype)
+            # create and append topk node
+            model.set_initializer(k_value.name, k_tensor)
+            topk_node = oh.make_node(
+                "TopK",
+                inputs=[graph_out_name, k_value.name],
+                outputs=[topk_values.name, topk_indices.name],
+                axis=self.axis,
+                largest=self.largest,
+                sorted=self.sorted,
+            )
+            model.graph.node.append(topk_node)
+            # replace the existing output definition with topk indices
+            model.graph.output.insert(0, topk_indices)
+            model.graph.output.pop(1)
+            # set quantization annotation for indices
+            # minimal output dtype for TopK indices depends on num. classes
+            # assuming UINT32 is large enough for now (FINN has currently no
+            # DataType.INT64)
+            model.set_tensor_datatype(topk_indices.name, DataType.UINT32)
+            return (model, True)
diff --git a/tests/end2end/test_end2end_tfc_w1a1.py b/tests/end2end/test_end2end_tfc_w1a1.py
index 7db8bedd64619198aa60d289bf78451932289fd6..8a670fce2e7e6585c98efa9e4a6e27a660edf925 100644
--- a/tests/end2end/test_end2end_tfc_w1a1.py
+++ b/tests/end2end/test_end2end_tfc_w1a1.py
@@ -137,6 +137,7 @@ def test_end2end_tfc_w1a1_fold_and_tlastmarker():
     fc0w.set_nodeattr("SIMD", 16)
     fc0w.set_nodeattr("PE", 16)
     fc0w.set_nodeattr("outFIFODepth", 4)
+    fc0w.set_nodeattr("ram_style", "block")
     fc1w.set_nodeattr("SIMD", 8)
     fc1w.set_nodeattr("PE", 8)
     fc1w.set_nodeattr("outFIFODepth", 4)
@@ -146,6 +147,7 @@ def test_end2end_tfc_w1a1_fold_and_tlastmarker():
     fc3w.set_nodeattr("SIMD", 16)
     fc3w.set_nodeattr("PE", 10)
     fc3w.set_nodeattr("outFIFODepth", 50)
+    fc3w.set_nodeattr("ram_style", "distributed")
     model = model.transform(InsertDWC())
     model = model.transform(InsertTLastMarker())
     model = model.transform(GiveUniqueNodeNames())
diff --git a/tests/transformation/test_topk_insert.py b/tests/transformation/test_topk_insert.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac32c30edbbf466b2b441bcc92975a7d50f42bda
--- /dev/null
+++ b/tests/transformation/test_topk_insert.py
@@ -0,0 +1,58 @@
+import onnx
+from finn.util.test import get_test_model_trained
+import brevitas.onnx as bo
+import numpy as np
+import onnx.numpy_helper as nph
+import torch
+
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.insert_topk import InsertTopK
+
+import finn.core.onnx_exec as oxe
+from pkgutil import get_data
+
+import pytest
+
+export_onnx_path = "test_output_lfc.onnx"
+
+
+@pytest.mark.parametrize("k", [1, 5, 10])
+def test_topk_insert(k):
+    tfc = get_test_model_trained("TFC", 1, 1)
+    bo.export_finn_onnx(tfc, (1, 1, 28, 28), export_onnx_path)
+    model = ModelWrapper(export_onnx_path)
+
+    # do transformations (no topk)
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(InferDataTypes())
+
+    # verification: generate random input, run through net, streamline,
+    # run again, check that output is top-k
+    raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
+    input_tensor = onnx.load_tensor_from_string(raw_i)
+    input_brevitas = torch.from_numpy(nph.to_array(input_tensor)).float()
+    output_golden = tfc.forward(input_brevitas).detach().numpy()
+    output_golden_topk = np.flip(output_golden.flatten().argsort())[:k]
+    output_golden_topk = output_golden_topk.flatten()
+
+    input_dict = {"global_in": nph.to_array(input_tensor)}
+
+    # insert top-k
+    model = model.transform(InsertTopK(k))
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(InferShapes())
+
+    # verify output of top-k
+    output_dict_topk = oxe.execute_onnx(model, input_dict)
+    output_pysim_topk = output_dict_topk[list(output_dict_topk.keys())[0]]
+    output_pysim_topk = output_pysim_topk.astype(int).flatten()
+
+    assert np.array_equal(output_golden_topk, output_pysim_topk)