diff --git a/src/finn/core/data_layout.py b/src/finn/core/data_layout.py
new file mode 100644
index 0000000000000000000000000000000000000000..3971d221527d3862346c06cf415831c27e5cba8b
--- /dev/null
+++ b/src/finn/core/data_layout.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# predefined lists of strings to have a canonical way of expressing data layout
+# annotations
+
+NHWC = ["N", "H", "W", "C"]
+NCHW = ["N", "C", "H", "W"]
+NC = ["N", "C"]
+UNKNOWN = []
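As a quick usage sketch (commentary, not part of the diff): the layout constants are plain Python lists, so layout checks reduce to list equality.

```python
import finn.core.data_layout as DataLayout

# canonical layouts are lists of per-dimension labels
assert DataLayout.NHWC == ["N", "H", "W", "C"]
assert DataLayout.NC == ["N", "C"]
# UNKNOWN is the empty list; note this is distinct from None,
# which get_tensor_layout returns when no annotation exists at all
assert DataLayout.UNKNOWN == []
```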
diff --git a/src/finn/core/modelwrapper.py b/src/finn/core/modelwrapper.py
index dc5b36920a5639933463d682dc66fb8bc15b35f2..cdf99dc3bd8b698bec60d79ef6e34640ac3b740c 100644
--- a/src/finn/core/modelwrapper.py
+++ b/src/finn/core/modelwrapper.py
@@ -137,11 +137,16 @@ class ModelWrapper:
         qnt_annotations = graph.quantization_annotation
         ret = util.get_by_name(qnt_annotations, tensor_name, "tensor_name")
         if ret is not None:
-            ret = util.get_by_name(
+            ret_dt = util.get_by_name(
                 ret.quant_parameter_tensor_names, "finn_datatype", "key"
             )
-            if ret is not None:
-                ret.value = datatype.name
+            if ret_dt is not None:
+                ret_dt.value = datatype.name
+            else:
+                dt = onnx.StringStringEntryProto()
+                dt.key = "finn_datatype"
+                dt.value = datatype.name
+                ret.quant_parameter_tensor_names.append(dt)
         else:
             qa = onnx.TensorAnnotation()
             dt = onnx.StringStringEntryProto()
@@ -434,3 +439,58 @@ class ModelWrapper:
                 n_ind += 1
         except ValueError:
             return None
+
+    def get_tensor_layout(self, tensor_name):
+        """Returns the data layout annotation of tensor with given name.
+        The data layout is expressed as a list of strings with as many
+        elements as the number of dimensions in the tensor shape. Each
+        string annotates what is contained in that dimension. If there is no
+        data layout annotation, None will be returned.
+        Examples of data layout annotations:
+        ["N", "C"] is tensor[batch][channel]
+        ["N", "C", "H", "W"] is tensor[batch][channel][height][width]
+        ["N", "H", "W", "C"] is tensor[batch][height][width][channel]
+        """
+        graph = self._model_proto.graph
+        qnt_annotations = graph.quantization_annotation
+        ret = util.get_by_name(qnt_annotations, tensor_name, "tensor_name")
+        if ret is not None:
+            ret = util.get_by_name(
+                ret.quant_parameter_tensor_names, "tensor_layout", "key"
+            )
+            if ret is not None:
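+                # layout is stored as str(list) by set_tensor_layout; eval recovers it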
+                return eval(ret.value)
+        return None
+
+    def set_tensor_layout(self, tensor_name, data_layout):
+        """Sets the data layout annotation of tensor with given name. See
+        get_tensor_layout for examples."""
+        tensor_shape = self.get_tensor_shape(tensor_name)
+        assert isinstance(data_layout, list), "data_layout must be a list"
+        if tensor_shape is not None:
+            assert len(tensor_shape) == len(
+                data_layout
+            ), """Mismatch between number
+            of dimensions of tensor shape and data layout annotation."""
+        graph = self._model_proto.graph
+        qnt_annotations = graph.quantization_annotation
+        ret = util.get_by_name(qnt_annotations, tensor_name, "tensor_name")
+        if ret is not None:
+            ret_tl = util.get_by_name(
+                ret.quant_parameter_tensor_names, "tensor_layout", "key"
+            )
+            if ret_tl is not None:
+                ret_tl.value = str(data_layout)
+            else:
+                tl = onnx.StringStringEntryProto()
+                tl.key = "tensor_layout"
+                tl.value = str(data_layout)
+                ret.quant_parameter_tensor_names.append(tl)
+        else:
+            qa = onnx.TensorAnnotation()
+            dt = onnx.StringStringEntryProto()
+            dt.key = "tensor_layout"
+            dt.value = str(data_layout)
+            qa.tensor_name = tensor_name
+            qa.quant_parameter_tensor_names.append(dt)
+            qnt_annotations.append(qa)
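A minimal round-trip sketch of the new accessors; the model file and tensor name here are hypothetical:

```python
import finn.core.data_layout as DataLayout
from finn.core.modelwrapper import ModelWrapper

model = ModelWrapper("model.onnx")  # hypothetical model file
# an unannotated tensor reports None rather than DataLayout.UNKNOWN
assert model.get_tensor_layout("global_in") is None
model.set_tensor_layout("global_in", DataLayout.NCHW)
assert model.get_tensor_layout("global_in") == DataLayout.NCHW
```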
diff --git a/src/finn/transformation/infer_data_layouts.py b/src/finn/transformation/infer_data_layouts.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ac75578ffb911cc44cfddc2b2119b55e6abf2dd
--- /dev/null
+++ b/src/finn/transformation/infer_data_layouts.py
@@ -0,0 +1,116 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import finn.custom_op.registry as registry
+import finn.core.data_layout as DataLayout
+from finn.transformation import Transformation
+import warnings
+from finn.util.basic import get_by_name
+
+
+def _dims_to_layout(model, node, ndims):
+    if ndims == 2:
+        return DataLayout.NC
+    else:
+        if node.domain == "finn":
+            if node.op_type == "MultiThreshold":
+                mt_inst = registry.getCustomOp(node)
+                layout = mt_inst.get_nodeattr("data_layout")
+                if layout == "NHWC" and ndims == 4:
+                    return DataLayout.NHWC
+                elif layout == "NCHW" and ndims == 4:
+                    return DataLayout.NCHW
+                else:
+                    return DataLayout.UNKNOWN
+            else:
+                if ndims == 4:
+                    return DataLayout.NHWC
+                else:
+                    return DataLayout.UNKNOWN
+        else:
+            # propagate input layout to output
+            # TODO this won't work for concat, squeeze/unsqueeze/reshape...
+            return model.get_tensor_layout(node.input[0])
+
+
+def _infer_node_data_layout(model, node):
+    """Infer output data layout annotation(s) for a particular node.
+    Returns True if any changes were made."""
+    old_layouts = [model.get_tensor_layout(o) for o in node.output]
+    if node.domain == "finn":
+        # try to guess based on number of output dims
+        for o in node.output:
+            ndims = len(model.get_tensor_shape(o))
+            new_layout = _dims_to_layout(model, node, ndims)
+            model.set_tensor_layout(o, new_layout)
+    else:
+        if node.op_type == "Transpose":
+            # grab input annotation and switch it around using perm
+            perm = get_by_name(node.attribute, "perm").ints
+            inp_layout = model.get_tensor_layout(node.input[0])
+            # guard against inputs that have no layout annotation yet
+            if inp_layout is not None:
+                out_layout = [inp_layout[i] for i in perm]
+                model.set_tensor_layout(node.output[0], out_layout)
+        else:
+            # try to guess based on number of output dims
+            for o in node.output:
+                ndims = len(model.get_tensor_shape(o))
+                model.set_tensor_layout(o, _dims_to_layout(model, node, ndims))
+    # compare old and new output layouts to see if anything changed
+    new_layouts = [model.get_tensor_layout(o) for o in node.output]
+    graph_modified = new_layouts != old_layouts
+    return graph_modified
+
+
+class InferDataLayouts(Transformation):
+    """Try to infer data layout annotations info for all input/intermediate/output
+    tensors based on inputs and node type."""
+
+    def apply(self, model):
+        graph = model.graph
+        graph_modified = False
+        # first, make sure that the global input has an annotation
+        # this is hard to do in general, so we guess based on the number of dims
+        inp_name = graph.input[0].name
+        if model.get_tensor_layout(inp_name) is None:
+            inp_shape = model.get_tensor_shape(inp_name)
+            if len(inp_shape) == 4:
+                warnings.warn("Assuming 4D input is NCHW")
+                model.set_tensor_layout(inp_name, DataLayout.NCHW)
+                graph_modified = True
+            elif len(inp_shape) == 2:
+                graph_modified = True
+                warnings.warn("Assuming 2D input is NC")
+                model.set_tensor_layout(inp_name, DataLayout.NC)
+            else:
+                raise Exception(
+                    """Unknown number of dims for input, don't know
+                how to annotate"""
+                )
+        for node in graph.node:
+            graph_modified |= _infer_node_data_layout(model, node)
+        return (model, graph_modified)
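To make the Transpose handling above concrete: the output layout is the input layout permuted by the node's perm attribute, so an NCHW input with perm = [0, 2, 3, 1] comes out NHWC. A standalone sketch of the same indexing:

```python
import finn.core.data_layout as DataLayout

perm = [0, 2, 3, 1]  # the NCHW -> NHWC transpose
inp_layout = DataLayout.NCHW
out_layout = [inp_layout[i] for i in perm]
assert out_layout == DataLayout.NHWC
```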
diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py
index 96046602efb32a9262a4cf0bbb21a8367d719910..1886c785705161c3a13493de44dc3f3f86463f4f 100644
--- a/src/finn/transformation/streamline/reorder.py
+++ b/src/finn/transformation/streamline/reorder.py
@@ -34,8 +34,6 @@ from finn.transformation.infer_shapes import InferShapes
 from finn.core.onnx_exec import execute_node
 from finn.util.basic import get_by_name
 
-def is_scalar(x):
-    return np.prod(x.shape) == 1
 
 class MoveAddPastMul(Transformation):
     """Move add operations past multiply operations. The aim is to have them
@@ -273,12 +271,12 @@ class MoveScalarMulPastConv(Transformation):
         return (model, graph_modified)
 
 
-class MoveScalarLinearPastEltwiseAdd(Transformation):
-    """Move scalar linear operations (mul, add) past elementwise add operations where possible. Specifically,
-       matches and transforms the following patterns:
+class MoveLinearPastEltwiseAdd(Transformation):
+    """Move linear operations (mul, add) past elementwise add operations where possible.
+       Specifically, matches and transforms the following patterns:
        (x*C) + (y*C) -> (x + y) * C
        (x+A) + (y+B) -> (x + y) + (A + B)
-       where x and y are dynamic inputs, A, B, C are constants.
+       where x and y are dynamic inputs and A, B, C are constant tensors (not necessarily scalars).
     """
 
     def move_node(self, graph, n, prod0, prod1, node_ind):
@@ -305,7 +303,8 @@ class MoveScalarLinearPastEltwiseAdd(Transformation):
         graph = model.graph
         node_ind = 0
         graph_modified = False
-        for n in graph.node:
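+        # take a snapshot of the node list, since the graph is modified while iterating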
+        nodes = list(graph.node)
+        for n in nodes:
             node_ind += 1
             if n.op_type == "Add":
                 # check for tensors on both inputs (eltwise add)
@@ -321,17 +320,16 @@ class MoveScalarLinearPastEltwiseAdd(Transformation):
                 # check for mul with same initializer on both inputs
                 prod0 = model.find_producer(in0)
                 prod1 = model.find_producer(in1)
-                if prod0 is None or prod1 is None:
+                # also skip if both inputs come straight from the same
+                # producer node (prod0 == prod1); a different transform
+                # should handle that fork pattern
+                if prod0 is None or prod1 is None or (prod0 == prod1):
                     continue
                 init0 = model.get_initializer(prod0.input[1])
                 init1 = model.get_initializer(prod1.input[1])
                 # if either initializer is None, skip
                 if init0 is None or init1 is None:
                     continue
-                # if either initializer is non-scalar, skip
-                # TODO relax this to 1D tensors?
-                if (not is_scalar(init0)) or (not is_scalar(init1)):
-                    continue
                 if prod0.op_type == "Mul" and prod1.op_type == "Mul":
                     if np.array_equal(init0, init1):
                         self.move_node(graph, n, prod0, prod1, node_ind)
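The relaxed rewrite only needs the two patterns from the docstring to hold elementwise for full constant tensors, not just scalars; a quick numpy sanity check of both identities:

```python
import numpy as np

rng = np.random.default_rng(0)
x, y = rng.random((1, 4)), rng.random((1, 4))
A, B, C = rng.random((1, 4)), rng.random((1, 4)), rng.random((1, 4))

# (x*C) + (y*C) -> (x + y) * C, with C an arbitrary tensor
assert np.allclose(x * C + y * C, (x + y) * C)
# (x+A) + (y+B) -> (x + y) + (A + B)
assert np.allclose((x + A) + (y + B), (x + y) + (A + B))
```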
diff --git a/tests/core/test_modelwrapper.py b/tests/core/test_modelwrapper.py
index 839710681640deca01aa40d3ab420016f0e48165..d1da6934a5db07aabe41a9ca40b5de497b6460a1 100644
--- a/tests/core/test_modelwrapper.py
+++ b/tests/core/test_modelwrapper.py
@@ -31,6 +31,7 @@ import onnx
 from collections import Counter
 import brevitas.onnx as bo
 import numpy as np
+import finn.core.data_layout as DataLayout
 
 from finn.core.modelwrapper import ModelWrapper
 from finn.util.test import get_test_model_trained
@@ -67,6 +68,11 @@ def test_modelwrapper():
     assert inp_cons.op_type == "MatMul"
     out_prod = model.find_producer(l0_inp_tensor_name)
     assert out_prod.op_type == "MultiThreshold"
+    inp_layout = model.get_tensor_layout(inp_name)
+    assert inp_layout is None
+    inp_layout = DataLayout.NCHW
+    model.set_tensor_layout(inp_name, inp_layout)
+    assert model.get_tensor_layout(inp_name) == inp_layout
     os.remove(export_onnx_path)
 
 
diff --git a/tests/transformation/test_infer_data_layouts.py b/tests/transformation/test_infer_data_layouts.py
new file mode 100644
index 0000000000000000000000000000000000000000..fccc7813da6f98c8af4ade7ae562c99b32247a8b
--- /dev/null
+++ b/tests/transformation/test_infer_data_layouts.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+import brevitas.onnx as bo
+import finn.transformation.streamline.absorb as absorb
+from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.fold_constants import FoldConstants
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.streamline import Streamline
+from finn.util.test import get_test_model_trained
+from finn.transformation.double_to_single_float import DoubleToSingleFloat
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+from finn.transformation.infer_data_layouts import InferDataLayouts
+import finn.core.data_layout as DataLayout
+
+export_onnx_path_cnv = "test_output_cnv.onnx"
+
+
+def test_infer_data_layouts():
+    cnv = get_test_model_trained("CNV", 1, 1)
+    bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
+    model = ModelWrapper(export_onnx_path_cnv)
+    model = model.transform(DoubleToSingleFloat())
+    model = model.transform(InferShapes())
+    model = model.transform(FoldConstants())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(Streamline())
+    model = model.transform(InferDataLayouts())
+
+    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
+    assert model.get_tensor_layout("Conv_0_out0") == DataLayout.NCHW
+    assert model.get_tensor_layout("MaxPool_0_out0") == DataLayout.NCHW
+    assert model.get_tensor_layout("MultiThreshold_6_out0") == DataLayout.NCHW
+    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
+    assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NC
+    assert model.get_tensor_layout("global_out") == DataLayout.NC
+
+    model = model.transform(LowerConvsToMatMul())
+    model = model.transform(MakeMaxPoolNHWC())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(InferDataLayouts())
+
+    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
+    assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC
+    assert model.get_tensor_layout("Im2Col_0_out0") == DataLayout.NHWC
+    # note: im2col output isn't really NHWC or any other common layout
+    # since the concept of channels changes with lowering... but it is
+    # conceptually close to NHWC since the innermost dim gets multiplied
+    assert model.get_tensor_layout("MatMul_0_out0") == DataLayout.NHWC
+    assert model.get_tensor_layout("Transpose_1_out0") == DataLayout.NCHW
+    assert model.get_tensor_layout("Transpose_2_out0") == DataLayout.NHWC
+    assert model.get_tensor_layout("MaxPoolNHWC_0_out0") == DataLayout.NHWC
+    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
+    assert model.get_tensor_layout("global_out") == DataLayout.NC
+
+    model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold())
+    model = model.transform(ConvertBipolarMatMulToXnorPopcount())
+    model = model.transform(Streamline())
+    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
+    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
+    model = model.transform(to_hls.InferConvInpGen())
+    model = model.transform(to_hls.InferStreamingMaxPool())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(InferDataLayouts())
+
+    assert model.get_tensor_layout("global_in") == DataLayout.NCHW
+    assert model.get_tensor_layout("Transpose_0_out0") == DataLayout.NHWC
+    # note: im2col output isn't really NHWC or any other common layout
+    # since the concept of channels changes with lowering... but it is
+    # conceptually close to NHWC since the innermost dim gets multiplied
+    assert (
+        model.get_tensor_layout("ConvolutionInputGenerator_0_out0") == DataLayout.NHWC
+    )
+    assert model.get_tensor_layout("StreamingFCLayer_Batch_3_out0") == DataLayout.NHWC
+    assert model.get_tensor_layout("Reshape_0_out0") == DataLayout.NC
+    assert model.get_tensor_layout("StreamingFCLayer_Batch_6_out0") == DataLayout.NC
+    assert model.get_tensor_layout("global_out") == DataLayout.NC
+
+    os.remove(export_onnx_path_cnv)
diff --git a/tests/transformation/test_scalar_past_eltwise.py b/tests/transformation/test_linear_past_eltwise.py
similarity index 69%
rename from tests/transformation/test_scalar_past_eltwise.py
rename to tests/transformation/test_linear_past_eltwise.py
index e845f32176a9293046b297b7d9e2ab64fabc1791..b77f59779a5e8559f80e017d13b66bcb67249830 100644
--- a/tests/transformation/test_scalar_past_eltwise.py
+++ b/tests/transformation/test_linear_past_eltwise.py
@@ -35,7 +35,7 @@ import finn.core.onnx_exec as oxe
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fold_constants import FoldConstants
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
-from finn.transformation.streamline.reorder import MoveScalarLinearPastEltwiseAdd
+from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.double_to_single_float import DoubleToSingleFloat
 
@@ -95,7 +95,7 @@ def make_model(shape):
 @pytest.mark.parametrize("ch", [64])
 # ifmdim
 @pytest.mark.parametrize("ifmdim", [-1, 7])
-def test_scalar_past_eltwise(ch, ifmdim):
+def test_linear_past_eltwise_add(ch, ifmdim):
     # generate test vectors of correct shape
     if ifmdim == -1:
         input_tensor_shape = (1, ch)
@@ -124,7 +124,7 @@ def test_scalar_past_eltwise(ch, ifmdim):
     assert len(model.get_nodes_by_op_type("Add")) == 3
     assert len(model.get_nodes_by_op_type("Mul")) == 2
 
-    model = model.transform(MoveScalarLinearPastEltwiseAdd())
+    model = model.transform(MoveLinearPastEltwiseAdd())
 
     # verify again, to check we didnt break anything
     output_dict = oxe.execute_onnx(model, input_dict, True)
@@ -134,3 +134,68 @@ def test_scalar_past_eltwise(ch, ifmdim):
     assert len(model.get_nodes_by_op_type("Mul")) == 1
 
     os.remove(export_onnx_path)
+
+
+@pytest.mark.parametrize("ch", [64, 1])
+# ifmdim
+@pytest.mark.parametrize("ifmdim", [-1, 7])
+def test_linear_past_eltwise_add_multiple_forks(ch, ifmdim):
+    # generate test vectors of correct shape
+    if ifmdim == -1:
+        input_shape = (1, ch)
+    else:
+        input_shape = (1, ch, ifmdim, ifmdim)
+
+    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
+    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, input_shape)
+
+    num_of_params = 6
+    value_info = []
+    for i in range(num_of_params):
+        value_info += [
+            helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT, input_shape)
+        ]
+
+    modelproto = helper.make_model(
+        helper.make_graph(
+            name="test",
+            inputs=[top_in],
+            outputs=[top_out],
+            value_info=value_info,
+            nodes=[
+                helper.make_node("Add", ["top_in", "p0"], ["fork1"]),
+                helper.make_node("Mul", ["fork1", "p1"], ["t2"]),
+                helper.make_node("Mul", ["fork1", "p2"], ["t3"]),
+                helper.make_node("Add", ["t2", "t3"], ["t4"]),
+                helper.make_node("Mul", ["t4", "p3"], ["fork2"]),
+                helper.make_node("Add", ["fork2", "p4"], ["t5"]),
+                helper.make_node("Add", ["fork2", "p5"], ["t6"]),
+                helper.make_node("Add", ["t5", "t6"], ["top_out"]),
+            ],
+        )
+    )
+    model = ModelWrapper(modelproto)
+    model = model.transform(InferShapes())
+
+    np.random.seed(0)
+    for i in range(num_of_params):
+        model.set_initializer(
+            "p" + str(i), np.random.rand(*input_shape).astype(np.float32)
+        )
+
+    # the transform requires equal Mul initializers on both branches:
+    model.set_initializer("p2", model.get_initializer("p1"))
+
+    # Transform
+    new_model = model.transform(MoveLinearPastEltwiseAdd())
+    inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)}
+
+    # Test
+    assert oxe.compare_execution(model, new_model, inp_dict)
+    assert new_model.graph.node[0].op_type == "Add"
+    assert new_model.graph.node[1].op_type == "Add"
+    assert new_model.graph.node[2].op_type == "Mul"
+    assert new_model.graph.node[3].op_type == "Mul"
+    assert new_model.graph.node[4].op_type == "Add"
+    assert new_model.graph.node[5].op_type == "Add"
+    assert len(new_model.graph.node) == 6