diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index 2a6e41797307f350a8827050f9d027e72d547e57..b312737c317517ca0ab19c74cf22284b5977b661 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -13,7 +13,7 @@ gecho () {
 
 # checkout the correct dependency repo commits
 # the repos themselves are cloned in the Dockerfile
-BREVITAS_COMMIT=026a509186b7e7b0b65d46a2f905043d41069306
+BREVITAS_COMMIT=f9a27226d4acf1661dd38bc449f71f89e0983cce
 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
 HLSLIB_COMMIT=8f9f2018762f654f196b666838aeaf6fc730ad9a
 PYVERILATOR_COMMIT=c97a5ba41bbc7c419d6f25c74cdf3bdc3393174f
diff --git a/src/finn/custom_op/fpgadataflow/pool_batch.py b/src/finn/custom_op/fpgadataflow/pool_batch.py
index 1aedf7056b1381a8904ac63b3a64f86b1243cb36..c7edc24d0e24eef1154293caca2519ab3aa68358 100644
--- a/src/finn/custom_op/fpgadataflow/pool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/pool_batch.py
@@ -45,7 +45,7 @@ class Pool_Batch(HLSCustomOp):
 
     # note: the actual data layout produced by the hlslib kernels is different
     # for depthwise ops.
-    # * depthwise SWG: (1, OFMDim, OFMDim, IFMChannels/SIMD, K, K, SIMD)
+    # * depthwise SWG: (1, OFMDim, OFMDim, IFMChannels/PE, K, K, PE)
 
     Channels can be folded using PE (SIMD from the input perspective)
     TODO: doc
@@ -159,7 +159,7 @@ class Pool_Batch(HLSCustomOp):
     def infer_node_datatype(self, model):
         node = self.onnx_node
         # data type stays the same
-        dtype = model.get_tensor_datatype(node.input[0])
+        dtype = self.get_output_datatype()
         model.set_tensor_datatype(node.output[0], dtype)
 
     def verify_node(self):
diff --git a/src/finn/transformation/streamline/absorb.py b/src/finn/transformation/streamline/absorb.py
index dbcf97361017144174f9fbfca35a84361b5abd26..4266488c7d1b86f2997d4c77d70b80f88bf37442 100644
--- a/src/finn/transformation/streamline/absorb.py
+++ b/src/finn/transformation/streamline/absorb.py
@@ -28,11 +28,13 @@
 
 import numpy as np
 from onnx import helper as oh
+import warnings
 
 from finn.core.datatype import DataType
 from finn.transformation import Transformation
 from finn.util.basic import get_by_name
 from finn.custom_op.registry import getCustomOp
+from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.infer_datatypes import InferDataTypes
 
 
@@ -290,3 +292,38 @@ class AbsorbTransposeIntoMultiThreshold(Transformation):
         if graph_modified:
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
+
+
+class AbsorbScalarMulIntoTopK(Transformation):
+    """Absorb a Mul node into a succeeding TopK node if the Mul parameter is a
+    positive scalar constant. The TopK indices are unchanged by this; the TopK
+    values output is no longer scaled by the removed factor."""
+
+    def apply(self, model):
+        graph = model.graph
+        graph_modified = False
+        for n in list(graph.node):  # snapshot: Mul nodes are removed below
+            if n.op_type != "TopK":
+                continue
+            prod = model.find_producer(n.input[0])
+            if prod is None or prod.op_type != "Mul":
+                continue
+            # removing a Mul that feeds other nodes would leave them dangling
+            if model.is_fork_node(prod):
+                continue
+            A = model.get_initializer(prod.input[1])
+            if A is None:
+                warnings.warn("Param is not constant, skipping")
+                continue
+            if A.size == 1 and (A > 0).all():
+                # a positive scalar factor preserves the ordering of the values,
+                # so TopK selects the same indices; rewire TopK to the Mul input
+                n.input[0] = prod.input[0]
+                graph.node.remove(prod)
+                # to avoid errors the datatype is set to float32
+                model.set_tensor_datatype(n.input[0], DataType.FLOAT32)
+                graph_modified = True
+        if graph_modified:
+            model = model.transform(InferShapes())
+            model = model.transform(InferDataTypes())
+        return (model, graph_modified)
diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py
index b46b82c77a3f1b70a3b05d87cd3c48fc1d94fd45..a1bd16f6d0b70193122d5d067ccdee395260c7b1 100644
--- a/src/finn/transformation/streamline/reorder.py
+++ b/src/finn/transformation/streamline/reorder.py
@@ -32,6 +32,7 @@ from onnx import helper as oh
 
 from finn.transformation import Transformation
 from finn.transformation.infer_shapes import InferShapes
+from finn.core.datatype import DataType
 from finn.core.onnx_exec import execute_node
 from finn.util.basic import get_by_name
 from finn.custom_op.registry import getCustomOp
@@ -338,6 +339,71 @@ class MoveScalarMulPastConv(Transformation):
         return (model, graph_modified)
 
 
+class MoveMulPastDWConv(Transformation):
+    """Move channelwise Mul operations past depthwise Conv operations, so that
+    Muls end up next to each other and can be collapsed into a single Mul.
+
+    A Mul is moved only if its parameter is an initializer of shape (1, C, 1, 1)
+    and the consuming Conv has C input channels, C output channels and
+    group == C, so that each factor still lines up with its own channel."""
+
+    def apply(self, model):
+        graph = model.graph
+        node_ind = 0
+        graph_modified = False
+        for n in graph.node:
+            node_ind += 1
+            if n.op_type != "Mul" or model.is_fork_node(n) or model.is_join_node(n):
+                continue
+            consumer = model.find_consumer(n.output[0])
+            if (
+                consumer is None
+                or consumer.op_type != "Conv"
+                or model.is_join_node(consumer)
+            ):
+                continue
+            A = model.get_initializer(n.input[1])
+            if A is None:
+                warnings.warn(
+                    """Mul weight tensor is not set. If it is a constant,
+                                please use set_initializer to set the tensor."""
+                )
+                continue
+            group_attribute = get_by_name(consumer.attribute, "group")
+            if group_attribute is None:
+                continue
+            start_name = n.input[0]
+            conv_in_name = consumer.input[0]
+            conv_out_name = consumer.output[0]
+            conv_in_shape = model.get_tensor_shape(conv_in_name)
+            conv_out_shape = model.get_tensor_shape(conv_out_name)
+            ifm_ch = conv_in_shape[1]
+            # depthwise: one group per input channel and no channel multiplier,
+            # otherwise the (1, ifm_ch, 1, 1) factors cannot follow the conv
+            is_depthwise = group_attribute.i == ifm_ch == conv_out_shape[1]
+            if A.shape != (1, ifm_ch, 1, 1) or not is_depthwise:
+                continue
+            # mul is channelwise and conv is depthwise: swap the order of ops
+            # conv now reads the original mul input
+            consumer.input[0] = start_name
+            model.set_tensor_shape(start_name, conv_in_shape)
+            model.set_tensor_datatype(start_name, DataType.FLOAT32)
+            # the old conv input tensor becomes conv output and mul input
+            consumer.output[0] = conv_in_name
+            model.set_tensor_shape(conv_in_name, conv_out_shape)
+            model.set_tensor_datatype(conv_in_name, DataType.FLOAT32)
+            n.input[0] = conv_in_name
+            # the old conv output tensor becomes the mul output
+            n.output[0] = conv_out_name
+            model.set_tensor_datatype(conv_out_name, DataType.FLOAT32)
+            # after the removal conv sits at node_ind - 1: insert right after
+            graph.node.remove(n)
+            graph.node.insert(node_ind, n)
+            graph_modified = True
+        model = model.transform(InferShapes())
+        return (model, graph_modified)
+
+
 class MoveLinearPastEltwiseAdd(Transformation):
     """Move linear operations (mul, add) past elementwise add operations where possible.
        Specifically,matches and transforms the following patterns:
diff --git a/tests/transformation/test_absorb_mul_into_topk.py b/tests/transformation/test_absorb_mul_into_topk.py
new file mode 100644
index 0000000000000000000000000000000000000000..1394220f7c336ccea8fe9c494734c4175bf2e847
--- /dev/null
+++ b/tests/transformation/test_absorb_mul_into_topk.py
@@ -0,0 +1,108 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import pytest
+
+import numpy as np
+from onnx import TensorProto, helper
+
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.general import GiveUniqueNodeNames, GiveReadableTensorNames
+from finn.transformation.insert_topk import InsertTopK
+from finn.transformation.streamline.absorb import AbsorbScalarMulIntoTopK
+import finn.core.onnx_exec as oxe
+
+# whether the mul parameter is positive (True) or negative (False)
+@pytest.mark.parametrize("mul_positive", [True, False])
+# whether the mul parameter is a scalar (True) or a full tensor (False)
+@pytest.mark.parametrize("scalar", [True, False])
+def test_absorb_mul_into_topk(mul_positive, scalar):
+    """Check AbsorbScalarMulIntoTopK on a Mul followed by an inserted TopK.
+
+    The Mul must be removed only when its parameter is a positive scalar;
+    in every case the TopK indices must match those of the original model.
+    """
+    data_shape = [1, 1, 1, 1000]
+    param_shape = [1] if scalar else data_shape
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, data_shape)
+    a0 = helper.make_tensor_value_info("a0", TensorProto.FLOAT, param_shape)
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, data_shape)
+
+    mul_graph = helper.make_graph(
+        nodes=[helper.make_node("Mul", ["inp", "a0"], ["outp"])],
+        name="mul-graph",
+        inputs=[inp],
+        outputs=[outp],
+        value_info=[a0],
+    )
+    model = ModelWrapper(helper.make_model(mul_graph, producer_name="mul_model"))
+
+    # draw the mul parameter from a strictly positive or negative interval
+    low, high = (0.1, 1) if mul_positive else (-1, -0.1)
+    a0_values = np.random.uniform(low=low, high=high, size=tuple(param_shape))
+    model.set_initializer("a0", a0_values.astype(np.float32))
+
+    # append TopK and annotate the graph before applying the transformation
+    for prep in (
+        InsertTopK(),
+        InferShapes(),
+        InferDataTypes(),
+        GiveUniqueNodeNames(),
+        GiveReadableTensorNames(),
+    ):
+        model = model.transform(prep)
+    model_transformed = model.transform(AbsorbScalarMulIntoTopK())
+
+    # compare execution results on the same random input
+    inp_values = np.random.uniform(low=-10, high=10, size=(1, 1, 1, 1000))
+    idict = {"global_in": inp_values.astype(np.float32)}
+    # NOTE: tensor names presumably come from GiveReadableTensorNames - confirm
+    ref_out = oxe.execute_onnx(model, idict, True)
+    tr_out = oxe.execute_onnx(model_transformed, idict, True)
+    y_indices = ref_out["global_out"]
+    y_values = ref_out["TopK_0_out0"]
+    y_tr_indices = tr_out["global_out"]
+    y_tr_values = tr_out["TopK_0_out0"]
+
+    # the indices stay the same, if the model is transformed or not
+    assert (y_indices == y_tr_indices).all()
+
+    if not (scalar and mul_positive):
+        # nothing may change when the mul is not a positive scalar
+        assert (y_values == y_tr_values).all()
+        assert model.graph == model_transformed.graph
+        return
+
+    # the values are no longer scaled once the mul has been removed
+    assert (y_values != y_tr_values).all()
+
+    # one node fewer than before, with TopK now being the first node
+    assert model.graph != model_transformed.graph
+    assert len(model.graph.node) - 1 == len(model_transformed.graph.node)
+    assert model_transformed.graph.node[0].op_type == "TopK"
diff --git a/tests/transformation/test_move_mul_past_dw_conv.py b/tests/transformation/test_move_mul_past_dw_conv.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ae8fbfe89986d58d3d71f5f8735a98469d9d1e3
--- /dev/null
+++ b/tests/transformation/test_move_mul_past_dw_conv.py
@@ -0,0 +1,93 @@
+import pytest
+
+from onnx import helper, TensorProto
+from finn.custom_op.im2col import compute_conv_output_dim
+import finn.core.onnx_exec as oxe
+from finn.core.datatype import DataType
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.infer_shapes import InferShapes
+from finn.util.basic import gen_finn_dt_tensor
+from finn.transformation.streamline.reorder import MoveMulPastDWConv
+
+
+# input dimension
+@pytest.mark.parametrize("ifm_dim", [4, 7])
+# input channels
+@pytest.mark.parametrize("ifm_ch", [2, 3])
+# kernel size
+@pytest.mark.parametrize("k", [2, 3])
+# stride
+@pytest.mark.parametrize("stride", [1, 2])
+# padding
+@pytest.mark.parametrize("pad_amt", [0, 1])
+# depthwise
+@pytest.mark.parametrize("dw", [0, 1])
+def test_move_mul_past_dw_conv(ifm_dim, ifm_ch, k, stride, pad_amt, dw):
+    """Check MoveMulPastDWConv on a channelwise Mul followed by a Conv.
+
+    The execution result must be unchanged by the transformation; the Mul
+    and Conv nodes must swap places only when the Conv is depthwise (dw == 1).
+    """
+    depthwise = dw == 1
+    # the depthwise conv keeps the channel count and has one group per channel
+    ofm_ch = ifm_ch if depthwise else ifm_ch + 2
+    groups = ifm_ch if depthwise else 1
+    W_shape = [ofm_ch, 1 if depthwise else ifm_ch, k, k]
+    in_shape = [1, ifm_ch, ifm_dim, ifm_dim]
+    mul_shape = [1, ifm_ch, 1, 1]
+
+    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad_amt)
+    out_shape = [1, ofm_ch, ofm_dim, ofm_dim]
+
+    # set up onnx model
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, in_shape)
+    mul = helper.make_tensor_value_info("mul", TensorProto.FLOAT, mul_shape)
+    W = helper.make_tensor_value_info("W", TensorProto.FLOAT, W_shape)
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, out_shape)
+
+    conv_attrs = {
+        "group": groups,
+        "kernel_shape": [k, k],
+        "pads": [pad_amt] * 4,
+        "strides": [stride] * 2,
+    }
+    graph = helper.make_graph(
+        nodes=[
+            helper.make_node("Mul", ["inp", "mul"], ["mul_out"]),
+            helper.make_node("Conv", ["mul_out", "W"], ["outp"], **conv_attrs),
+        ],
+        name="mulpastconv_graph",
+        inputs=[inp],
+        outputs=[outp],
+        value_info=[mul, W],
+    )
+    model = ModelWrapper(helper.make_model(graph, producer_name="mulpastconv-model"))
+
+    # generate INT2-typed input, mul factors and conv weights
+    inp_values = gen_finn_dt_tensor(DataType.INT2, in_shape)
+    mul_values = gen_finn_dt_tensor(DataType.INT2, mul_shape)
+    W_values = gen_finn_dt_tensor(DataType.INT2, W_shape)
+    model.set_initializer("W", W_values)
+    model.set_initializer("mul", mul_values)
+    for prep in (InferShapes(), InferDataTypes()):
+        model = model.transform(prep)
+
+    # reference result before the transformation
+    idict = {"inp": inp_values}
+    out_before = oxe.execute_onnx(model, idict, True)["outp"]
+
+    # move channelwise multiplication past depthwise conv
+    model_transformed = model.transform(MoveMulPastDWConv())
+    out_after = oxe.execute_onnx(model_transformed, idict, True)["outp"]
+
+    assert (out_before == out_after).all()
+
+    ops_before = [model.graph.node[i].op_type for i in (0, 1)]
+    ops_after = [model_transformed.graph.node[i].op_type for i in (0, 1)]
+    if depthwise:
+        # Mul and Conv must have swapped places
+        assert ops_before == ops_after[::-1]
+    else:
+        # a regular conv must leave the node order untouched
+        assert ops_before == ops_after