diff --git a/requirements.txt b/requirements.txt
index 2427f9490a3dd5a7ffe0e0a8cf2ad19af0934cdf..6b8e4d02c8ca1dcdbe607aabdccd27cec8056332 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ pre-commit
 pyverilator
 scipy
 sphinx
+toposort
 wget
diff --git a/src/finn/core/modelwrapper.py b/src/finn/core/modelwrapper.py
index 2896b09e0f54d6d0492c5330ec5da4110e257d30..646add188c5d475cf37ccd33cf24d29d61754ae1 100644
--- a/src/finn/core/modelwrapper.py
+++ b/src/finn/core/modelwrapper.py
@@ -259,11 +259,10 @@ class ModelWrapper:
 
     def find_producer(self, tensor_name):
         """Finds and returns the node that produces the tensor with given name."""
-        ret = None
         for x in self._model_proto.graph.node:
             if tensor_name in x.output:
-                ret = x
-        return ret
+                return x
+        return None
 
     def find_upstream(self, tensor_name, finder_fxn):
         """Follow the producer chain upstream, calling finder_fxn on each upstream
diff --git a/src/finn/transformation/general.py b/src/finn/transformation/general.py
index f51ffbcfd9f62e06bf4942409fbb163e92ff6370..488391740fc25f1f7caa657adc9ed55bdc2f9722 100644
--- a/src/finn/transformation/general.py
+++ b/src/finn/transformation/general.py
@@ -28,6 +28,7 @@
 
 import finn.util.basic as util
 from finn.transformation import Transformation
+from toposort import toposort_flatten
 
 
 class GiveUniqueNodeNames(Transformation):
@@ -104,11 +105,13 @@ class GiveUniqueParameterTensors(Transformation):
                     # first occurance
                     seen_parameters += [node_input]
                     continue
-                    
+
                 new_param_name = model.make_new_valueinfo_name()
 
                 model.set_initializer(new_param_name, input_init)
-                model.set_tensor_datatype(new_param_name, model.get_tensor_datatype(node_input))
+                model.set_tensor_datatype(
+                    new_param_name, model.get_tensor_datatype(node_input)
+                )
 
                 # point node input to new tensor
                 n.input[input_idx] = new_param_name
@@ -116,6 +119,56 @@ class GiveUniqueParameterTensors(Transformation):
         return (model, graph_modified)
 
 
+class SortGraph(Transformation):
+    """ Returns the model with its node list sorted topologically.
+    Any ONNX graph to be executed must have a topologically sorted node list, as dictated
+    by the ONNX standard.
+    """
+    
+    # Notes on SortGraph performance:
+    #         benchmark in  tests/transformation/test_sort_graph.py
+    # 
+    #         The algorithm doesn't move initializers so its performance should only depend on
+    #         the number of nodes
+    # 
+    #         Relative order of magnitudes for time per step:
+    #             - Gather graph structure:       base
+    #             - Sort nodes:                   0.1 of base
+    #             - Remove and insert in order :  0.001 of base
+    # 
+    #     Notes:
+    #         Remove nodes and insert them in order:
+    #           Probably this is faster than copying initializers and more robust in general
+
+    def apply(self, model):
+        # Gather graph structure
+        graph_dependencies = {}
+        node_list = [
+            n for n in model.graph.node
+        ]  # I also need the list to remove the nodes
+        for node_idx, n in enumerate(node_list):
+            node_pred = model.find_direct_predecessors(n)
+            if node_pred is None:
+                # Will also eliminate nodes that are floating around for some reason
+                continue
+
+            node_dependencies = [node_list.index(pred) for pred in node_pred]
+            graph_dependencies[node_idx] = set(node_dependencies)
+
+        # Sort nodes
+        sorted_node_indexes = toposort_flatten(graph_dependencies)
+
+        # Remove nodes and insert them in order
+        # Can't remove nodes before if I want to use model.find_direct_predecessors()
+        for n in node_list:
+            model.graph.node.remove(n)
+
+        for new_idx, sorted_idx in enumerate(sorted_node_indexes):
+            model.graph.node.insert(new_idx, node_list[sorted_idx])
+
+        return model, False
+
+
 class ConvertSubToAdd(Transformation):
     """Convert subtract-a-constant nodes to add-a-constant nodes."""
 
diff --git a/tests/transformation/test_sort_graph.py b/tests/transformation/test_sort_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..05842504c13b144bb34e8084fb12b5086fa84115
--- /dev/null
+++ b/tests/transformation/test_sort_graph.py
@@ -0,0 +1,150 @@
+from onnx import TensorProto, helper
+import numpy as np
+
+from finn.core.modelwrapper import ModelWrapper
+from finn.transformation.general import SortGraph
+from finn.transformation.infer_shapes import InferShapes
+import pytest
+import finn.analysis.topology as ta
+
+
+def make_randomly_sorted_linear_model(num_of_nodes, seed=None):
+    if seed is not None:
+        np.random.seed(seed)
+
+    ch = 2
+    ifmdim = 16
+    input_shape = (1, ch, ifmdim, ifmdim)
+
+    top_in = helper.make_tensor_value_info("t0", TensorProto.FLOAT, input_shape)
+    top_out = helper.make_tensor_value_info(
+        "t" + str(num_of_nodes), TensorProto.FLOAT, input_shape
+    )
+
+    value_info = []
+    nodes = []
+    for i in range(num_of_nodes):
+        nodes += [
+            helper.make_node("Add", ["t" + str(i), "p" + str(i)], ["t" + str(i + 1)])
+        ]
+        value_info += [
+            helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT, input_shape)
+        ]
+
+    nodes = np.random.permutation(nodes)
+
+    modelproto = helper.make_model(
+        helper.make_graph(
+            name="test",
+            inputs=[top_in],
+            outputs=[top_out],
+            value_info=value_info,
+            nodes=nodes,
+        )
+    )
+    model = ModelWrapper(modelproto)
+    model = model.transform(InferShapes())
+
+    for i in range(num_of_nodes):
+        model.set_initializer(
+            "p" + str(i), np.random.rand(*input_shape).astype(np.float32)
+        )
+
+    return model
+
+
+@pytest.mark.parametrize("num_of_nodes", [64])
+def test_sort_linear_graph(num_of_nodes):
+    model = make_randomly_sorted_linear_model(num_of_nodes, seed=0)
+    new_model = model.transform(SortGraph())
+
+    # Test
+    ret = new_model.analysis(ta.nodes_topologically_sorted)
+    assert ret["nodes_topologically_sorted"], "Nodes are not topologically sorted."
+
+
+def test_sort_nonlinear_graph():
+    ch = 2
+    ifmdim = 16
+    input_shape = (1, ch, ifmdim, ifmdim)
+
+    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
+    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, input_shape)
+
+    num_of_params = 8
+    value_info = []
+    for i in range(num_of_params):
+        value_info += [
+            helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT, input_shape)
+        ]
+
+    modelproto = helper.make_model(
+        helper.make_graph(
+            name="test",
+            inputs=[top_in],
+            outputs=[top_out],
+            value_info=value_info,
+            nodes=[
+                # Not sorted nodes
+                helper.make_node("Mul", ["fork1", "p2"], ["t3"]),
+                helper.make_node("Add", ["t4", "p3"], ["t5"]),
+                helper.make_node("Add", ["t2", "t3"], ["t4"]),
+                helper.make_node("Add", ["t6", "t7"], ["t8"]),
+                helper.make_node("Add", ["fork3", "fork3"], ["top_out"]),
+                helper.make_node("Mul", ["t5", "p4"], ["fork2"]),
+                helper.make_node("Add", ["top_in", "p0"], ["fork1"]),
+                helper.make_node("Mul", ["fork1", "p1"], ["t2"]),
+                helper.make_node("Add", ["fork2", "p5"], ["t6"]),
+                helper.make_node("Add", ["fork2", "p6"], ["t7"]),
+                helper.make_node("Mul", ["t8", "p7"], ["fork3"]),
+            ],
+        )
+    )
+    model = ModelWrapper(modelproto)
+    model = model.transform(InferShapes())
+
+    np.random.seed(0)
+    for i in range(num_of_params):
+        model.set_initializer(
+            "p" + str(i), np.random.rand(*input_shape).astype(np.float32)
+        )
+
+    new_model = model.transform(SortGraph())
+
+    # Test
+    ret = new_model.analysis(ta.nodes_topologically_sorted)
+    assert ret["nodes_topologically_sorted"], "Nodes are not topologically sorted."
+
+
+if __name__ == "__main__":
+    import time
+
+    sizes = [10, 50, 100, 500, 1000]
+    times = []
+    reps = 10
+
+    print("SortGraph performance test:")
+    print("Test sizes", sizes)
+    print("Repetitions per size:", reps)
+    for sz in sizes:
+        acc_time = 0
+        print(" Testing size ", sz)
+        for i in range(reps):
+            # it should take the same time even with the sorted one
+            # but better new model each time as it is a more general approach
+            model = make_randomly_sorted_linear_model(sz)  # new model as seed is None
+            bef = time.time()
+            new_model = model.transform(SortGraph(), make_deepcopy=False)
+            acc_time += time.time() - bef
+
+        times += [acc_time / reps]
+
+    # print csv
+    print("\nnum_of_nodes,  seconds")
+    for sz, tm in zip(sizes, times):
+        print("{:12d}, {:6.4e}".format(sz, tm))
+
+    # plot
+    # import matplotlib.pyplot as plt
+    # plt.plot(sizes,times,"--o")
+    # plt.grid(True)