diff --git a/src/finn/transformation/__init__.py b/src/finn/transformation/__init__.py
index a4e0bcf330a8ad1797eb76e61ba63511eb903dcf..e9f5fe15f6bdefe1e739394495f67a972ccff669 100644
--- a/src/finn/transformation/__init__.py
+++ b/src/finn/transformation/__init__.py
@@ -48,6 +48,8 @@ Guide to writing FINN transformations
 """
 
 from abc import ABC, abstractmethod
+from finn.util.basic import get_num_default_workers
+import multiprocessing as mp
 
 
 class Transformation(ABC):
@@ -60,3 +62,54 @@ class Transformation(ABC):
     @abstractmethod
     def apply(self, model):
         pass
+
+
+class NodeLocalTransformation(Transformation):
+    """
+    Parent class for transformations, which can be executed locally to one node
+    by accessing and modifying the attributes of only that node.
+    This class can then automatically parallelize the transformation.
+    Transformations sublcassing NodeLocalTransformation must implement the
+    abstract method applyNodeLocal().
+
+    To control the degree of parallelization, specify the num_workers argument
+    in the constructor, using one of the following values:
+    * None: use NUM_DEFAULT_WORKERS environment variable
+    * 0: use all available CPU cores
+    * (any other int>0): set number of parallel workers
+    """
+
+    def __init__(self, num_workers=None):
+        super().__init__()
+        if num_workers is None:
+            self._num_workers = get_num_default_workers()
+        else:
+            self._num_workers = num_workers
+        assert self._num_workers >= 0, "Number of workers must be nonnegative."
+        if self._num_workers == 0:
+            self._num_workers = mp.cpu_count()
+
+    @abstractmethod
+    def applyNodeLocal(self, node):
+        pass
+
+    def apply(self, model):
+        # Remove old nodes from the current model
+        old_nodes = []
+        for i in range(len(model.graph.node)):
+            old_nodes.append(model.graph.node.pop())
+
+        # Execute transformation in parallel
+        with mp.Pool(self._num_workers) as p:
+            new_nodes_and_bool = p.map(self.applyNodeLocal, old_nodes, chunksize=1)
+
+        # Extract the nodes and check whether the transformation must run again;
+        # iterate in reverse because .pop() above reversed the node order
+        run_again = False
+        for node, run in reversed(new_nodes_and_bool):
+            # Reattach new nodes to old model
+            model.graph.node.append(node)
+            if run is True:
+                run_again = True
+
+        return (model, run_again)
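
For orientation, here is a minimal sketch of how a node-local transformation could be written against this base class. The class name, the doc_string marker it sets, and the usage line are made up for illustration and are not part of this patch.

# Minimal sketch (hypothetical): a subclass that only touches the node it is given.
# It must be defined at module level so the worker pool can pickle it.
from finn.transformation import NodeLocalTransformation


class TagNodeDocString(NodeLocalTransformation):
    """Write a marker into every node's doc_string, one node per worker."""

    def applyNodeLocal(self, node):
        # only this node's attributes may be read or modified here
        node.doc_string = "visited:" + node.name
        # the second tuple entry reports whether the transformation must run again
        return (node, False)


# usage (assuming an existing ModelWrapper instance `model`):
#   model = model.transform(TagNodeDocString(num_workers=0))  # 0 = all CPU cores
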
diff --git a/src/finn/transformation/fpgadataflow/compile.py b/src/finn/transformation/fpgadataflow/compile.py
index e577c3af6d2b92d8a2c63e89e3b1bca21d3d7c0a..a76ab683209bbb1219517075ff29a75540dc7bfc 100644
--- a/src/finn/transformation/fpgadataflow/compile.py
+++ b/src/finn/transformation/fpgadataflow/compile.py
@@ -28,28 +28,30 @@
 
 import finn.custom_op.registry as registry
 import finn.util.basic as util
-from finn.transformation import Transformation
+from finn.transformation import NodeLocalTransformation
 
 
-class Compile(Transformation):
+class Compile(NodeLocalTransformation):
     """For every node: compile C++ code in node attribute "code_gen_dir_npysim"
     and save path to executables in node attribute "executable_path".
     All nodes in the graph must have the fpgadataflow backend attribute.
 
     To use these executables, exec_mode must be set to "npysim" (using transformation
     SetExecMode) and the model has to be executed using execute_onnx() from
-    finn.core.onnx_exec"""
+    finn.core.onnx_exec
 
-    def __init__(self):
-        super().__init__()
+    * num_workers (int or None): number of parallel workers; see the
+      NodeLocalTransformation documentation for details.
+    """
 
-    def apply(self, model):
-        for node in model.graph.node:
-            op_type = node.op_type
-            if node.domain == "finn":
-                backend_attribute = util.get_by_name(node.attribute, "backend")
-                if backend_attribute is None:
-                    continue
+    def __init__(self, num_workers=None):
+        super().__init__(num_workers=num_workers)
+
+    def applyNodeLocal(self, node):
+        op_type = node.op_type
+        if node.domain == "finn":
+            backend_attribute = util.get_by_name(node.attribute, "backend")
+            if backend_attribute is not None:
                 backend_value = backend_attribute.s.decode("UTF-8")
                 if backend_value == "fpgadataflow":
                     try:
@@ -74,4 +76,4 @@ class Compile(Transformation):
                         raise Exception(
                             "Custom op_type %s is currently not supported." % op_type
                         )
-        return (model, False)
+        return (node, False)
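
Callers can now opt into parallel compilation. A usage sketch, assuming a prepared model (code generation for npysim already done) and the usual ModelWrapper import path; "model.onnx" is a placeholder filename.

from finn.core.modelwrapper import ModelWrapper
from finn.transformation.fpgadataflow.compile import Compile

model = ModelWrapper("model.onnx")  # placeholder path to a prepared model
# 4 worker processes; num_workers=None falls back to NUM_DEFAULT_WORKERS, 0 uses all cores
model = model.transform(Compile(num_workers=4))
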
diff --git a/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py b/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py
index 9fb7f8652d1fa5e624776a81ff6946d67882aa2a..2a40b3c2302a432937d45e807515e795f02e0365 100644
--- a/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py
+++ b/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py
@@ -28,50 +28,54 @@
 
 import finn.custom_op.registry as registry
 import finn.util.basic as util
-from finn.transformation import Transformation
+from finn.transformation import NodeLocalTransformation
 
 
-class HLSSynth_IPGen(Transformation):
+class HLSSynth_IPGen(NodeLocalTransformation):
     """For each node: generate IP block from code in folder
     that is referenced in node attribute "code_gen_dir_ipgen"
     and save path of generated project in node attribute "ipgen_path".
     All nodes in the graph must have the fpgadataflow backend attribute.
 
     This transformation calls Vivado HLS for synthesis, so it will run for
-    some time (several minutes)"""
+    some time (several minutes)
 
-    def __init__(self):
-        super().__init__()
+    * num_workers (int or None): number of parallel workers; see the
+      NodeLocalTransformation documentation for details.
+    """
 
-    def apply(self, model):
-        for node in model.graph.node:
-            op_type = node.op_type
-            if node.domain == "finn":
-                backend_attribute = util.get_by_name(node.attribute, "backend")
-                if backend_attribute is None:
-                    continue
-                backend_value = backend_attribute.s.decode("UTF-8")
-                if backend_value == "fpgadataflow":
-                    try:
-                        # lookup op_type in registry of CustomOps
-                        inst = registry.custom_op[op_type](node)
-                        # ensure that code is generated
-                        assert (
-                            inst.get_nodeattr("code_gen_dir_ipgen") != ""
-                        ), """Node
-                        attribute "code_gen_dir_ipgen" is empty. Please run
-                        transformation CodeGen_ipgen first."""
-                        # call the compilation function for this node
-                        inst.ipgen_singlenode_code()
-                        # ensure that executable path is now set
-                        assert (
-                            inst.get_nodeattr("ipgen_path") != ""
-                        ), """Transformation
-                        HLSSynth_IPGen was not successful. Node attribute "ipgen_path"
-                        is empty."""
-                    except KeyError:
-                        # exception if op_type is not supported
-                        raise Exception(
-                            "Custom op_type %s is currently not supported." % op_type
-                        )
-        return (model, False)
+    def __init__(self, num_workers=None):
+        super().__init__(num_workers=num_workers)
+
+    def applyNodeLocal(self, node):
+        op_type = node.op_type
+        if node.domain == "finn":
+            backend_attribute = util.get_by_name(node.attribute, "backend")
+            if backend_attribute is None:
+                return (node, False)
+            backend_value = backend_attribute.s.decode("UTF-8")
+            if backend_value == "fpgadataflow":
+                try:
+                    # lookup op_type in registry of CustomOps
+                    inst = registry.custom_op[op_type](node)
+                    # ensure that code is generated
+                    assert (
+                        inst.get_nodeattr("code_gen_dir_ipgen") != ""
+                    ), """Node
+                    attribute "code_gen_dir_ipgen" is empty. Please run
+                    transformation CodeGen_ipgen first."""
+                    # call the compilation function for this node
+                    inst.ipgen_singlenode_code()
+                    # ensure that executable path is now set
+                    assert (
+                        inst.get_nodeattr("ipgen_path") != ""
+                    ), """Transformation
+                    HLSSynth_IPGen was not successful. Node attribute "ipgen_path"
+                    is empty."""
+                except KeyError:
+                    # exception if op_type is not supported
+                    raise Exception(
+                        "Custom op_type %s is currently not supported." % op_type
+                    )
+
+        return (node, False)
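
Since each node's Vivado HLS run is independent and slow, the same knob applies here. A sketch that leaves num_workers at its default and steers parallelism through the environment instead; the worker count of 2 is arbitrary, and `model` is an existing ModelWrapper as in the Compile example above.

import os

from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen

# with num_workers=None, the value of NUM_DEFAULT_WORKERS is picked up at construction time
os.environ["NUM_DEFAULT_WORKERS"] = "2"
model = model.transform(HLSSynth_IPGen())
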
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index f99a453d05d7cb3c824784e80103b6021f072a79..4eb0e6cb874f80620e3cb25017abcc29368b261b 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -44,6 +44,17 @@ pynq_part_map["Pynq-Z2"] = "xc7z020clg400-1"
 pynq_part_map["ZCU104"] = "xczu7ev-ffvc1156-2-e"
 
 
+def get_num_default_workers():
+    """Return the number of workers for parallel transformations. Controllable
+    via the NUM_DEFAULT_WORKERS environment variable. If the env.var. is
+    undefined, the default value of 1 is returned.
+    """
+
+    try:
+        return int(os.environ["NUM_DEFAULT_WORKERS"])
+    except KeyError:
+        return 1
+
 
 def get_finn_root():
     "Return the root directory that FINN is cloned into."
diff --git a/tests/end2end/test_end2end_cnv_w1a1.py b/tests/end2end/test_end2end_cnv_w1a1.py
index 1a59191a085616d08d0910b28a9e62cb6596b7c4..75dc6c84f28fb03197e36bed3588670b5d37d2db 100644
--- a/tests/end2end/test_end2end_cnv_w1a1.py
+++ b/tests/end2end/test_end2end_cnv_w1a1.py
@@ -112,8 +112,8 @@ def test_end2end_cnv_w1a1_streamline():
 
 def test_end2end_cnv_w1a1_convert_to_hls_layers():
     model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
-    model = model.transform(to_hls.InferBinaryStreamingFCLayer())
-    model = model.transform(to_hls.InferQuantizedStreamingFCLayer())
+    model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
+    model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
     model = model.transform(to_hls.InferConvInpGen())
     model = model.transform(to_hls.InferStreamingMaxPool())
     model = model.transform(MoveReshape())