diff --git a/docs/finn/example_networks.rst b/docs/finn/example_networks.rst
index 0f919f6df8d3bbaea8e5dc266095322567aa1401..47c9a976cb14a3e175dff6800ad8a5da60b44ecb 100644
--- a/docs/finn/example_networks.rst
+++ b/docs/finn/example_networks.rst
@@ -28,7 +28,7 @@ version, this is indicated by an x mark in the table.
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
 | Hardware test         | x          | x        | x        |          | x        |          |          |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
-| npysim                | x          | x        | x        | x        | x        |          |          |
+| cppsim                | x          | x        | x        | x        | x        |          |          |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
 | rtlsim node-by-node   | x          | x        | x        | x        | x        |          |          |
 +-----------------------+------------+----------+----------+----------+----------+----------+----------+
diff --git a/docs/finn/nw_prep.rst b/docs/finn/nw_prep.rst
index 96dfcc184261b5e24a4e7c51eef74fff55d0d841..2ccbb8d0ff65c4d8b1476e38002cd52dc0e4fdf2 100644
--- a/docs/finn/nw_prep.rst
+++ b/docs/finn/nw_prep.rst
@@ -12,7 +12,7 @@ Network Preparation
 
 The main principle of FINN are analysis and transformation passes. If you like to have more information about these please have a look at section :ref:`analysis_pass` and :ref:`transformation_pass` or at chapter :ref:`tutorials` about the provided Jupyter notebooks.
 
-This page is about the network preparation, the flow step that comes after the :ref:`brevitas_export`. Its main idea is to optimize the network and convert the nodes to custom nodes that correspond to `finn-hlslib <https://github.com/Xilinx/finn-hlslib>`_ functions. In this way we get a network that we can bring to hardware with the help of Vivado. For that we have to apply several transformations on the ONNX model, which this flow step receives wrapped in the :ref:`modelwrapper`. 
+This page is about the network preparation, the flow step that comes after the :ref:`brevitas_export`. Its main idea is to optimize the network and convert the nodes to custom nodes that correspond to `finn-hlslib <https://github.com/Xilinx/finn-hlslib>`_ functions. In this way we get a network that we can bring to hardware with the help of Vivado. For that we have to apply several transformations on the ONNX model, which this flow step receives wrapped in the :ref:`modelwrapper`.
 
 Various transformations are involved in the network preparation. The following is a short overview of these.
 
@@ -42,11 +42,11 @@ Pairs of binary XNORPopcountMatMul layers are converted to StreamingFCLayers and
 Dataflow Partitioning
 =====================
 
-In the next step the graph is split and the part consisting of HLS layers is further processed in the FINN flow. The parent graph containing the non-HLS layers remains. The PE and SIMD are set to 1 by default, so the result is a network of only HLS layers with maximum folding. The model can be verified using the *npysim* simulation. It is a simulation using C++ and is described in more detail in chapter :ref:`verification`.
+In the next step the graph is split and the part consisting of HLS layers is further processed in the FINN flow. The parent graph containing the non-HLS layers remains. The PE and SIMD are set to 1 by default, so the result is a network of only HLS layers with maximum folding. The model can be verified using the *cppsim* simulation. It is a simulation using C++ and is described in more detail in chapter :ref:`verification`.
 
 Folding
 =======
 
-To adjust the folding, the values for PE and SIMD can be increased to achieve also an increase in the performance. The result can be verified using the same simulation flow as for the network with maximum folding (*npysim* using C++), for details please have a look at chapter :ref:`verification`.
+To adjust the folding, the values for PE and SIMD can be increased to achieve also an increase in the performance. The result can be verified using the same simulation flow as for the network with maximum folding (*cppsim* using C++), for details please have a look at chapter :ref:`verification`.
 
 The result is a network of HLS layers with desired folding and it can be passed to :ref:`vivado_synth`.
diff --git a/docs/finn/source_code/finn.transformation.fpgadataflow.rst b/docs/finn/source_code/finn.transformation.fpgadataflow.rst
index e80ddbdd05595ab3ca1e6a81da95f96e92f5452a..4f0fb3e0bc2af41f7237adc8dbde5ee251f4d94b 100644
--- a/docs/finn/source_code/finn.transformation.fpgadataflow.rst
+++ b/docs/finn/source_code/finn.transformation.fpgadataflow.rst
@@ -1,5 +1,5 @@
 *****************************
-Transformation - fpgadataflow 
+Transformation - fpgadataflow
 *****************************
 
 Transformations (fpgadataflow)
@@ -13,34 +13,34 @@ finn.transformation.fpgadataflow.cleanup
    :undoc-members:
    :show-inheritance:
 
-finn.transformation.fpgadataflow.codegen\_ipgen
+finn.transformation.fpgadataflow.prepare\_ip
 -----------------------------------------------
 
-.. automodule:: finn.transformation.fpgadataflow.codegen_ipgen
+.. automodule:: finn.transformation.fpgadataflow.prepare_ip
    :members:
    :undoc-members:
    :show-inheritance:
 
-finn.transformation.fpgadataflow.codegen\_ipstitch
+finn.transformation.fpgadataflow.create\_stitched\_ip
---------------------------------------------------
+-----------------------------------------------------
 
-.. automodule:: finn.transformation.fpgadataflow.codegen_ipstitch
+.. automodule:: finn.transformation.fpgadataflow.create_stitched_ip
    :members:
    :undoc-members:
    :show-inheritance:
 
-finn.transformation.fpgadataflow.codegen\_npysim
+finn.transformation.fpgadataflow.prepare\_cppsim
 ------------------------------------------------
 
-.. automodule:: finn.transformation.fpgadataflow.codegen_npysim
+.. automodule:: finn.transformation.fpgadataflow.prepare_cppsim
    :members:
    :undoc-members:
    :show-inheritance:
 
-finn.transformation.fpgadataflow.compile
+finn.transformation.fpgadataflow.compile\_cppsim
-----------------------------------------
+------------------------------------------------
 
-.. automodule:: finn.transformation.fpgadataflow.compile
+.. automodule:: finn.transformation.fpgadataflow.compile_cppsim
    :members:
    :undoc-members:
    :show-inheritance:
@@ -61,10 +61,10 @@ finn.transformation.fpgadataflow.create\_dataflow\_partition
    :undoc-members:
    :show-inheritance:
 
-finn.transformation.fpgadataflow.hlssynth\_ipgen
+finn.transformation.fpgadataflow.hlssynth\_ip
 ------------------------------------------------
 
-.. automodule:: finn.transformation.fpgadataflow.hlssynth_ipgen
+.. automodule:: finn.transformation.fpgadataflow.hlssynth_ip
    :members:
    :undoc-members:
    :show-inheritance:
@@ -140,4 +140,3 @@ finn.transformation.fpgadataflow.templates
    :members:
    :undoc-members:
    :show-inheritance:
-
diff --git a/notebooks/advanced/1_custom_transformation_pass.ipynb b/notebooks/advanced/1_custom_transformation_pass.ipynb
index cf749000c2fb887acf69a2f8d6216b5a87de7ba7..a9345401e28b89ba3a206b81e7c3b022bedae023 100644
--- a/notebooks/advanced/1_custom_transformation_pass.ipynb
+++ b/notebooks/advanced/1_custom_transformation_pass.ipynb
@@ -377,12 +377,12 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "class Compile(NodeLocalTransformation):\n",
-      "    \"\"\"For every node: compile C++ code in node attribute \"code_gen_dir_npysim\"\n",
+      "class CompileCppSim(NodeLocalTransformation):\n",
+      "    \"\"\"For every node: compile C++ code in node attribute \"code_gen_dir_cppsim\"\n",
       "    and save path to executables in node attribute \"executable_path\".\n",
       "    All nodes in the graph must have the fpgadataflow backend attribute.\n",
       "\n",
-      "    To use these executables, exec_mode must be set to \"npysim\" (using transformation\n",
+      "    To use these executables, exec_mode must be set to \"cppsim\" (using transformation\n",
       "    SetExecMode) and the model has to be executed using execute_onnx() from\n",
       "    finn.core.onnx_exec\n",
       "\n",
@@ -401,10 +401,10 @@
       "                inst = registry.custom_op[op_type](node)\n",
       "                # ensure that code is generated\n",
       "                assert (\n",
-      "                    inst.get_nodeattr(\"code_gen_dir_npysim\") != \"\"\n",
+      "                    inst.get_nodeattr(\"code_gen_dir_cppsim\") != \"\"\n",
       "                ), \"\"\"Node\n",
-      "                attribute \"code_gen_dir_npysim\" is not set. Please run\n",
-      "                Transformation CodeGen_npysim first.\"\"\"\n",
+      "                attribute \"code_gen_dir_cppsim\" is not set. Please run\n",
+      "                Transformation PrepareCppSim first.\"\"\"\n",
       "                # call the compilation function for this node\n",
       "                inst.compile_singlenode_code()\n",
       "                # ensure that executable path is now set\n",
@@ -424,9 +424,9 @@
     }
    ],
    "source": [
-    "from finn.transformation.fpgadataflow.compile import Compile\n",
+    "from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim\n",
     "\n",
-    "showSrc(Compile)"
+    "showSrc(CompileCppSim)"
    ]
   },
   {
diff --git a/notebooks/end2end_example/cnv_end2end_example.ipynb b/notebooks/end2end_example/cnv_end2end_example.ipynb
index eda58eebfd666e90e2c79c11290e06d3632dc756..adb34f6d12ab9177490c07d67fbabc446eeb46ab 100644
--- a/notebooks/end2end_example/cnv_end2end_example.ipynb
+++ b/notebooks/end2end_example/cnv_end2end_example.ipynb
@@ -462,8 +462,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen\n",
-    "from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen\n",
+    "from finn.transformation.fpgadataflow.prepare_ip import PrepareIP\n",
+    "from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP\n",
     "from finn.util.basic import pynq_part_map\n",
     "\n",
     "test_pynq_board = \"Pynq-Z1\"\n",
@@ -471,8 +471,8 @@
     "target_clk_ns = 5\n",
     "\n",
     "model = ModelWrapper(build_dir + \"/end2end_cnv_w1a1_folded.onnx\")\n",
-    "model = model.transform(CodeGen_ipgen(test_fpga_part, target_clk_ns))\n",
-    "model = model.transform(HLSSynth_IPGen())\n",
+    "model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))\n",
+    "model = model.transform(HLSSynthIP())\n",
     "model.save(build_dir + \"/end2end_cnv_w1a1_ipgen.onnx\")"
    ]
   },
@@ -492,11 +492,11 @@
     "from finn.transformation.fpgadataflow.replace_verilog_relpaths import (\n",
     "    ReplaceVerilogRelPaths,\n",
     ")\n",
-    "from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch\n",
+    "from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP\n",
     "\n",
     "model = ModelWrapper(build_dir + \"/end2end_cnv_w1a1_ipgen.onnx\")\n",
     "model = model.transform(ReplaceVerilogRelPaths())\n",
-    "model = model.transform(CodeGen_ipstitch(test_fpga_part))\n",
+    "model = model.transform(CreateStitchedIP(test_fpga_part))\n",
     "model.save(build_dir + \"/end2end_cnv_w1a1_ipstitch.onnx\")"
    ]
   },
diff --git a/notebooks/end2end_example/tfc_end2end_example.ipynb b/notebooks/end2end_example/tfc_end2end_example.ipynb
index e9f82dced377f98f970c92ae92dfe7bf3da86968..00106ad5668a9982b8ba3498f8b401d61b6623f0 100644
--- a/notebooks/end2end_example/tfc_end2end_example.ipynb
+++ b/notebooks/end2end_example/tfc_end2end_example.ipynb
@@ -721,7 +721,7 @@
        " 'mem_mode': ('s', False, 'const'),\n",
        " 'ram_style': ('s', False, 'auto'),\n",
        " 'backend': ('s', True, 'fpgadataflow'),\n",
-       " 'code_gen_dir_npysim': ('s', False, ''),\n",
+       " 'code_gen_dir_cppsim': ('s', False, ''),\n",
        " 'code_gen_dir_ipgen': ('s', False, ''),\n",
        " 'executable_path': ('s', False, ''),\n",
        " 'ipgen_path': ('s', False, ''),\n",
@@ -930,10 +930,10 @@
    "metadata": {},
    "source": [
     "Two transformations are required to generate HLS IP blocks for each layer: \n",
-    "* `CodeGen_ipgen` which generates the HLS C++ code for the node and a tcl-script which starts the HLS synthesis and exports the design as IP. \n",
-    "* `HLSSynth_IPGen` which passes the tcl-script to Vivado HLS and thus performs the actual IP generation. \n",
+    "* `PrepareIP` which generates the HLS C++ code for the node and a tcl-script which starts the HLS synthesis and exports the design as IP. \n",
+    "* `HLSSynthIP` which passes the tcl-script to Vivado HLS and thus performs the actual IP generation. \n",
     "\n",
-    "We start off by giving unique node names using the basic transformation `GiveUniqueNodeNames`, and then proceed with the HLS C++ code generation with `CodeGen_ipgen`."
+    "We start off by giving unique node names using the basic transformation `GiveUniqueNodeNames`, and then proceed with the HLS C++ code generation with `PrepareIP`."
    ]
   },
   {
@@ -945,8 +945,8 @@
     "model = ModelWrapper(build_dir+\"/tfc_w1_a1_set_folding_factors.onnx\")\n",
     "model = model.transform(GiveUniqueNodeNames())\n",
     "\n",
-    "from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen\n",
-    "model = model.transform(CodeGen_ipgen(fpga_part, target_clk_ns))"
+    "from finn.transformation.fpgadataflow.prepare_ip import PrepareIP\n",
+    "model = model.transform(PrepareIP(fpga_part, target_clk_ns))"
    ]
   },
   {
@@ -955,7 +955,7 @@
    "source": [
     "### Synthesizing HLS to IP Blocks <a id='hls_synth'></a>\n",
     "\n",
-    "Now that we have generated the HLS code for each layer, we can call the `HLSSynth_IPGen` transformation to convert the generated HLS into Vivado IP blocks. **As this involves calling HLS synthesis, this transformation will run for some time (several minutes).**"
+    "Now that we have generated the HLS code for each layer, we can call the `HLSSynthIP` transformation to convert the generated HLS into Vivado IP blocks. **As this involves calling HLS synthesis, this transformation will run for some time (several minutes).**"
    ]
   },
   {
@@ -964,9 +964,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen\n",
+    "from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP\n",
     "\n",
-    "model = model.transform(HLSSynth_IPGen())\n",
+    "model = model.transform(HLSSynthIP())\n",
     "model.save(build_dir+\"/tfc_w1_a1_ipgen.onnx\")"
    ]
   },
@@ -1149,7 +1149,7 @@
    "source": [
     "### IP Stitching <a id='ip_stitching'></a>\n",
     "\n",
-    "We now have IP blocks for each of our layers, and will stitch them together into a larger IP that implements the whole network using the `CodeGen_ipstitch` transformation. Bear in mind that this transformation can only be applied on a graph that only contains HLS nodes that already have been through the `HLSSynth_IPGen` transformation, which is the last step we performed. Prior to calling IP stitching, we'll also use the `ReplaceVerilogRelPaths` transformation to convert any relative `$readmemh` paths in the generated IP blocks to absolute ones, which prevents errors later on. **This step invokes Vivado and may take a few minutes to run.**"
+    "We now have IP blocks for each of our layers, and will stitch them together into a larger IP that implements the whole network using the `CreateStitchedIP` transformation. Bear in mind that this transformation can only be applied on a graph that only contains HLS nodes that already have been through the `HLSSynthIP` transformation, which is the last step we performed. Prior to calling IP stitching, we'll also use the `ReplaceVerilogRelPaths` transformation to convert any relative `$readmemh` paths in the generated IP blocks to absolute ones, which prevents errors later on. **This step invokes Vivado and may take a few minutes to run.**"
    ]
   },
   {
@@ -1158,11 +1158,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch\n",
+    "from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP\n",
     "from finn.transformation.fpgadataflow.replace_verilog_relpaths import ReplaceVerilogRelPaths\n",
     "model = ModelWrapper(build_dir+\"/tfc_w1_a1_ipgen.onnx\")\n",
     "model = model.transform(ReplaceVerilogRelPaths())\n",
-    "model = model.transform(CodeGen_ipstitch(fpga_part))"
+    "model = model.transform(CreateStitchedIP(fpga_part))"
    ]
   },
   {
diff --git a/notebooks/end2end_example/tfc_end2end_verification.ipynb b/notebooks/end2end_example/tfc_end2end_verification.ipynb
index 09b115fa414c6a477e1fbcc28588f399cdd259ce..54e2e8a471a01ed3d05033cf0ba91b3e90385b69 100644
--- a/notebooks/end2end_example/tfc_end2end_verification.ipynb
+++ b/notebooks/end2end_example/tfc_end2end_verification.ipynb
@@ -193,7 +193,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Simulation (npysim) using C++\n",
+    "## Simulation (cppsim) using C++\n",
     "\n",
     "When dealing with HLS custom op nodes in FINN the simulation using Python is no longer sufficient. After the nodes have been converted to HLS layers, the simulation using C++ can be used. To do this, the input tensor is stored in an .npy file and C++ code is generated that reads the values from the .npy array, streams them to the corresponding finn-hlslib function and writes the result to a new .npy file. This in turn can be read in Python and processed in the FINN flow. For this example the model after setting the folding factors in the HLS layers is used, please be aware that this is not the full model, but the dataflow partition, so before executing at the end of this section we have to integrate the model back into the parent model."
    ]
@@ -204,7 +204,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model_for_npysim = ModelWrapper(build_dir+\"/tfc_w1_a1_set_folding_factors.onnx\")"
+    "model_for_cppsim = ModelWrapper(build_dir+\"/tfc_w1_a1_set_folding_factors.onnx\")"
    ]
   },
   {
@@ -212,8 +212,8 @@
    "metadata": {},
    "source": [
     "To generate the code for this simulation and to generate the executable two transformations are used:\n",
-    "* `CodeGen_npysim` which generates the C++ code for the corresponding hls layer\n",
-    "* `Compile` which compules the C++ code and stores the path to the executable"
+    "* `PrepareCppSim` which generates the C++ code for the corresponding hls layer\n",
+    "* `CompileCppSim` which compiles the C++ code and stores the path to the executable"
    ]
   },
   {
@@ -222,13 +222,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim\n",
-    "from finn.transformation.fpgadataflow.compile import Compile\n",
+    "from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim\n",
+    "from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim\n",
     "from finn.transformation.general import GiveUniqueNodeNames\n",
     "\n",
-    "model_for_npysim = model_for_npysim.transform(GiveUniqueNodeNames())\n",
+    "model_for_cppsim = model_for_cppsim.transform(GiveUniqueNodeNames())\n",
-    "model_for_npysim = model_for_npysim.transform(CodeGen_npysim())\n",
-    "model_for_npysim = model_for_npysim.transform(Compile())"
+    "model_for_cppsim = model_for_cppsim.transform(PrepareCppSim())\n",
+    "model_for_cppsim = model_for_cppsim.transform(CompileCppSim())"
    ]
   },
   {
@@ -247,7 +247,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Serving '/workspace/finn/tfc_w1_a1_for_npysim.onnx' at http://0.0.0.0:8081\n"
+      "Serving '/workspace/finn/tfc_w1_a1_for_cppsim.onnx' at http://0.0.0.0:8081\n"
      ]
     },
     {
@@ -273,8 +273,8 @@
     }
    ],
    "source": [
-    "model_for_npysim.save(build_dir+\"/tfc_w1_a1_for_npysim.onnx\")\n",
-    "showInNetron(build_dir+\"/tfc_w1_a1_for_npysim.onnx\")"
+    "model_for_cppsim.save(build_dir+\"/tfc_w1_a1_for_cppsim.onnx\")\n",
+    "showInNetron(build_dir+\"/tfc_w1_a1_for_cppsim.onnx\")"
    ]
   },
   {
@@ -282,7 +282,7 @@
    "metadata": {},
    "source": [
     "The following node attributes have been added:\n",
-    "* `code_gen_dir_npysim` indicates the directory where the files for the simulation using C++ are stored\n",
+    "* `code_gen_dir_cppsim` indicates the directory where the files for the simulation using C++ are stored\n",
     "* `executable_path` specifies the path to the executable\n",
     "\n",
     "We take now a closer look into the files that were generated:"
@@ -305,9 +305,9 @@
    "source": [
     "from finn.custom_op.registry import getCustomOp\n",
     "\n",
-    "fc0 = model_for_npysim.graph.node[1]\n",
+    "fc0 = model_for_cppsim.graph.node[1]\n",
     "fc0w = getCustomOp(fc0)\n",
-    "code_gen_dir = fc0w.get_nodeattr(\"code_gen_dir_npysim\")\n",
+    "code_gen_dir = fc0w.get_nodeattr(\"code_gen_dir_cppsim\")\n",
     "!ls {code_gen_dir}"
    ]
   },
@@ -322,7 +322,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "To simulate the model the execution mode(exec_mode) must be set to \"npysim\". This is done using the transformation SetExecMode."
+    "To simulate the model, the execution mode (exec_mode) must be set to \"cppsim\". This is done using the transformation SetExecMode."
    ]
   },
   {
@@ -333,8 +333,8 @@
    "source": [
     "from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode\n",
     "\n",
-    "model_for_npysim = model_for_npysim.transform(SetExecMode(\"npysim\"))\n",
-    "model_for_npysim.save(build_dir+\"/tfc_w1_a1_for_npysim.onnx\")"
+    "model_for_cppsim = model_for_cppsim.transform(SetExecMode(\"cppsim\"))\n",
+    "model_for_cppsim.save(build_dir+\"/tfc_w1_a1_for_cppsim.onnx\")"
    ]
   },
   {
@@ -362,12 +362,12 @@
    "source": [
     "parent_model = ModelWrapper(build_dir+\"/tfc_w1_a1_dataflow_parent.onnx\")\n",
     "sdp_node = parent_model.graph.node[2]\n",
-    "child_model = build_dir + \"/tfc_w1_a1_for_npysim.onnx\"\n",
+    "child_model = build_dir + \"/tfc_w1_a1_for_cppsim.onnx\"\n",
     "getCustomOp(sdp_node).set_nodeattr(\"model\", child_model)\n",
     "output_dict = oxe.execute_onnx(parent_model, input_dict)\n",
-    "output_npysim = output_dict[list(output_dict.keys())[0]]\n",
+    "output_cppsim = output_dict[list(output_dict.keys())[0]]\n",
     "\n",
-    "if np.isclose(output_npysim, output_golden, atol=1e-3).all():\n",
+    "if np.isclose(output_cppsim, output_golden, atol=1e-3).all():\n",
     "    print(\"Results are the same!\")\n",
     "else:\n",
     "    print(\"The results are not the same!\")"
diff --git a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
index 78fc2ccfc92f9b7ca3ae6beafe7d24bdbfada2bc..ad30282d93034f8d043a05a2172790349c31ec83 100644
--- a/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/hls_synth_res_estimation.py
@@ -54,8 +54,8 @@ def hls_synth_res_estimation(model):
             if code_gen_dir == "":
                 warnings.warn(
                     """Could not find report files, values will be set to zero
-                    for this node. Please run "CodeGen_ipgen" transformation and
-                    "HLSSynth_IPGen" first to generate the report files"""
+                    for this node. Please run "PrepareIP" transformation and
+                    "HLSSynthIP" first to generate the report files"""
                 )
             else:
                 xmlfile = "{}/project_{}/sol1/syn/report/{}_csynth.xml".format(
@@ -71,7 +71,7 @@ def hls_synth_res_estimation(model):
                 else:
                     warnings.warn(
                         """Could not find report files, values will be set to zero
-                        for this node. Please run "CodeGen_ipgen" transformation and
-                        "HLSSynth_IPGen" first to generate the report files"""
+                        for this node. Please run "PrepareIP" transformation and
+                        "HLSSynthIP" first to generate the report files"""
                     )
     return res_dict
diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py
index 8430a56bc2688627f82da6ae92140f5cff82cb60..b3e30a07a96a5590fdb755766c235d2ba99f4caf 100644
--- a/src/finn/custom_op/fpgadataflow/__init__.py
+++ b/src/finn/custom_op/fpgadataflow/__init__.py
@@ -74,7 +74,7 @@ class HLSCustomOp(CustomOp):
     def get_nodeattr_types(self):
         return {
             "backend": ("s", True, "fpgadataflow"),
-            "code_gen_dir_npysim": ("s", False, ""),
+            "code_gen_dir_cppsim": ("s", False, ""),
             "code_gen_dir_ipgen": ("s", False, ""),
             "executable_path": ("s", False, ""),
             "ipgen_path": ("s", False, ""),
@@ -121,7 +121,7 @@ class HLSCustomOp(CustomOp):
         assert (
             code_gen_dir != ""
         ), """Node attribute "code_gen_dir_ipgen" is
-        not set. Please run HLSSynth_IPGen first."""
+        not set. Please run HLSSynthIP first."""
         verilog_file = self.get_verilog_top_filename()
         assert os.path.isfile(verilog_file), "Cannot find top-level Verilog file."
         # build the Verilator emu library
@@ -232,14 +232,14 @@ class HLSCustomOp(CustomOp):
         vlnv = "xilinx.com:hls:%s:1.0" % node.name
         self.set_nodeattr("ip_vlnv", vlnv)
 
-    def code_generation_npysim(self, model):
-        """Generates c++ code for simulation (npysim)."""
+    def code_generation_cppsim(self, model):
+        """Generates c++ code for simulation (cppsim)."""
         node = self.onnx_node
-        path = self.get_nodeattr("code_gen_dir_npysim")
+        path = self.get_nodeattr("code_gen_dir_cppsim")
         self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())]
         self.generate_params(model, path)
         self.global_includes()
-        self.defines("npysim")
+        self.defines("cppsim")
         self.read_npy_data()
         self.strm_decl()
         self.pragmas()
@@ -253,7 +253,7 @@ class HLSCustomOp(CustomOp):
             # transform list into long string separated by '\n'
             code_gen_line = "\n".join(self.code_gen_dict[key])
             template = template.replace(key, code_gen_line)
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w")
         f.write(template)
         f.close()
@@ -262,7 +262,7 @@ class HLSCustomOp(CustomOp):
     def compile_singlenode_code(self):
         """Builds the bash script for compilation using the CppBuilder from
         finn.util.basic and executes the script to produce the executable."""
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         builder = CppBuilder()
         # to enable additional debug features please uncommand the next line
         # builder.append_includes("-DDEBUG")
@@ -284,11 +284,11 @@ class HLSCustomOp(CustomOp):
 
         Count indicates the number of inputs that have to be saved."""
         node = self.onnx_node
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         if code_gen_dir == "":
             raise Exception(
                 """
-Found no codegen dir for this node, did you run the codegen_npysim transformation?
+Found no codegen dir for this node, did you run the prepare_cppsim transformation?
             """
             )
         # create a npy file for each input of the node (in_ind is input index)
@@ -306,7 +306,7 @@ Found no codegen dir for this node, did you run the codegen_npysim transformatio
         the context dictionary."""
         # TODO support multi-output nodes as needed
         node = self.onnx_node
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         output = np.load("{}/output.npy".format(code_gen_dir))
         context[node.output[0]] = output
 
@@ -399,9 +399,9 @@ compilation transformations?
         return outputs
 
     def execute_node(self, context, graph):
-        """Executes single node using npysim or rtlsim."""
+        """Executes single node using cppsim or rtlsim."""
         mode = self.get_nodeattr("exec_mode")
-        if mode == "npysim":
+        if mode == "cppsim":
             # save input(s)
             self.dynamic_input_to_npy(context, 1)
             # execute the precompiled model
@@ -414,7 +414,7 @@ compilation transformations?
         else:
             raise Exception(
                 """Invalid value for attribute exec_mode! Is currently set to: {}
-            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                     mode
                 )
             )
@@ -435,14 +435,14 @@ compilation transformations?
     @abstractmethod
     def global_includes(self):
         """Function to set the global includes for c++ code that has to be generated
-        for npysim or rtlsim, is member function of HLSCustomOp class but has to
+        for cppsim or rtlsim, is member function of HLSCustomOp class but has to
         be filled by every node."""
         pass
 
     @abstractmethod
     def defines(self, var):
         """Function to set the define commands for c++ code that has to be generated
-        for npysim or rtlsim, is member function of HLSCustomOp class but has to
+        for cppsim or rtlsim, is member function of HLSCustomOp class but has to
         be filled by every node.
 
         var: makes it possible to reuse the function for different c++ code generation.
diff --git a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
index 2b469f7b0d6e5ddc3068fa3fd2d6cb487a560d92..e4d106068d4d128c66b2ce5f3d6c925dfe414b90 100644
--- a/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
+++ b/src/finn/custom_op/fpgadataflow/convolutioninputgenerator.py
@@ -177,14 +177,14 @@ class ConvolutionInputGenerator(HLSCustomOp):
         folded_oshape = self.get_folded_output_shape()
 
         # TODO ensure codegen dir exists
-        if mode == "npysim":
-            code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        if mode == "cppsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         elif mode == "rtlsim":
             code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
         else:
             raise Exception(
                 """Invalid value for attribute exec_mode! Is currently set to: {}
-            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                     mode
                 )
             )
@@ -207,14 +207,14 @@ class ConvolutionInputGenerator(HLSCustomOp):
         reshaped_input = inp.copy()
         np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
 
-        if mode == "npysim":
+        if mode == "cppsim":
             # execute the precompiled model
             super().exec_precompiled_singlenode_model()
             # load output npy file
             super().npy_to_dynamic_output(context)
             assert (
                 context[node.output[0]].shape == folded_oshape
-            ), "npysim \
+            ), "cppsim \
             did not produce expected ofolded utput shape"
             context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape)
         elif mode == "rtlsim":
@@ -241,7 +241,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
         else:
             raise Exception(
                 """Invalid value for attribute exec_mode! Is currently set to: {}
-            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                     mode
                 )
             )
@@ -277,7 +277,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
         ]
 
     def read_npy_data(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
         if dtype == DataType.BIPOLAR:
             # use binary for bipolar storage
@@ -313,7 +313,7 @@ class ConvolutionInputGenerator(HLSCustomOp):
         ]
 
     def dataoutstrm(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
         if dtype == DataType.BIPOLAR:
             # use binary for bipolar storage
diff --git a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
index f30871909b1c70f3b5df148f1b6eae22fdbadc25..1ca2c6d29313eb9d978a6ac0454b9226802f55a5 100644
--- a/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingdatawidthconverter_batch.py
@@ -226,7 +226,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
         ]
 
     def read_npy_data(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
         if dtype == DataType.BIPOLAR:
             # use binary for bipolar storage
@@ -260,7 +260,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
         ]
 
     def dataoutstrm(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
         if dtype == DataType.BIPOLAR:
             # use binary for bipolar storage
@@ -313,14 +313,14 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
         folded_ishape = self.get_folded_input_shape()
 
         # TODO ensure codegen dir exists
-        if mode == "npysim":
-            code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        if mode == "cppsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         elif mode == "rtlsim":
             code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
         else:
             raise Exception(
                 """Invalid value for attribute exec_mode! Is currently set to: {}
-            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                     mode
                 )
             )
@@ -343,7 +343,7 @@ class StreamingDataWidthConverter_Batch(HLSCustomOp):
         reshaped_input = reshaped_input.copy()
         np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
 
-        if mode == "npysim":
+        if mode == "cppsim":
             output = inp
             output = np.asarray([output], dtype=np.float32).reshape(*exp_shape)
             context[node.output[0]] = output
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 46920711e13057178be9fca5fe3a18ce3e14feda..3757e3a5f1f29a1d6c88ccc73ce3f3715611cbc0 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -181,7 +181,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         # verify that all necessary attributes exist
         # TODO collect automatically from get_nodeattr_types
         try:
-            self.get_nodeattr("code_gen_dir_npysim")
+            self.get_nodeattr("code_gen_dir_cppsim")
             self.get_nodeattr("executable_path")
             self.get_nodeattr("resType")
             self.get_nodeattr("MW")
@@ -508,10 +508,10 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             f_weights.close()
 
         elif mem_mode == "decoupled":
-            """Saves weights in corresponding file format for npysim or rtlsim"""
+            """Saves weights in corresponding file format for cppsim or rtlsim"""
             # transpose weight tensor from (1, PE, WMEM, SIMD) to (1, WMEM, PE, SIMD)
             # and save as unflipped weight tensor to be able to differentiate between
-            # flipped an unflipped weight tensor (has to be flipped for npysim)
+            # flipped and unflipped weight tensor (has to be flipped for cppsim)
 
             weight_tensor_unflipped = np.transpose(weight_tensor, (0, 2, 1, 3))
 
@@ -613,14 +613,14 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         node = self.onnx_node
 
         # TODO ensure codegen dir exists
-        if mode == "npysim":
-            code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        if mode == "cppsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         elif mode == "rtlsim":
             code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
         else:
             raise Exception(
                 """Invalid value for attribute exec_mode! Is currently set to: {}
-            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                     mode
                 )
             )
@@ -654,7 +654,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 raise Exception("Unexpected input found for StreamingFCLayer")
             in_ind += 1
 
-        if mode == "npysim":
+        if mode == "cppsim":
             # execute the precompiled model
             super().exec_precompiled_singlenode_model()
             # load output npy file
@@ -696,7 +696,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         else:
             raise Exception(
                 """Invalid value for attribute exec_mode! Is currently set to: {}
-            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                     mode
                 )
             )
@@ -744,7 +744,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             )
 
     def read_npy_data(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
         if dtype == DataType.BIPOLAR:
             # use binary for bipolar storage
@@ -841,7 +841,7 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             )
 
     def dataoutstrm(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
         if dtype == DataType.BIPOLAR:
             # use binary for bipolar storage
diff --git a/src/finn/custom_op/fpgadataflow/streamingfifo.py b/src/finn/custom_op/fpgadataflow/streamingfifo.py
index 0a7f143d26fd98c91a34ffcbe5f8fecabc677182..586d38a03f3717d1ea2cffcf7474ca434c9ea505 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfifo.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfifo.py
@@ -121,7 +121,7 @@ class StreamingFIFO(HLSCustomOp):
             # transform list into long string separated by '\n'
             code_gen_line = "\n".join(self.code_gen_dict[key])
             template = template.replace(key, code_gen_line)
-        f = open(os.path.join(verilog_dir, "{}.v".format(self.onnx_node.name,)), "w",)
+        f = open(os.path.join(verilog_dir, "{}.v".format(self.onnx_node.name)), "w")
         f.write(template)
         f.close()
         self.code_gen_dict.clear()
@@ -222,7 +222,7 @@ class StreamingFIFO(HLSCustomOp):
         inp = context[node.input[0]]
         exp_shape = self.get_normal_input_shape()
 
-        if mode == "npysim":
+        if mode == "cppsim":
             output = inp
             output = np.asarray([output], dtype=np.float32).reshape(*exp_shape)
             context[node.output[0]] = output
@@ -243,9 +243,7 @@ class StreamingFIFO(HLSCustomOp):
                 export_idt = DataType[self.get_nodeattr("dataType")]
             # make copy before saving the array
             reshaped_input = reshaped_input.copy()
-            np.save(
-                os.path.join(code_gen_dir, "input_0.npy"), reshaped_input,
-            )
+            np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
             sim = self.get_rtlsim()
             nbits = self.get_instream_width()
             inp = npy_to_rtlsim_input(
@@ -271,7 +269,7 @@ class StreamingFIFO(HLSCustomOp):
         else:
             raise Exception(
                 """Invalid value for attribute exec_mode! Is currently set to: {}
-            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                     mode
                 )
             )
diff --git a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
index 7334c913b6f85cad4835b6e65eb14c488432af6b..2344e12f7e87634c189563f9cde7b1c861a3606e 100644
--- a/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingmaxpool_batch.py
@@ -171,7 +171,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         ]
 
     def read_npy_data(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype()
         if dtype == DataType.BIPOLAR:
             # use binary for bipolar storage
@@ -215,7 +215,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
             ]
 
     def dataoutstrm(self):
-        code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_output_datatype()
         if dtype == DataType.BIPOLAR:
             # use binary for bipolar storage
@@ -267,14 +267,14 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         folded_oshape = self.get_folded_output_shape()
 
         # TODO ensure codegen dir exists
-        if mode == "npysim":
-            code_gen_dir = self.get_nodeattr("code_gen_dir_npysim")
+        if mode == "cppsim":
+            code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         elif mode == "rtlsim":
             code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
         else:
             raise Exception(
                 """Invalid value for attribute exec_mode! Is currently set to: {}
-            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                     mode
                 )
             )
@@ -296,14 +296,14 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         reshaped_input = inp.copy()
         np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input)
 
-        if mode == "npysim":
+        if mode == "cppsim":
             # execute the precompiled model
             super().exec_precompiled_singlenode_model()
             # load output npy file
             super().npy_to_dynamic_output(context)
             assert (
                 context[node.output[0]].shape == folded_oshape
-            ), "npysim \
+            ), "cppsim \
             did not produce expected ofolded utput shape"
             context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape)
         elif mode == "rtlsim":
@@ -330,7 +330,7 @@ class StreamingMaxPool_Batch(HLSCustomOp):
         else:
             raise Exception(
                 """Invalid value for attribute exec_mode! Is currently set to: {}
-            has to be set to one of the following value ("npysim", "rtlsim")""".format(
+            has to be set to one of the following value ("cppsim", "rtlsim")""".format(
                     mode
                 )
             )
diff --git a/src/finn/transformation/fpgadataflow/annotate_resources.py b/src/finn/transformation/fpgadataflow/annotate_resources.py
index d192372a7d9c1f6ee2f088c6a058b994d21f6c99..207075b00de1871da19ea78472125d435449ed6e 100644
--- a/src/finn/transformation/fpgadataflow/annotate_resources.py
+++ b/src/finn/transformation/fpgadataflow/annotate_resources.py
@@ -41,7 +41,7 @@ class AnnotateResources(Transformation):
     * 'hls' -- use results from the HLS synthesis report
 
     No annotations can be provided unless the relevant transformation for the
-    chosen mode (e.g. HLSSynth_IPGen for hls) was previously run.
+    chosen mode (e.g. HLSSynthIP for hls) was previously run.
     """
 
     def __init__(self, mode):
diff --git a/src/finn/transformation/fpgadataflow/cleanup.py b/src/finn/transformation/fpgadataflow/cleanup.py
index a31cbfa7dd30eff37ceb2d7bf3c162093a5a3a1c..248a99b57aed7f38f63cc25ad7ecf93bd1930e63 100644
--- a/src/finn/transformation/fpgadataflow/cleanup.py
+++ b/src/finn/transformation/fpgadataflow/cleanup.py
@@ -57,11 +57,11 @@ class CleanUp(Transformation):
                 try:
                     # lookup op_type in registry of CustomOps
                     inst = registry.custom_op[op_type](node)
-                    # delete code_gen_dir from npysim
-                    code_gen_dir = inst.get_nodeattr("code_gen_dir_npysim")
+                    # delete code_gen_dir from cppsim
+                    code_gen_dir = inst.get_nodeattr("code_gen_dir_cppsim")
                     if os.path.isdir(code_gen_dir):
                         shutil.rmtree(code_gen_dir)
-                    inst.set_nodeattr("code_gen_dir_npysim", "")
+                    inst.set_nodeattr("code_gen_dir_cppsim", "")
                     inst.set_nodeattr("executable_path", "")
                     # delete code_gen_dir from ipgen and project folder
                     code_gen_dir = inst.get_nodeattr("code_gen_dir_ipgen")
diff --git a/src/finn/transformation/fpgadataflow/compile.py b/src/finn/transformation/fpgadataflow/compile_cppsim.py
similarity index 91%
rename from src/finn/transformation/fpgadataflow/compile.py
rename to src/finn/transformation/fpgadataflow/compile_cppsim.py
index 40c7da8f77efeaa655459402699a401b642b776c..ddf00c799b8a53c428d0854551d0078a6e264111 100644
--- a/src/finn/transformation/fpgadataflow/compile.py
+++ b/src/finn/transformation/fpgadataflow/compile_cppsim.py
@@ -31,12 +31,12 @@ from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation import NodeLocalTransformation
 
 
-class Compile(NodeLocalTransformation):
-    """For every node: compile C++ code in node attribute "code_gen_dir_npysim"
+class CompileCppSim(NodeLocalTransformation):
+    """For every node: compile C++ code in node attribute "code_gen_dir_cppsim"
     and save path to executables in node attribute "executable_path".
     All nodes in the graph must have the fpgadataflow backend attribute.
 
-    To use these executables, exec_mode must be set to "npysim" (using transformation
+    To use these executables, exec_mode must be set to "cppsim" (using transformation
     SetExecMode) and the model has to be executed using execute_onnx() from
     finn.core.onnx_exec
 
@@ -55,10 +55,10 @@ class Compile(NodeLocalTransformation):
                 inst = registry.custom_op[op_type](node)
                 # ensure that code is generated
                 assert (
-                    inst.get_nodeattr("code_gen_dir_npysim") != ""
+                    inst.get_nodeattr("code_gen_dir_cppsim") != ""
                 ), """Node
-                attribute "code_gen_dir_npysim" is not set. Please run
-                Transformation CodeGen_npysim first."""
+                attribute "code_gen_dir_cppsim" is not set. Please run
+                Transformation PrepareCppSim first."""
                 # call the compilation function for this node
                 inst.compile_singlenode_code()
                 # ensure that executable path is now set
diff --git a/src/finn/transformation/fpgadataflow/codegen_ipstitch.py b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
similarity index 98%
rename from src/finn/transformation/fpgadataflow/codegen_ipstitch.py
rename to src/finn/transformation/fpgadataflow/create_stitched_ip.py
index bc1fce836a16f49e6549f6b24de2973b902bf066..c22a21ebdfd19178d3937de3a235dfadb7ee1d71 100644
--- a/src/finn/transformation/fpgadataflow/codegen_ipstitch.py
+++ b/src/finn/transformation/fpgadataflow/create_stitched_ip.py
@@ -34,10 +34,10 @@ from finn.util.basic import get_by_name, make_build_dir
 from finn.custom_op.registry import getCustomOp
 
 
-class CodeGen_ipstitch(Transformation):
+class CreateStitchedIP(Transformation):
     """Create a Vivado IP Block Design project from all the generated IPs of a
     graph. All nodes in the graph must have the fpgadataflow backend attribute,
-    and the CodeGen_ipgen transformation must have been previously run on
+    and the PrepareIP transformation must have been previously run on
     the graph. The resulting block design is also packaged as IP. The
     transformation gets the fpgapart as a string.
 
diff --git a/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py b/src/finn/transformation/fpgadataflow/hlssynth_ip.py
similarity index 94%
rename from src/finn/transformation/fpgadataflow/hlssynth_ipgen.py
rename to src/finn/transformation/fpgadataflow/hlssynth_ip.py
index 2af623818fe0e830883ef5065e5e7c9c7364ef1e..6d7c4025653948d6958672177ae5e36ab08bf279 100644
--- a/src/finn/transformation/fpgadataflow/hlssynth_ipgen.py
+++ b/src/finn/transformation/fpgadataflow/hlssynth_ip.py
@@ -31,7 +31,7 @@ from finn.util.fpgadataflow import is_fpgadataflow_node
 from finn.transformation import NodeLocalTransformation
 
 
-class HLSSynth_IPGen(NodeLocalTransformation):
+class HLSSynthIP(NodeLocalTransformation):
     """For each node: generate IP block from code in folder
     that is referenced in node attribute "code_gen_dir_ipgen"
     and save path of generated project in node attribute "ipgen_path".
@@ -58,14 +58,14 @@ class HLSSynth_IPGen(NodeLocalTransformation):
                     inst.get_nodeattr("code_gen_dir_ipgen") != ""
                 ), """Node
                 attribute "code_gen_dir_ipgen" is empty. Please run
-                transformation CodeGen_ipgen first."""
+                transformation PrepareIP first."""
                 # call the compilation function for this node
                 inst.ipgen_singlenode_code()
                 # ensure that executable path is now set
                 assert (
                     inst.get_nodeattr("ipgen_path") != ""
                 ), """Transformation
-                HLSSynth_IPGen was not successful. Node attribute "ipgen_path"
+                HLSSynthIP was not successful. Node attribute "ipgen_path"
                 is empty."""
             except KeyError:
                 # exception if op_type is not supported
diff --git a/src/finn/transformation/fpgadataflow/make_pynq_proj.py b/src/finn/transformation/fpgadataflow/make_pynq_proj.py
index 9fe5781ecd3aa885281bde772571d307ad0669c8..429b74bb5ea7e359ea720a0a86706f2c653ee6ce 100644
--- a/src/finn/transformation/fpgadataflow/make_pynq_proj.py
+++ b/src/finn/transformation/fpgadataflow/make_pynq_proj.py
@@ -40,7 +40,7 @@ class MakePYNQProject(Transformation):
     """Create a Vivado PYNQ overlay project (including the shell infrastructure)
     from the already-stitched IP block for this graph.
     All nodes in the graph must have the fpgadataflow backend attribute,
-    and the CodeGen_ipstitch transformation must have been previously run on
+    and the CreateStitchedIP transformation must have been previously run on
     the graph.
 
     Outcome if successful: sets the vivado_pynq_proj attribute in the ONNX
@@ -59,12 +59,12 @@ class MakePYNQProject(Transformation):
         ipstitch_path = model.get_metadata_prop("vivado_stitch_proj")
         if ipstitch_path is None or (not os.path.isdir(ipstitch_path)):
             raise Exception(
-                "No stitched IPI design found, apply CodeGen_ipstitch first."
+                "No stitched IPI design found, apply CreateStitchedIP first."
             )
         vivado_stitch_vlnv = model.get_metadata_prop("vivado_stitch_vlnv")
         if vivado_stitch_vlnv is None:
             raise Exception(
-                "No vlnv for stitched IP found, apply CodeGen_ipstitch first."
+                "No vlnv for stitched IP found, apply CreateStitchedIP first."
             )
 
         # collect list of all IP dirs
diff --git a/src/finn/transformation/fpgadataflow/codegen_npysim.py b/src/finn/transformation/fpgadataflow/prepare_cppsim.py
similarity index 86%
rename from src/finn/transformation/fpgadataflow/codegen_npysim.py
rename to src/finn/transformation/fpgadataflow/prepare_cppsim.py
index 02200e76db3f9c8207605bb93c4b07f0ebc76cab..a1524322ec03a4e96ef41f999144e3eed349c5af 100644
--- a/src/finn/transformation/fpgadataflow/codegen_npysim.py
+++ b/src/finn/transformation/fpgadataflow/prepare_cppsim.py
@@ -36,35 +36,35 @@ from finn.util.fpgadataflow import is_fpgadataflow_node
 
 def _codegen_single_node(node, model):
     """Calls C++ code generation for one node. Resulting code can be used
-    to simulate node using npysim."""
+    to simulate node using cppsim."""
 
     op_type = node.op_type
     try:
         # lookup op_type in registry of CustomOps
         inst = registry.custom_op[op_type](node)
         # get the path of the code generation directory
-        code_gen_dir = inst.get_nodeattr("code_gen_dir_npysim")
+        code_gen_dir = inst.get_nodeattr("code_gen_dir_cppsim")
         # ensure that there is a directory
         if code_gen_dir == "" or not os.path.isdir(code_gen_dir):
             code_gen_dir = make_build_dir(
-                prefix="code_gen_npysim_" + str(node.name) + "_"
+                prefix="code_gen_cppsim_" + str(node.name) + "_"
             )
-            inst.set_nodeattr("code_gen_dir_npysim", code_gen_dir)
+            inst.set_nodeattr("code_gen_dir_cppsim", code_gen_dir)
         # ensure that there is generated code inside the dir
-        inst.code_generation_npysim(model)
+        inst.code_generation_cppsim(model)
     except KeyError:
         # exception if op_type is not supported
         raise Exception("Custom op_type %s is currently not supported." % op_type)
 
 
-class CodeGen_npysim(Transformation):
+class PrepareCppSim(Transformation):
     """Call custom implementation to generate code for single custom node
     and create folder that contains all the generated files.
     All nodes in the graph must have the fpgadataflow backend attribute.
 
-    Outcome if succesful: Node attribute "code_gen_dir_npysim" contains path to folder
-    that contains generated C++ code that can be used to simulate node using npysim.
-    The subsequent transformation is Compile"""
+    Outcome if successful: Node attribute "code_gen_dir_cppsim" contains path to folder
+    that contains generated C++ code that can be used to simulate node using cppsim.
+    The subsequent transformation is CompileCppSim"""
 
     def apply(self, model):
         for node in model.graph.node:
diff --git a/src/finn/transformation/fpgadataflow/codegen_ipgen.py b/src/finn/transformation/fpgadataflow/prepare_ip.py
similarity index 97%
rename from src/finn/transformation/fpgadataflow/codegen_ipgen.py
rename to src/finn/transformation/fpgadataflow/prepare_ip.py
index fa7725ae1fa03cc204aa58969d6fbc6cf71e7d97..00182773558ec30ab0271de6599615233785bdd7 100644
--- a/src/finn/transformation/fpgadataflow/codegen_ipgen.py
+++ b/src/finn/transformation/fpgadataflow/prepare_ip.py
@@ -57,7 +57,7 @@ def _codegen_single_node(node, model, fpgapart, clk):
         raise Exception("Custom op_type %s is currently not supported." % op_type)
 
 
-class CodeGen_ipgen(Transformation):
+class PrepareIP(Transformation):
     """Call custom implementation to generate code for single custom node
     and create folder that contains all the generated files.
     All nodes in the graph must have the fpgadataflow backend attribute and
@@ -69,7 +69,7 @@ class CodeGen_ipgen(Transformation):
 
     Outcome if succesful: Node attribute "code_gen_dir_ipgen" contains path to folder
     that contains generated C++ code that can be used to generate a Vivado IP block.
-    The subsequent transformation is HLSSynth_IPGen"""
+    The subsequent transformation is HLSSynthIP"""
 
     def __init__(self, fpgapart, clk):
         super().__init__()
diff --git a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
index baac3e7bdf5ac936a963ac8346f01638657b042c..5f0b89e85dc5f33319f64ef885db20ed9c4046af 100644
--- a/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
+++ b/src/finn/transformation/fpgadataflow/prepare_rtlsim.py
@@ -38,7 +38,7 @@ except ModuleNotFoundError:
 
 
 class PrepareRTLSim(NodeLocalTransformation):
-    """For a graph with generated RTL sources (after HLSSynth_IPGen), create a
+    """For a graph with generated RTL sources (after HLSSynthIP), create a
     Verilator emulation library for each node to prepare for rtlsim
     execution and set the rtlsim_so property to the path to the generated
     emulation library.
diff --git a/src/finn/transformation/fpgadataflow/set_exec_mode.py b/src/finn/transformation/fpgadataflow/set_exec_mode.py
index 83dda7ceccfd26fa1c43ab517ade2e19ccae4a61..40996e5f64fb812ea3766b71a9a8275514dec4a0 100644
--- a/src/finn/transformation/fpgadataflow/set_exec_mode.py
+++ b/src/finn/transformation/fpgadataflow/set_exec_mode.py
@@ -33,7 +33,7 @@ from finn.transformation import Transformation
 
 class SetExecMode(Transformation):
     """Set attribute exec_mode in all fpgadataflow nodes to specify which
-    kind of execution should be used ("npysim" or "rtlsim")"""
+    kind of execution should be used ("cppsim" or "rtlsim")"""
 
     def __init__(self, mode):
         super().__init__()
diff --git a/tests/end2end/test_end2end_cnv_w1a1.py b/tests/end2end/test_end2end_cnv_w1a1.py
index 1725eb3915b692e8f419924856eecb5f85faacf1..d7f59ef35aaf61891937dcaa105cf1392133e732 100644
--- a/tests/end2end/test_end2end_cnv_w1a1.py
+++ b/tests/end2end/test_end2end_cnv_w1a1.py
@@ -55,15 +55,15 @@ from finn.transformation.fpgadataflow.create_dataflow_partition import (
 )
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
-from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
-from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.replace_verilog_relpaths import (
     ReplaceVerilogRelPaths,
 )
-from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
-from finn.transformation.fpgadataflow.compile import Compile
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
 from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject
 from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject
@@ -169,8 +169,8 @@ def test_end2end_cnv_w1a1_fold_and_tlastmarker():
 
 def test_end2end_cnv_w1a1_gen_hls_ip():
     model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_folded.onnx")
-    model = model.transform(CodeGen_ipgen(test_fpga_part, target_clk_ns))
-    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
     model = model.transform(AnnotateResources("hls"))
     model.save(build_dir + "/end2end_cnv_w1a1_ipgen.onnx")
 
@@ -178,7 +178,7 @@ def test_end2end_cnv_w1a1_gen_hls_ip():
 def test_end2end_cnv_w1a1_ip_stitch():
     model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_ipgen.onnx")
     model = model.transform(ReplaceVerilogRelPaths())
-    model = model.transform(CodeGen_ipstitch(test_fpga_part))
+    model = model.transform(CreateStitchedIP(test_fpga_part))
     model.save(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
 
 
@@ -188,13 +188,13 @@ def test_end2end_cnv_w1a1_verify_dataflow_part():
     inp_name = model.graph.input[0].name
     out_name = model.graph.output[0].name
     inp_dict = {inp_name: x}
-    # npysim
-    model = model.transform(CodeGen_npysim())
-    model = model.transform(Compile())
-    model = model.transform(SetExecMode("npysim"))
-    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_npysim.onnx")
-    ret_npysim = execute_onnx(model, inp_dict, True)
-    res_npysim = ret_npysim[out_name]
+    # cppsim
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
+    model = model.transform(SetExecMode("cppsim"))
+    model.save(build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
+    ret_cppsim = execute_onnx(model, inp_dict, True)
+    res_cppsim = ret_cppsim[out_name]
     # node-by-node rtlsim
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(PrepareRTLSim())
@@ -208,8 +208,8 @@ def test_end2end_cnv_w1a1_verify_dataflow_part():
     os.environ["LIVENESS_THRESHOLD"] = "-1"
     ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
     res_rtlsim_whole = ret_rtlsim_whole[out_name]
-    assert np.isclose(res_npysim, res_rtlsim_nodebynode).all()
-    assert np.isclose(res_npysim, res_rtlsim_whole).all()
+    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
+    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
 
 
 def test_end2end_cnv_w1a1_verify_all():
@@ -231,12 +231,12 @@ def test_end2end_cnv_w1a1_verify_all():
     parent_model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
-    # produce results with npysim
+    # produce results with cppsim
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
-    sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_ipgen_npysim.onnx")
-    ret_npysim = execute_onnx(parent_model, {iname: x}, True)
-    y_npysim = ret_npysim[oname]
+    sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
+    ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
+    y_cppsim = ret_cppsim[oname]
     # produce results with node-by-node rtlsim
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx"
@@ -251,7 +251,7 @@ def test_end2end_cnv_w1a1_verify_all():
     os.environ["LIVENESS_THRESHOLD"] = "-1"
     ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True)
     y_whole_rtlsim = ret_whole_rtlsim[oname]
-    assert np.isclose(y_golden, y_npysim).all()
+    assert np.isclose(y_golden, y_cppsim).all()
     assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
     assert np.isclose(y_golden, y_whole_rtlsim).all()
     assert np.argmax(y_golden) == 3
@@ -316,7 +316,7 @@ def test_end2end_cnv_w1a1_run_on_pynq():
         ip = os.environ["PYNQ_IP"]  # NOQA
         if ip == "":
             pytest.skip("PYNQ board IP address not specified")
-        # produce results with npysim
+        # use the PYNQ deployment model for the dataflow partition
         sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
         sdp_node = getCustomOp(sdp_node)
         sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_pynq_deploy.onnx")
diff --git a/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py b/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py
index ea7ce9df81c3e10c951266e79496086f8f46d722..b5f3f4e27ff24723db69f887cb7f1cce9c4df617 100644
--- a/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py
+++ b/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py
@@ -46,14 +46,14 @@ from finn.core.throughput_test import throughput_test
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
 from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
-from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
-from finn.transformation.fpgadataflow.compile import Compile
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
-from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
@@ -155,8 +155,8 @@ def test_end2end_tfc_w1a1_fold_and_tlastmarker():
 
 def test_end2end_tfc_w1a1_gen_hls_ip():
     model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_folded.onnx")
-    model = model.transform(CodeGen_ipgen(test_fpga_part, target_clk_ns))
-    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
     model = model.transform(AnnotateResources("hls"))
     model.save(build_dir + "/end2end_tfc_w1a1_ipgen.onnx")
 
@@ -164,7 +164,7 @@ def test_end2end_tfc_w1a1_gen_hls_ip():
 def test_end2end_tfc_w1a1_ip_stitch():
     model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_ipgen.onnx")
     model = model.transform(ReplaceVerilogRelPaths())
-    model = model.transform(CodeGen_ipstitch(test_fpga_part))
+    model = model.transform(CreateStitchedIP(test_fpga_part))
     model.save(build_dir + "/end2end_tfc_w1a1_ipstitch.onnx")
 
 
@@ -174,13 +174,13 @@ def test_end2end_tfc_w1a1_verify_dataflow_part():
     inp_name = model.graph.input[0].name
     out_name = model.graph.output[0].name
     inp_dict = {inp_name: x}
-    # npysim
-    model = model.transform(CodeGen_npysim())
-    model = model.transform(Compile())
-    model = model.transform(SetExecMode("npysim"))
-    model.save(build_dir + "/end2end_tfc_w1a1_ipstitch_npysim.onnx")
-    ret_npysim = execute_onnx(model, inp_dict, True)
-    res_npysim = ret_npysim[out_name]
+    # cppsim
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
+    model = model.transform(SetExecMode("cppsim"))
+    model.save(build_dir + "/end2end_tfc_w1a1_ipstitch_cppsim.onnx")
+    ret_cppsim = execute_onnx(model, inp_dict, True)
+    res_cppsim = ret_cppsim[out_name]
     # node-by-node rtlsim
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(PrepareRTLSim())
@@ -192,8 +192,8 @@ def test_end2end_tfc_w1a1_verify_dataflow_part():
     model.save(build_dir + "/end2end_tfc_w1a1_ipstitch_whole_rtlsim.onnx")
     ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
     res_rtlsim_whole = ret_rtlsim_whole[out_name]
-    assert np.isclose(res_npysim, res_rtlsim_nodebynode).all()
-    assert np.isclose(res_npysim, res_rtlsim_whole).all()
+    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
+    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
 
 
 def test_end2end_tfc_w1a1_verify_all():
@@ -212,12 +212,12 @@ def test_end2end_tfc_w1a1_verify_all():
     parent_model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_dataflow_parent.onnx")
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
-    # produce results with npysim
+    # produce results with cppsim
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
-    sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a1_ipstitch_npysim.onnx")
-    ret_npysim = execute_onnx(parent_model, {iname: x}, True)
-    y_npysim = ret_npysim[oname]
+    sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a1_ipstitch_cppsim.onnx")
+    ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
+    y_cppsim = ret_cppsim[oname]
     # produce results with node-by-node rtlsim
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_tfc_w1a1_ipstitch_nodebynode_rtlsim.onnx"
@@ -230,7 +230,7 @@ def test_end2end_tfc_w1a1_verify_all():
     )
     ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True)
     y_whole_rtlsim = ret_whole_rtlsim[oname]
-    assert np.isclose(y_golden, y_npysim).all()
+    assert np.isclose(y_golden, y_cppsim).all()
     assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
     assert np.isclose(y_golden, y_whole_rtlsim).all()
 
@@ -292,7 +292,7 @@ def test_end2end_tfc_w1a1_run_on_pynq():
         ip = os.environ["PYNQ_IP"]  # NOQA
         if ip == "":
             pytest.skip("PYNQ board IP address not specified")
-        # produce results with npysim
+        # use the PYNQ deployment model for the dataflow partition
         sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
         sdp_node = getCustomOp(sdp_node)
         sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a1_pynq_deploy.onnx")
diff --git a/tests/end2end/test_end2end_tfc_w1a2.py b/tests/end2end/test_end2end_tfc_w1a2.py
index 6d0349057fc4b62ce2ebe536f86af3c8161d1612..ecc0d48a6af37bc2bdd48f9306976aa8582ca1b0 100644
--- a/tests/end2end/test_end2end_tfc_w1a2.py
+++ b/tests/end2end/test_end2end_tfc_w1a2.py
@@ -43,14 +43,14 @@ from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
-from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
-from finn.transformation.fpgadataflow.compile import Compile
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
-from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
@@ -147,8 +147,8 @@ def test_end2end_tfc_w1a2_fold_and_tlastmarker():
 
 def test_end2end_tfc_w1a2_gen_hls_ip():
     model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_folded.onnx")
-    model = model.transform(CodeGen_ipgen(test_fpga_part, target_clk_ns))
-    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
     model = model.transform(AnnotateResources("hls"))
     model.save(build_dir + "/end2end_tfc_w1a2_ipgen.onnx")
 
@@ -156,7 +156,7 @@ def test_end2end_tfc_w1a2_gen_hls_ip():
 def test_end2end_tfc_w1a2_ip_stitch():
     model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipgen.onnx")
     model = model.transform(ReplaceVerilogRelPaths())
-    model = model.transform(CodeGen_ipstitch(test_fpga_part))
+    model = model.transform(CreateStitchedIP(test_fpga_part))
     model.save(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
 
 
@@ -166,13 +166,13 @@ def test_end2end_tfc_w1a2_verify_dataflow_part():
     inp_name = model.graph.input[0].name
     out_name = model.graph.output[0].name
     inp_dict = {inp_name: x}
-    # npysim
-    model = model.transform(CodeGen_npysim())
-    model = model.transform(Compile())
-    model = model.transform(SetExecMode("npysim"))
-    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_npysim.onnx")
-    ret_npysim = execute_onnx(model, inp_dict, True)
-    res_npysim = ret_npysim[out_name]
+    # cppsim
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
+    model = model.transform(SetExecMode("cppsim"))
+    model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_cppsim.onnx")
+    ret_cppsim = execute_onnx(model, inp_dict, True)
+    res_cppsim = ret_cppsim[out_name]
     # node-by-node rtlsim
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(PrepareRTLSim())
@@ -184,8 +184,8 @@ def test_end2end_tfc_w1a2_verify_dataflow_part():
     model.save(build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx")
     ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
     res_rtlsim_whole = ret_rtlsim_whole[out_name]
-    assert np.isclose(res_npysim, res_rtlsim_nodebynode).all()
-    assert np.isclose(res_npysim, res_rtlsim_whole).all()
+    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
+    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
 
 
 def test_end2end_tfc_w1a2_verify_all():
@@ -204,12 +204,12 @@ def test_end2end_tfc_w1a2_verify_all():
     parent_model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx")
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
-    # produce results with npysim
+    # produce results with cppsim
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
-    sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a2_ipstitch_npysim.onnx")
-    ret_npysim = execute_onnx(parent_model, {iname: x}, True)
-    y_npysim = ret_npysim[oname]
+    sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a2_ipstitch_cppsim.onnx")
+    ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
+    y_cppsim = ret_cppsim[oname]
     # produce results with node-by-node rtlsim
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx"
@@ -222,7 +222,7 @@ def test_end2end_tfc_w1a2_verify_all():
     )
     ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True)
     y_whole_rtlsim = ret_whole_rtlsim[oname]
-    assert np.isclose(y_golden, y_npysim).all()
+    assert np.isclose(y_golden, y_cppsim).all()
     assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
     assert np.isclose(y_golden, y_whole_rtlsim).all()
 
@@ -284,7 +284,7 @@ def test_end2end_tfc_w1a2_run_on_pynq():
         ip = os.environ["PYNQ_IP"]  # NOQA
         if ip == "":
             pytest.skip("PYNQ board IP address not specified")
-        # produce results with npysim
+        # use the PYNQ deployment model for the dataflow partition
         sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
         sdp_node = getCustomOp(sdp_node)
         sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a2_pynq_deploy.onnx")
diff --git a/tests/end2end/test_end2end_tfc_w2a2.py b/tests/end2end/test_end2end_tfc_w2a2.py
index f1ded9ab0936e728af9b2ccdd771791cbc33c18f..8c13352d9e9d146d58d76b1cf1e17878f27513f5 100644
--- a/tests/end2end/test_end2end_tfc_w2a2.py
+++ b/tests/end2end/test_end2end_tfc_w2a2.py
@@ -43,14 +43,14 @@ from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
-from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
-from finn.transformation.fpgadataflow.compile import Compile
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
-from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
 from finn.transformation.fpgadataflow.insert_dwc import InsertDWC
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
@@ -147,8 +147,8 @@ def test_end2end_tfc_w2a2_fold_and_tlastmarker():
 
 def test_end2end_tfc_w2a2_gen_hls_ip():
     model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_folded.onnx")
-    model = model.transform(CodeGen_ipgen(test_fpga_part, target_clk_ns))
-    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
     model = model.transform(AnnotateResources("hls"))
     model.save(build_dir + "/end2end_tfc_w2a2_ipgen.onnx")
 
@@ -156,7 +156,7 @@ def test_end2end_tfc_w2a2_gen_hls_ip():
 def test_end2end_tfc_w2a2_ip_stitch():
     model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_ipgen.onnx")
     model = model.transform(ReplaceVerilogRelPaths())
-    model = model.transform(CodeGen_ipstitch(test_fpga_part))
+    model = model.transform(CreateStitchedIP(test_fpga_part))
     model.save(build_dir + "/end2end_tfc_w2a2_ipstitch.onnx")
 
 
@@ -166,13 +166,13 @@ def test_end2end_tfc_w2a2_verify_dataflow_part():
     inp_name = model.graph.input[0].name
     out_name = model.graph.output[0].name
     inp_dict = {inp_name: x}
-    # npysim
-    model = model.transform(CodeGen_npysim())
-    model = model.transform(Compile())
-    model = model.transform(SetExecMode("npysim"))
-    model.save(build_dir + "/end2end_tfc_w2a2_ipstitch_npysim.onnx")
-    ret_npysim = execute_onnx(model, inp_dict, True)
-    res_npysim = ret_npysim[out_name]
+    # cppsim
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
+    model = model.transform(SetExecMode("cppsim"))
+    model.save(build_dir + "/end2end_tfc_w2a2_ipstitch_cppsim.onnx")
+    ret_cppsim = execute_onnx(model, inp_dict, True)
+    res_cppsim = ret_cppsim[out_name]
     # node-by-node rtlsim
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(PrepareRTLSim())
@@ -184,8 +184,8 @@ def test_end2end_tfc_w2a2_verify_dataflow_part():
     model.save(build_dir + "/end2end_tfc_w2a2_ipstitch_whole_rtlsim.onnx")
     ret_rtlsim_whole = execute_onnx(model, inp_dict, True)
     res_rtlsim_whole = ret_rtlsim_whole[out_name]
-    assert np.isclose(res_npysim, res_rtlsim_nodebynode).all()
-    assert np.isclose(res_npysim, res_rtlsim_whole).all()
+    assert np.isclose(res_cppsim, res_rtlsim_nodebynode).all()
+    assert np.isclose(res_cppsim, res_rtlsim_whole).all()
 
 
 def test_end2end_tfc_w2a2_verify_all():
@@ -204,12 +204,12 @@ def test_end2end_tfc_w2a2_verify_all():
     parent_model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_dataflow_parent.onnx")
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
-    # produce results with npysim
+    # produce results with cppsim
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
-    sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w2a2_ipstitch_npysim.onnx")
-    ret_npysim = execute_onnx(parent_model, {iname: x}, True)
-    y_npysim = ret_npysim[oname]
+    sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w2a2_ipstitch_cppsim.onnx")
+    ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
+    y_cppsim = ret_cppsim[oname]
     # produce results with node-by-node rtlsim
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_tfc_w2a2_ipstitch_nodebynode_rtlsim.onnx"
@@ -222,7 +222,7 @@ def test_end2end_tfc_w2a2_verify_all():
     )
     ret_whole_rtlsim = execute_onnx(parent_model, {iname: x}, True)
     y_whole_rtlsim = ret_whole_rtlsim[oname]
-    assert np.isclose(y_golden, y_npysim).all()
+    assert np.isclose(y_golden, y_cppsim).all()
     assert np.isclose(y_golden, y_nodebynode_rtlsim).all()
     assert np.isclose(y_golden, y_whole_rtlsim).all()
 
@@ -284,7 +284,7 @@ def test_end2end_tfc_w2a2_run_on_pynq():
         ip = os.environ["PYNQ_IP"]  # NOQA
         if ip == "":
             pytest.skip("PYNQ board IP address not specified")
-        # produce results with npysim
+        # use the PYNQ deployment model for the dataflow partition
         sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
         sdp_node = getCustomOp(sdp_node)
         sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w2a2_pynq_deploy.onnx")
diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py
index a9b9cd84de77d3393ff76760528ea53780482bfb..1228a9c79608a1c7eb44900ddb7df54ed900a3c2 100644
--- a/tests/fpgadataflow/test_code_gen_trafo.py
+++ b/tests/fpgadataflow/test_code_gen_trafo.py
@@ -33,7 +33,7 @@ from onnx import TensorProto, helper
 import finn.util.basic as util
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 
 
 def test_code_gen_trafo():
@@ -77,9 +77,9 @@ def test_code_gen_trafo():
     W = util.gen_finn_dt_tensor(wdt, (mw, mh))
     model.set_initializer("weights", W)
 
-    model = model.transform(CodeGen_npysim())
+    model = model.transform(PrepareCppSim())
     for node in model.graph.node:
-        code_gen_attribute = util.get_by_name(node.attribute, "code_gen_dir_npysim")
+        code_gen_attribute = util.get_by_name(node.attribute, "code_gen_dir_cppsim")
         tmp_dir = code_gen_attribute.s.decode("UTF-8")
         assert os.path.isdir(
             tmp_dir
diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py
index 7a50888545b7c464d187f77122f89b255e9063da..35eed02f4e71a96f9f4e8957c372f93e6cd7927c 100644
--- a/tests/fpgadataflow/test_compilation_trafo.py
+++ b/tests/fpgadataflow/test_compilation_trafo.py
@@ -33,8 +33,8 @@ from onnx import TensorProto, helper
 import finn.util.basic as util
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
-from finn.transformation.fpgadataflow.compile import Compile
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 
 
 def test_compilation_trafo():
@@ -78,8 +78,8 @@ def test_compilation_trafo():
     W = util.gen_finn_dt_tensor(wdt, (mw, mh))
     model.set_initializer("weights", W)
 
-    model = model.transform(CodeGen_npysim())
-    model = model.transform(Compile())
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
     for node in model.graph.node:
         compilation_attribute = util.get_by_name(node.attribute, "executable_path")
         executable = compilation_attribute.s.decode("UTF-8")
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
index 46c39e45abe88c0a980228655eed7f2e31833a81..220f8a7966a146f954a7fcb3f32058e231b83e23 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
@@ -45,8 +45,8 @@ from finn.transformation.double_to_single_float import DoubleToSingleFloat
 from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
 from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
-from finn.transformation.fpgadataflow.compile import Compile
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.custom_op.registry import getCustomOp
 
@@ -113,9 +113,9 @@ def test_convert_to_hls_layers_cnv_w1a1():
     mp_nodes = model.get_nodes_by_op_type("StreamingMaxPool_Batch")
     assert len(mp_nodes) == 2
     # model.save("cnv-pre-compile.onnx")
-    model = model.transform(CodeGen_npysim())
-    model = model.transform(Compile())
-    model = model.transform(SetExecMode("npysim"))
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
+    model = model.transform(SetExecMode("cppsim"))
     # model.save("cnv-post-compile.onnx")
     produced_ctx = oxe.execute_onnx(model, input_dict, True)
     produced = produced_ctx[model.graph.output[0].name]
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
index 1a2d65de001a484fc41dbe63a38194a254ac8490..b7dea03797bc5de5e7517d0d8b816c438027008b 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
@@ -42,8 +42,8 @@ from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.bipolar_to_xnor import ConvertBipolarMatMulToXnorPopcount
 from finn.transformation.fold_constants import FoldConstants
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
-from finn.transformation.fpgadataflow.compile import Compile
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
 from finn.transformation.infer_shapes import InferShapes
@@ -107,9 +107,9 @@ def test_convert_to_hls_layers_tfc_w1a1():
     fc3w.set_nodeattr("SIMD", 16)
     fc3w.set_nodeattr("PE", 10)
 
-    model = model.transform(CodeGen_npysim())
-    model = model.transform(Compile())
-    model = model.transform(SetExecMode("npysim"))
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
+    model = model.transform(SetExecMode("cppsim"))
 
     raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
@@ -171,9 +171,9 @@ def test_convert_to_hls_layers_tfc_w1a2():
     fc3w = getCustomOp(fc3)
     fc3w.set_nodeattr("SIMD", 16)
     fc3w.set_nodeattr("PE", 10)
-    model = model.transform(CodeGen_npysim())
-    model = model.transform(Compile())
-    model = model.transform(SetExecMode("npysim"))
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
+    model = model.transform(SetExecMode("cppsim"))
     raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
     input_tensor = onnx.load_tensor_from_string(raw_i)
     # run using FINN-based execution
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
index 7c19ebbfaeed09cb1e367cf6567e5b149aa4236c..02a9acae5e0e90d2a8dfa7d4d4afb03aa11f4239 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
@@ -33,10 +33,10 @@ from onnx import TensorProto, helper
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
-from finn.transformation.fpgadataflow.compile import Compile
-from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.general import GiveUniqueNodeNames
@@ -134,7 +134,7 @@ def prepare_inputs(input_tensor):
 # Stride
 @pytest.mark.parametrize("stride", [1, 2])
 # execution mode
-@pytest.mark.parametrize("exec_mode", ["npysim", "rtlsim"])
+@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 # input channel parallelism ("SIMD")
 @pytest.mark.parametrize("simd", [1, 2])
 def test_fpgadataflow_slidingwindow(idt, k, ifm_dim, ifm_ch, stride, exec_mode, simd):
@@ -145,15 +145,15 @@ def test_fpgadataflow_slidingwindow(idt, k, ifm_dim, ifm_ch, stride, exec_mode,
         k, ifm_ch, ifm_dim, ofm_dim, simd, stride, idt
     )
 
-    if exec_mode == "npysim":
-        model = model.transform(SetExecMode("npysim"))
-        model = model.transform(CodeGen_npysim())
-        model = model.transform(Compile())
+    if exec_mode == "cppsim":
+        model = model.transform(SetExecMode("cppsim"))
+        model = model.transform(PrepareCppSim())
+        model = model.transform(CompileCppSim())
     elif exec_mode == "rtlsim":
         model = model.transform(SetExecMode("rtlsim"))
         model = model.transform(GiveUniqueNodeNames())
-        model = model.transform(CodeGen_ipgen("xc7z020clg400-1", 5))
-        model = model.transform(HLSSynth_IPGen())
+        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+        model = model.transform(HLSSynthIP())
         model = model.transform(PrepareRTLSim())
     else:
         raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")
diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py
index 1465881830b4fec61d1b1aa6e8465a41766fd9de..a230fb4201673e3bf0a31cf9ec82f21250fd9e40 100644
--- a/tests/fpgadataflow/test_fpgadataflow_dwc.py
+++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py
@@ -4,8 +4,8 @@ from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
-from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.general import GiveUniqueNodeNames
@@ -65,8 +65,8 @@ def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype):
 
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(GiveUniqueNodeNames())
-    model = model.transform(CodeGen_ipgen("xc7z020clg400-1", 5))
-    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+    model = model.transform(HLSSynthIP())
     model = model.transform(PrepareRTLSim())
     y = oxe.execute_onnx(model, input_dict)["outp"]
 
diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
index 7552fecd85ee0e36216f6c934d454f057a2a41ce..416d96d5dbfa1125d878eb8339ae38f5d572d1ce 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
@@ -38,10 +38,10 @@ from finn.analysis.fpgadataflow.hls_synth_res_estimation import hls_synth_res_es
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.multithreshold import multithreshold
-from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
-from finn.transformation.fpgadataflow.compile import Compile
-from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
@@ -149,7 +149,7 @@ def prepare_inputs(input_tensor, idt, wdt):
 @pytest.mark.parametrize("mw", [16])
 # HLS matrix height (output features)
 @pytest.mark.parametrize("mh", [16])
-def test_fpgadataflow_fclayer_npysim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
+def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
     if nf == -1:
         nf = mh
     if sf == -1:
@@ -190,9 +190,9 @@ def test_fpgadataflow_fclayer_npysim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
         # lookup op_type in registry of CustomOps
         inst = getCustomOp(node)
         inst.set_nodeattr("mem_mode", mem_mode)
-    model = model.transform(SetExecMode("npysim"))
-    model = model.transform(CodeGen_npysim())
-    model = model.transform(Compile())
+    model = model.transform(SetExecMode("cppsim"))
+    model = model.transform(PrepareCppSim())
+    model = model.transform(CompileCppSim())
     # prepare input data
     input_dict = prepare_inputs(x, idt, wdt)
     if wdt == DataType.BIPOLAR and idt == DataType.BIPOLAR:
@@ -215,7 +215,7 @@ def test_fpgadataflow_fclayer_npysim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
 
     y_produced = y_produced.reshape(y_expected.shape)
 
-    assert (y_produced == y_expected).all(), "npysim failed"
+    assert (y_produced == y_expected).all(), "cppsim failed"
 
 
 # mem_mode: const or decoupled
@@ -297,8 +297,8 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
     # works for parametrized tests...
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(GiveUniqueNodeNames())
-    model = model.transform(CodeGen_ipgen("xc7z020clg400-1", 5))
-    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+    model = model.transform(HLSSynthIP())
     model = model.transform(ReplaceVerilogRelPaths())
     model = model.transform(PrepareRTLSim())
     y_produced = oxe.execute_onnx(model, input_dict)["outp"]
@@ -389,8 +389,8 @@ def test_fpgadataflow_fclayer_large_depth_decoupled_mode(
     # works for parametrized tests...
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(GiveUniqueNodeNames())
-    model = model.transform(CodeGen_ipgen("xc7z020clg400-1", 5))
-    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+    model = model.transform(HLSSynthIP())
     model = model.transform(ReplaceVerilogRelPaths())
     model = model.transform(PrepareRTLSim())
     y_produced = oxe.execute_onnx(model, input_dict)["outp"]
diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py
index 8ab4809928d91d8456b7720f897763b206c4e5f5..fe27d7d4273be2b938e5bf70338bb374ce16b6b2 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fifo.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fifo.py
@@ -5,10 +5,10 @@ from onnx import TensorProto, helper
 
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
-from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 
-from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.general import GiveUniqueNodeNames
@@ -87,8 +87,8 @@ def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
     model = model.transform(SetExecMode("rtlsim"))
     model = model.transform(InsertTLastMarker())
     model = model.transform(GiveUniqueNodeNames())
-    model = model.transform(CodeGen_ipgen(test_fpga_part, target_clk_ns))
-    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
     model = model.transform(PrepareRTLSim())
     y = oxe.execute_onnx(model, input_dict)["outp"]
     assert (
@@ -98,7 +98,7 @@ def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
     assert y.shape == tuple(Shape), """The output shape is incorrect."""
 
     model = model.transform(ReplaceVerilogRelPaths())
-    model = model.transform(CodeGen_ipstitch(test_fpga_part))
+    model = model.transform(CreateStitchedIP(test_fpga_part))
     model = model.transform(MakePYNQProject(test_pynq_board))
     model = model.transform(SynthPYNQProject())
     model = model.transform(MakePYNQDriver())
diff --git a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py b/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py
index af0c7b0755c7aad5dd145ea5ea8ace59941dd74a..f26ba428bf4cbe174c048dcd35a4d63dc58519ab 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py
@@ -37,12 +37,12 @@ from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
 from finn.custom_op.registry import getCustomOp
-from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
-from finn.transformation.fpgadataflow.codegen_ipstitch import CodeGen_ipstitch
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
 from finn.transformation.fpgadataflow.create_dataflow_partition import (
     CreateDataflowPartition,
 )
-from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
 from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
 from finn.transformation.fpgadataflow.make_pynq_driver import MakePYNQDriver
@@ -208,8 +208,8 @@ def test_fpgadataflow_ipstitch_gen_model():  # exec_mode):
         model.set_metadata_prop("exec_mode", "remote_pynq")
     model = model.transform(InsertTLastMarker())
     model = model.transform(GiveUniqueNodeNames())
-    model = model.transform(CodeGen_ipgen(test_fpga_part, 5))
-    model = model.transform(HLSSynth_IPGen())
+    model = model.transform(PrepareIP(test_fpga_part, 5))
+    model = model.transform(HLSSynthIP())
     assert model.graph.node[0].op_type == "StreamingFCLayer_Batch"
     assert model.graph.node[-1].op_type == "TLastMarker"
     model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx")
@@ -220,7 +220,7 @@ def test_fpgadataflow_ipstitch_do_stitch():
         ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx"
     )
     model = model.transform(rvp.ReplaceVerilogRelPaths())
-    model = model.transform(CodeGen_ipstitch(test_fpga_part))
+    model = model.transform(CreateStitchedIP(test_fpga_part))
     vivado_stitch_proj_dir = model.get_metadata_prop("vivado_stitch_proj")
     assert vivado_stitch_proj_dir is not None
     assert os.path.isdir(vivado_stitch_proj_dir)
diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
index a7a731aaa5593a9fd680061d2b8ad3fc47e9f490..ac4ab33469c7720c3d7b9f30f5d13be888e1439d 100644
--- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
+++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
@@ -33,10 +33,10 @@ from onnx import TensorProto, helper
 import finn.core.onnx_exec as oxe
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
-from finn.transformation.fpgadataflow.codegen_ipgen import CodeGen_ipgen
-from finn.transformation.fpgadataflow.codegen_npysim import CodeGen_npysim
-from finn.transformation.fpgadataflow.compile import Compile
-from finn.transformation.fpgadataflow.hlssynth_ipgen import HLSSynth_IPGen
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
 from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.general import GiveUniqueNodeNames
@@ -120,7 +120,7 @@ def prepare_inputs(input_tensor):
 # input channels
 @pytest.mark.parametrize("ifm_ch", [1, 2])  # , 2, 3, 4])
 # execution mode
-@pytest.mark.parametrize("exec_mode", ["rtlsim", "npysim"])
+@pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"])
 def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
     stride = k
     ofm_dim = int(((ifm_dim - k) / stride) + 1)
@@ -136,15 +136,15 @@ def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
 
     model = make_single_streamingmaxpool_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, idt)
 
-    if exec_mode == "npysim":
-        model = model.transform(SetExecMode("npysim"))
-        model = model.transform(CodeGen_npysim())
-        model = model.transform(Compile())
+    if exec_mode == "cppsim":
+        model = model.transform(SetExecMode("cppsim"))
+        model = model.transform(PrepareCppSim())
+        model = model.transform(CompileCppSim())
     elif exec_mode == "rtlsim":
         model = model.transform(SetExecMode("rtlsim"))
         model = model.transform(GiveUniqueNodeNames())
-        model = model.transform(CodeGen_ipgen("xc7z020clg400-1", 5))
-        model = model.transform(HLSSynth_IPGen())
+        model = model.transform(PrepareIP("xc7z020clg400-1", 5))
+        model = model.transform(HLSSynthIP())
         model = model.transform(PrepareRTLSim())
     else:
         raise Exception("Unknown exec_mode in test_fpgadataflow_slidingwindow")