diff --git a/src/finn/core/throughput_test.py b/src/finn/core/throughput_test.py
index 07eda6aa1d82df0a9f9a01d4f17f7880a8cf8b26..3533fd13399a4ba4392d66af785979afc32cab29 100644
--- a/src/finn/core/throughput_test.py
+++ b/src/finn/core/throughput_test.py
@@ -157,8 +157,8 @@ def throughput_test_rtlsim(model, batchsize=100):
     res["cycles"] = cycles
     res["runtime[ms]"] = runtime_s * 1000
     res["throughput[images/s]"] = batchsize / runtime_s
-    res["DRAM_in_bandwidth[Mb/s]"] = i_bytes * 0.000001 / runtime_s
-    res["DRAM_out_bandwidth[Mb/s]"] = o_bytes * 0.000001 / runtime_s
+    res["DRAM_in_bandwidth[MB/s]"] = i_bytes * 0.000001 / runtime_s
+    res["DRAM_out_bandwidth[MB/s]"] = o_bytes * 0.000001 / runtime_s
     res["fclk[mhz]"] = fclk_mhz
     res["N"] = batchsize
 
diff --git a/src/finn/qnn-data/templates/driver/driver_base.py b/src/finn/qnn-data/templates/driver/driver_base.py
index 497477da9d4cff736dc32eb27532e658890d5cc7..2096760580b4f33ba1ab09564ebba1601c4dc23c 100644
--- a/src/finn/qnn-data/templates/driver/driver_base.py
+++ b/src/finn/qnn-data/templates/driver/driver_base.py
@@ -439,13 +439,13 @@ class FINNExampleOverlay(Overlay):
         total_in = 0
         for i in range(self.num_inputs):
             total_in += np.prod(self.ishape_packed(i))
-        res["DRAM_in_bandwidth[Mb/s]"] = total_in * 0.000001 / runtime
+        res["DRAM_in_bandwidth[MB/s]"] = total_in * 0.000001 / runtime
         total_out = 0
         for o in range(self.num_outputs):
             total_out += np.prod(self.oshape_packed(o))
-        res["DRAM_out_bandwidth[Mb/s]"] = total_out * 0.000001 / runtime
+        res["DRAM_out_bandwidth[MB/s]"] = total_out * 0.000001 / runtime
         for iwdma, iwbuf, iwdma_name in self.external_weights:
-            res["DRAM_extw_%s_bandwidth[Mb/s]" % iwdma_name] = (
+            res["DRAM_extw_%s_bandwidth[MB/s]" % iwdma_name] = (
                 self.batch_size * np.prod(iwbuf.shape) * 0.000001 / runtime
             )
         if self.platform == "zynq-iodma":
diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
index 863523605580ef77559b65a1abd72802daff187d..dce98e54a3d62d72b83ebed21aa0604f0f6fa8ce 100644
--- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py
+++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py
@@ -118,12 +118,21 @@ class MakePYNQDriver(Transformation):
         files_to_copy.append(
             (qonnx_path + "/util/basic.py", qonnx_target_path + "/util/basic.py")
         )
+        files_to_copy.append(
+            (qonnx_path + "/util/__init__.py", qonnx_target_path + "/util/__init__.py")
+        )
         files_to_copy.append(
             (
                 finn_util_path + "/data_packing.py",
                 finn_target_path + "/util/data_packing.py",
             )
         )
+        files_to_copy.append(
+            (
+                finn_util_path + "/__init__.py",
+                finn_target_path + "/util/__init__.py",
+            )
+        )
         for (src_file, target_file) in files_to_copy:
             shutil.copy(src_file, target_file)
         # extract input-output shapes from the graph
diff --git a/tests/end2end/test_end2end_bnn_pynq.py b/tests/end2end/test_end2end_bnn_pynq.py
index ab82a00c234b48ced48f3987d929bb1f340083f5..782e18fbc38ec8e1a9cc1d2facaba5b38d3c947d 100644
--- a/tests/end2end/test_end2end_bnn_pynq.py
+++ b/tests/end2end/test_end2end_bnn_pynq.py
@@ -803,7 +803,7 @@ class TestEnd2End:
         ret_str += "\n" + "Raw data:"
 
         ret_str += "\n" + "{:<8} {:<16} {:<16} {:<16} {:<16} {:<16}".format(
-            "N", "runtime[ms]", "fclk[mhz]", "fps", "DRAM rd[Mb/s]", "DRAM wr[Mb/s]"
+            "N", "runtime[ms]", "fclk[mhz]", "fps", "DRAM rd[MB/s]", "DRAM wr[MB/s]"
         )
         for k in bsize_range:
             v = ret[k]
@@ -812,8 +812,8 @@ class TestEnd2End:
                 np.round(v["runtime[ms]"], 4),
                 v["fclk[mhz]"],
                 np.round(v["throughput[images/s]"], 2),
-                np.round(v["DRAM_in_bandwidth[Mb/s]"], 2),
-                np.round(v["DRAM_out_bandwidth[Mb/s]"], 2),
+                np.round(v["DRAM_in_bandwidth[MB/s]"], 2),
+                np.round(v["DRAM_out_bandwidth[MB/s]"], 2),
             )
         ret_str += "\n" + "-----------------------------"
         warnings.warn(ret_str)