diff --git a/finn-rtllib/memstream/hdl/memstream_singleblock.v b/finn-rtllib/memstream/hdl/memstream_singleblock.v
index 6bb3a97115325d81d4292c5af3c33921c2680a30..c9b8770aaa58dc3355bc259e5c5fece702125490 100644
--- a/finn-rtllib/memstream/hdl/memstream_singleblock.v
+++ b/finn-rtllib/memstream/hdl/memstream_singleblock.v
@@ -192,7 +192,11 @@ end else begin: bypass
 
 reg [MEM_WIDTH-1:0] singleval[0:0];
 initial begin
-    $readmemh({MEM_INIT,"memblock_0.dat"}, singleval, 0, 0);
+    `ifdef SYNTHESIS
+        $readmemh({MEM_INIT,"memblock_synth_0.dat"}, singleval, 0, 0);
+    `else
+        $readmemh({MEM_INIT,"memblock_sim_0.dat"}, singleval, 0, 0);
+    `endif
 end
 
 always @(posedge aclk)
diff --git a/finn-rtllib/memstream/hdl/ramb18_sdp.v b/finn-rtllib/memstream/hdl/ramb18_sdp.v
index 63a349f7d56197a9b5a66c837a2f003a6e8475e6..8d2fbf9a988c0e9702e1ed83f2b4e79efb1c5a85 100644
--- a/finn-rtllib/memstream/hdl/ramb18_sdp.v
+++ b/finn-rtllib/memstream/hdl/ramb18_sdp.v
@@ -71,15 +71,15 @@ initial begin
 	//MEM_INIT path must be terminated by /
   `ifdef SYNTHESIS
   if (ID < 10)
-    $readmemh({MEM_INIT,"memblock_",idx+8'd48,".dat"}, mem, 0, DEPTH-1);
+    $readmemh({MEM_INIT,"memblock_synth_",idx+8'd48,".dat"}, mem, 0, DEPTH-1);
   else
-    $readmemh({MEM_INIT,"memblock_",(idx/10)+8'd48,(idx%10)+8'd48,".dat"}, mem, 0, DEPTH-1);
+    $readmemh({MEM_INIT,"memblock_synth_",(idx/10)+8'd48,(idx%10)+8'd48,".dat"}, mem, 0, DEPTH-1);
   `else
   $sformat(idx,"%0d",ID);
   if (ID < 10)
-    $readmemh({MEM_INIT,"memblock_",idx[7:0],".dat"}, mem, 0, DEPTH-1);
+    $readmemh({MEM_INIT,"memblock_sim_",idx[7:0],".dat"}, mem, 0, DEPTH-1);
   else
-    $readmemh({MEM_INIT,"memblock_",idx,".dat"}, mem, 0, DEPTH-1);
+    $readmemh({MEM_INIT,"memblock_sim_",idx,".dat"}, mem, 0, DEPTH-1);
   `endif
 end
 
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 3f4103b4380f8d1838910b37e966e8363891d39f..96a3139dcffb83aaca0303546c49e5b8cf73424b 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -818,16 +818,28 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             self.make_weight_file(weights, "decoupled_npy", weight_filename_sim)
             if mem_mode == "decoupled":
                 # also save weights as Verilog .dat file
-                weight_filename_rtl = "{}/memblock_0.dat".format(code_gen_dir)
+                # note that we provide two different .dat files, one for sim
+                # and one for synthesis. this is because URAM-based weights always
+                # need zero weights for synthesis, otherwise they get inferred
+                # as BRAM
+                weight_filename_rtl_synth = "{}/memblock_synth_0.dat".format(
+                    code_gen_dir
+                )
+                weight_filename_rtl_sim = "{}/memblock_sim_0.dat".format(code_gen_dir)
+                # sim weights are always the true weights
+                self.make_weight_file(
+                    weights, "decoupled_verilog_dat", weight_filename_rtl_sim
+                )
                 ram_style = self.get_nodeattr("ram_style")
                 if ram_style == "ultra":
                     # UltraRAM must have no memory initializer, or only zeroes
                     # otherwise BRAM will be inferred instead of URAM
                     # as a workaround we provide a zero-weight init here
-                    # TODO handle this in Verilog with an if statement
-                    weights = np.zeros_like(weights)
+                    synth_weights = np.zeros_like(weights)
+                else:
+                    synth_weights = weights
                 self.make_weight_file(
-                    weights, "decoupled_verilog_dat", weight_filename_rtl
+                    synth_weights, "decoupled_verilog_dat", weight_filename_rtl_synth
                 )
         else:
             raise Exception(
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index 173882bf929611f6cd9f560f48a46dfe09430622..707289d393e2486780aed2c4af336dd3bafd37a6 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -465,9 +465,26 @@ class Thresholding_Batch(HLSCustomOp):
             weight_filename_sim = "{}/thresholds.npy".format(code_gen_dir)
             self.make_weight_file(thresholds, "decoupled_npy", weight_filename_sim)
             # also save weights as Verilog .dat file
-            weight_filename_rtl = "{}/memblock_0.dat".format(code_gen_dir)
+            # note that we provide two different .dat files, one for sim
+            # and one for synthesis. this is because URAM-based weights always
+            # need zero weights for synthesis, otherwise they get inferred
+            # as BRAM
+            weight_filename_rtl_synth = "{}/memblock_synth_0.dat".format(code_gen_dir)
+            weight_filename_rtl_sim = "{}/memblock_sim_0.dat".format(code_gen_dir)
+            # sim weights are always the true weights
             self.make_weight_file(
-                thresholds, "decoupled_verilog_dat", weight_filename_rtl
+                thresholds, "decoupled_verilog_dat", weight_filename_rtl_sim
+            )
+            ram_style = self.get_nodeattr("ram_style")
+            if ram_style == "ultra":
+                # UltraRAM must have no memory initializer, or only zeroes
+                # otherwise BRAM will be inferred instead of URAM
+                # as a workaround we provide a zero-weight init here
+                synth_thresholds = np.zeros_like(thresholds)
+            else:
+                synth_thresholds = thresholds
+            self.make_weight_file(
+                synth_thresholds, "decoupled_verilog_dat", weight_filename_rtl_synth
             )
         else:
             raise Exception("Unrecognized mem_mode")