diff --git a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
index 027524dfdc3fdd45a37892bd1b0a510b5b3866a7..820ded8e25699fd8c4b5239f5f6306d6b6804702 100644
--- a/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
+++ b/src/finn/custom_op/fpgadataflow/channelwise_op_batch.py
@@ -41,18 +41,17 @@ from finn.util.data_packing import (
 )
 from . import templates
 
-# ONNX i/o tensor shape assumptions for Thresholding:
+# ONNX i/o tensor shape assumptions for channelwise ops:
 # input 0 is the input tensor, shape (..., NumChannels)
-# input 1 is the threshold tensor, shape (NumChannels, n_thres)
+# input 1 is the channelwise parameter tensor, shape (NumChannels, params_per_channel)
 # output 0 is the output tensor, shape (..., NumChannels) - same as input
 # the ... here can be any shape (representing groups of vectors)
 
-# by setting Func appropriately, this function can implement
-# any channel-wise operation, including Add, Mul, Thresholding
-
-
 class ChannelwiseOp_Batch(HLSCustomOp):
-    """Class that corresponds to finn-hls Thresholding_Batch function."""
+    """Class that corresponds to finn-hls Thresholding_Batch function.
+    It can implement a variety of channel-wise parametrized operations,
+    including Add, Mul and multi-thresholding.
+    """
 
     def __init__(self, onnx_node):
         super().__init__(onnx_node)
@@ -60,10 +59,12 @@ class ChannelwiseOp_Batch(HLSCustomOp):
 
     def get_nodeattr_types(self):
         my_attrs = {
+            # channelwise "map" function to apply:
+            # one of cmp_le, cmp_ge, add, mul
             "Func": ("s", False, "cmp_le"),
             "PE": ("i", True, 0),
             "NumChannels": ("i", True, 0),
-            # string defining memory type
+            # string defining memory resource type for parameters
             "ram_style": ("s", False, "distributed"),
             # FINN DataTypes for inputs, weights, outputs
             "inputDataType": ("s", True, ""),
@@ -81,7 +82,8 @@ class ChannelwiseOp_Batch(HLSCustomOp):
         return my_attrs
 
     def calc_tmem(self):
-        """Calculates and returns TMEM."""
+        """Calculates and returns TMEM, the depth of the memory used
+        to store the channelwise op parameters."""
         chn = self.get_nodeattr("NumChannels")
         pe = self.get_nodeattr("PE")
         return chn // pe
@@ -107,7 +109,8 @@ class ChannelwiseOp_Batch(HLSCustomOp):
         # check input datatype against property
         idt_name = self.get_input_datatype().name
         exp_idt_name = self.get_nodeattr("inputDataType")
-        assert exp_idt_name == idt_name, "Bad input DataType for Thresholding layer"
+        assert exp_idt_name == idt_name, "Bad input DataType for ChannelwiseOp layer"
+        # TODO: dynamically infer/update odt based on idt as done in ConvertToHLSLayers?
         # set output datatype from property
         odt = self.get_output_datatype()
         model.set_tensor_datatype(node.output[0], odt)
@@ -534,8 +537,8 @@ class ChannelwiseOp_Batch(HLSCustomOp):
             "#pragma HLS INTERFACE ap_ctrl_none port=return"
         )
 
-        # the threshold tensor is acc_type [PE][TMEM][N_THRES]
-        # partition for parallel access along PE and N_THRES
+        # the channelwise parameter tensor is acc_type [PE][TMEM][N_PARAMS_PER_CHANNEL]
+        # partition for parallel access along PE and N_PARAMS_PER_CHANNEL
         # dimensions (dims 1 and 3)
         self.code_gen_dict["$PRAGMAS$"].append(
             (
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index 0cd3471ed18d4877c45f514a235f32fb4974faef..c34a7ef23d60a41f80922fcd48ad5db0bf1d2bf7 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -577,6 +577,7 @@ class InferChannelwiseLinearLayer(Transformation):
                     else:
                         odt = DataType.get_smallest_possible(idt.max() * idt.max())
                 model.set_initializer(ll_const, ll_cinit.reshape(ch))
+                model.set_tensor_datatype(ll_output, odt)
                 # create and insert node
                 new_node = helper.make_node(
                     "ChannelwiseOp_Batch",
diff --git a/src/finn/util/onnx.py b/src/finn/util/onnx.py
index 6a56f0cdcc85cd81a0c448971ab625268e8408d2..4d7cdd126ededac887639a932c2021ef5f081c02 100644
--- a/src/finn/util/onnx.py
+++ b/src/finn/util/onnx.py
@@ -41,9 +41,10 @@ def valueinfo_to_tensor(vi):
 
 
 def nchw_to_nhwc(t, model, idx, reverse=False):
-    """Converts a NCHW <-> NHWC by inserting a transpose. Input t is assumed NCHW.
-    By default we insert a transpose NCHW -> NHWC, but if reverse is true,
-    we convert NHWC -> NCHW"""
+    """Converts between NCHW <-> NHWC layouts for tensor t by inserting a transpose.
+    If reverse=False, t is assumed NCHW and we insert a transpose to convert NCHW -> NHWC.
+    If reverse=True, t is assumed NHWC and we insert a transpose to convert NHWC -> NCHW.
+    """
     graph = model.graph
     # create new NHWC tensor
     t_shape = model.get_tensor_shape(t)
diff --git a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
index 05d8e28498316f0a76da83cd70611801fdb37846..6a69d8180a4a9825a83b419666bbccb9f203a7d8 100644
--- a/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
+++ b/tests/fpgadataflow/test_fpgadataflow_channelwise_ops.py
@@ -89,7 +89,7 @@ def make_modelwrapper(C, pe, idt, odt, func, vecs):
 # input datatype
 @pytest.mark.parametrize("idt", [DataType.INT4])
 # folding, -1 is maximum possible
-@pytest.mark.parametrize("nf", [-1, 2, 1])
+@pytest.mark.parametrize("nf", [-1, 2])
 # number of input features
 @pytest.mark.parametrize("ich", [16])
 # vecs