diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index f3f56ed775b6563834aefaaef2c90cb7c8c9c58d..5d7806701e6664b860167175448001a3d0b54a0a 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -15,7 +15,7 @@ gecho () {
 # the repos themselves are cloned in the Dockerfile
 BREVITAS_COMMIT=989cdfdba4700fdd900ba0b25a820591d561c21a
 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
-HLSLIB_COMMIT=c9930f455b27b4dfa0677471ba20ad601ba56118
+HLSLIB_COMMIT=1893584c83dc4500fd92733d500e80903bab1d5d
 PYVERILATOR_COMMIT=1d89cb0d4e0c97469cc6352c611f876ec13edfa6
 PYNQSHELL_COMMIT=0c82a61b0ec1a07fa275a14146233824ded7a13d
 
diff --git a/src/finn/custom_op/fpgadataflow/sameresize_batch.py b/src/finn/custom_op/fpgadataflow/fmpadding.py
similarity index 86%
rename from src/finn/custom_op/fpgadataflow/sameresize_batch.py
rename to src/finn/custom_op/fpgadataflow/fmpadding.py
index c459cac1e9c17336200a1fc85aad2af5e14e2c61..fa321dfa65d14b67fa218fb6a49f602ddab8d57e 100644
--- a/src/finn/custom_op/fpgadataflow/sameresize_batch.py
+++ b/src/finn/custom_op/fpgadataflow/fmpadding.py
@@ -6,27 +6,40 @@ from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy
 
 
-class SameResize_Batch(HLSCustomOp):
-    """Class that corresponds to finn-hlslib SameResize function.
-    Implements 'same' padding on a given input image."""
+class FMPadding_Batch(HLSCustomOp):
+    """Corresponds to finn-hlslib FMPadding_Batch function.
+    Pads the input image by a given amount."""
 
     def __init__(self, onnx_node):
         super().__init__(onnx_node)
 
     def get_nodeattr_types(self):
         my_attrs = {
+            # spatial size of input images
             "ImgDim": ("i", True, 0),
-            "KernelDim": ("i", True, 0),
-            "Stride": ("i", True, 0),
+            # total padding (per dimension) to apply
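+            # e.g. Padding=2 with the default PaddingStyle splits the padding
+            # evenly, adding one pixel on each side of both spatial dims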
+            "Padding": ("i", True, 2),
+            # number of channels in input image
             "NumChannels": ("i", True, 0),
             # FINN input datatype
             "inputDataType": ("s", True, ""),
-            # distribution of added values to achieve "same" padding
-            "PaddingStyle": ("i", True, 2),
+            # controls distribution of padded pixels
+            # in case of uneven padding -- see the FMPadding function
+            # in hlslib
+            "PaddingStyle": ("i", False, 2),
+            # shape describing input vecs per execution
+            "numInputVectors": ("i", False, 1),
         }
         my_attrs.update(super().get_nodeattr_types())
         return my_attrs
 
+    def get_padded_odim(self):
+        "Return the padded spatial size of the output."
+
+        idim = self.get_nodeattr("ImgDim")
+        pad = self.get_nodeattr("Padding")
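+        # e.g. ImgDim=8 with Padding=2 gives a 10x10 padded output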
+        return idim + pad
+
     def get_normal_input_shape(self):
         idim = self.get_nodeattr("ImgDim")
         num_ch = self.get_nodeattr("NumChannels")
@@ -35,14 +48,8 @@ class SameResize_Batch(HLSCustomOp):
         return ishape
 
     def get_normal_output_shape(self):
-        idim = self.get_nodeattr("ImgDim")
+        odim = self.get_padded_odim()
         num_ch = self.get_nodeattr("NumChannels")
-        kdim = self.get_nodeattr("KernelDim")
-        stride = self.get_nodeattr("Stride")
-        assert idim % stride == 0, "Stride must divide input dimension."
-        # number of "same" windows over the input data
-        same_windows = idim // stride
-        odim = kdim + stride * (same_windows - 1)
 
         oshape = (1, odim, odim, num_ch)
         return oshape
@@ -87,7 +94,7 @@ class SameResize_Batch(HLSCustomOp):
         # data type stays the same
         dtype = model.get_tensor_datatype(node.input[0])
         exp_idtype = self.get_input_datatype()
-        assert dtype == exp_idtype, "Unexpected datatype for SameResize_Batch"
+        assert dtype == exp_idtype, "Unexpected datatype for FMPadding_Batch"
         model.set_tensor_datatype(node.output[0], dtype)
 
     def verify_node(self):
@@ -96,9 +103,9 @@ class SameResize_Batch(HLSCustomOp):
     def get_input_datatype(self):
         """Returns FINN DataType of input."""
         ret = DataType[self.get_nodeattr("inputDataType")]
-        # the hlslib op always pads with zeroes, so ensure that the DataType
-        # is able to represent zeroes
-        assert ret.allowed(0), "SameResize_Batch DataType must support zero"
+        # the hlslib op always pads with zeros, so ensure that the DataType
+        # is able to represent zeros
+        assert ret.allowed(0), "FMPadding_Batch DataType must support zero"
         return ret
 
     def get_output_datatype(self):
@@ -125,18 +132,16 @@ class SameResize_Batch(HLSCustomOp):
         self.code_gen_dict["$GLOBALS$"] = ['#include "streamtools.h"']
 
     def defines(self, var):
-        numReps = 1
-        assert self.get_nodeattr("PaddingStyle") == 2, "Only PaddingStyle=2 supported"
         self.code_gen_dict["$DEFINES$"] = [
-            """#define ImgDim1 {}\n #define KernelDim1 {}\n
-            #define Stride1 {}\n #define NumChannels1 {}\n
-            #define PaddingStyle1 {}\n #define numReps {}""".format(
+            """#define ImgDim1 {}\n#define OutputDim1 {}\n
+            #define Padding1 {}\n#define NumChannels1 {}\n
+            #define PaddingStyle1 {}\n#define numReps {}\n""".format(
                 self.get_nodeattr("ImgDim"),
-                self.get_nodeattr("KernelDim"),
-                self.get_nodeattr("Stride"),
+                self.get_padded_odim(),
+                self.get_nodeattr("Padding"),
                 self.get_nodeattr("NumChannels"),
                 self.get_nodeattr("PaddingStyle"),
-                numReps,
+                self.get_nodeattr("numInputVectors"),
             )
         ]
 
@@ -171,8 +176,8 @@ class SameResize_Batch(HLSCustomOp):
         in_t = self.get_input_datatype().get_hls_datatype_str()
         node = self.onnx_node
         self.code_gen_dict["$DOCOMPUTE$"] = [
-            """{}<ImgDim1, KernelDim1, Stride1, NumChannels1,
-                {}, PaddingStyle1> (in0, out, numReps);""".format(
+            """{}<ImgDim1, OutputDim1, Padding1, NumChannels1,
+            {}, PaddingStyle1> (in0, out, numReps);""".format(
                 node.op_type, in_t
             )
         ]
@@ -261,8 +266,7 @@ class SameResize_Batch(HLSCustomOp):
             super().npy_to_dynamic_output(context)
             assert (
                 context[node.output[0]].shape == folded_oshape
-            ), "cppsim \
-            did not produce expected ofolded utput shape"
+            ), "cppsim did not produce expected folded output shape"
             context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape)
         elif mode == "rtlsim":
             sim = self.get_rtlsim()
diff --git a/src/finn/custom_op/registry.py b/src/finn/custom_op/registry.py
index 238829e03353d79fab7c51e7d1b9dca6e2a96a11..614a3d7ffd70d0b102bad2b76177a2d3b32765c7 100644
--- a/src/finn/custom_op/registry.py
+++ b/src/finn/custom_op/registry.py
@@ -44,7 +44,7 @@ from finn.custom_op.fpgadataflow.streamingdatawidthconverter_batch import (
     StreamingDataWidthConverter_Batch,
 )
 from finn.custom_op.fpgadataflow.globalaccpool_batch import GlobalAccPool_Batch
-from finn.custom_op.fpgadataflow.sameresize_batch import SameResize_Batch
+from finn.custom_op.fpgadataflow.fmpadding import FMPadding_Batch
 from finn.custom_op.fpgadataflow.thresholding_batch import Thresholding_Batch
 from finn.custom_op.fpgadataflow.addstreams_batch import AddStreams_Batch
 from finn.custom_op.fpgadataflow.labelselect_batch import LabelSelect_Batch
@@ -65,7 +65,7 @@ custom_op["MaxPoolNHWC"] = MaxPoolNHWC
 custom_op["StreamingDataWidthConverter_Batch"] = StreamingDataWidthConverter_Batch
 custom_op["StreamingFIFO"] = StreamingFIFO
 custom_op["GlobalAccPool_Batch"] = GlobalAccPool_Batch
-custom_op["SameResize_Batch"] = SameResize_Batch
+custom_op["FMPadding_Batch"] = FMPadding_Batch
 custom_op["Thresholding_Batch"] = Thresholding_Batch
 custom_op["AddStreams_Batch"] = AddStreams_Batch
 custom_op["LabelSelect_Batch"] = LabelSelect_Batch
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index 3ff86cab48d365c10e69bc2c764e8083c6a36880..d421a5f3ef8ca980b399087de1482b2ae913da1b 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -26,7 +26,7 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from onnx import helper
+from onnx import helper, TensorProto
 
 from finn.core.datatype import DataType
 from finn.transformation import Transformation
@@ -59,27 +59,61 @@ class InferConvInpGen(Transformation):
                 ifm_ch = i2c_in_shape[-1]
                 ifm_dim = i2c_in_shape[1]
                 ofm_dim = i2c_out_shape[1]
-                # if padding enabled, ensure pad_val supported by DataType
+
+                # default params for ConvolutionInputGenerator
+                ConvInpGen_node_idx = node_ind
+                ConvInpGen_input = i2c_input
+                ConvInpGen_idim = ifm_dim
+
                 if pad > 0:
+                    # if padding enabled, ensure pad_val supported by DataType
                     assert dt.allowed(pad_val), "Im2Col DataType must support pad_val"
+
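+                    # spatial size after padding, e.g. a 7x7 input with pad=1
+                    # becomes a 9x9 feature map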
+                    odim_padding = ifm_dim + 2 * pad
+
+                    padding_out = helper.make_tensor_value_info(
+                        model.make_new_valueinfo_name(),
+                        TensorProto.FLOAT,
+                        (1, odim_padding, odim_padding, ifm_ch),
+                    )
+                    graph.value_info.append(padding_out)
+                    padding_out = padding_out.name
+                    model.set_tensor_datatype(padding_out, dt)
+
+                    ConvInpGen_node_idx += 1
+                    ConvInpGen_input = padding_out
+                    ConvInpGen_idim = odim_padding
+
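+                    # insert an FMPadding_Batch node in front of the
+                    # ConvolutionInputGenerator so that the Im2Col padding is
+                    # realized as a separate fpgadataflow layer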
+                    padding_node = helper.make_node(
+                        "FMPadding_Batch",
+                        [i2c_input],
+                        [padding_out],
+                        domain="finn",
+                        backend="fpgadataflow",
+                        ImgDim=ifm_dim,
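+                        # FMPadding expects the total padding per spatial dim
+                        # (both sides together), hence 2 * pad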
+                        Padding=2 * pad,
+                        NumChannels=ifm_ch,
+                        inputDataType=dt.name,
+                    )
+                    graph.node.insert(node_ind, padding_node)
+
                 # create equivalent ConvolutionInputGenerator node
-                # TODO support padding
-                new_node = helper.make_node(
+                ConvInpGen_node = helper.make_node(
                     "ConvolutionInputGenerator",
-                    [i2c_input],
+                    [ConvInpGen_input],
                     [i2c_output],
                     domain="finn",
                     backend="fpgadataflow",
                     ConvKernelDim=k,
                     IFMChannels=ifm_ch,
-                    IFMDim=ifm_dim,
+                    IFMDim=ConvInpGen_idim,
                     OFMDim=ofm_dim,
                     SIMD=ifm_ch,
                     Stride=stride,
                     inputDataType=dt.name,
                     outputDataType=dt.name,
                 )
-                graph.node.insert(node_ind, new_node)
+                graph.node.insert(ConvInpGen_node_idx, ConvInpGen_node)
                 # remove old nodes
                 graph.node.remove(n)
                 graph_modified = True
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee65326ec57fb7fa7fa0490a8980dbabb8efc13c
--- /dev/null
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
@@ -0,0 +1,106 @@
+from onnx import TensorProto, helper
+import numpy as np
+import pytest
+
+from finn.core.datatype import DataType
+from finn.transformation.infer_shapes import InferShapes
+from finn.transformation.infer_datatypes import InferDataTypes
+from finn.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames
+from finn.transformation.infer_data_layouts import InferDataLayouts
+from finn.transformation.lower_convs_to_matmul import LowerConvsToMatMul
+
+import finn.core.onnx_exec as oxe
+from finn.core.modelwrapper import ModelWrapper
+from finn.util.basic import gen_finn_dt_tensor
+import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
+
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+
+
+@pytest.mark.parametrize("padding", [True, False])
+@pytest.mark.parametrize("kernel_size", [3, 5])
+@pytest.mark.slow
+@pytest.mark.vivado
+def test_convert_to_hls_conv_layer(padding, kernel_size):
+
+    assert (
+        kernel_size % 2 != 0
+    ), """test_convert_to_hls_conv_layer test only
+    supports odd kernel_size"""
+
+    np.random.seed(0)
+    idt = DataType.UINT4
+
+    in_feature_dim = 7
+    in_chn = 3
+
+    stages = 1  # just one convolution
+
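+    # without padding, each conv stage shrinks the image by (kernel_size - 1),
+    # e.g. a 7x7 input with kernel_size=3 yields a 5x5 output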
+    out_feature_dim = (
+        in_feature_dim if padding else in_feature_dim - (kernel_size // 2 * 2) * stages
+    )
+
+    input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
+    output_shape = [1, in_chn, out_feature_dim, out_feature_dim]
+
+    conv_param_shape = [in_chn, in_chn, kernel_size, kernel_size]
+
+    conv_config = {}
+    conv_config["dilations"] = [1, 1]
+    conv_config["group"] = 1
+    conv_config["kernel_shape"] = [kernel_size, kernel_size]
+    if padding:
+        pad = kernel_size // 2
+        conv_config["pads"] = [pad, pad, pad, pad]
+    else:
+        conv_config["pads"] = [0, 0, 0, 0]
+    conv_config["strides"] = [1, 1]
+
+    top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape)
+    top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, output_shape)
+    value_info = [
+        helper.make_tensor_value_info("p1", TensorProto.FLOAT, conv_param_shape)
+    ]
+
+    modelproto = helper.make_model(
+        helper.make_graph(
+            name="conv_test",
+            inputs=[top_in],
+            outputs=[top_out],
+            value_info=value_info,
+            nodes=[
+                helper.make_node("Conv", ["top_in", "p1"], ["top_out"], **conv_config)
+            ],
+        )
+    )
+
+    model = ModelWrapper(modelproto)
+    model.set_tensor_datatype("top_in", idt)
+    model.set_tensor_datatype("top_out", idt)
+    model.set_tensor_datatype("p1", DataType.UINT4)
+
+    model = model.transform(InferShapes())
+    model.set_initializer(
+        "p1", np.round(np.random.rand(*conv_param_shape).astype(np.float32) * 16)
+    )
+
+    model.set_tensor_datatype(model.graph.input[0].name, idt)
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataLayouts())
+    model = model.transform(GiveUniqueNodeNames())
+    model = model.transform(GiveReadableTensorNames())
+    model = model.transform(InferDataTypes())
+
+    new_model = model.transform(LowerConvsToMatMul())
+    new_model = new_model.transform(to_hls.InferConvInpGen())
+
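+    # sanity check: with padding enabled, InferConvInpGen is expected to have
+    # inserted an FMPadding_Batch node (assumes ModelWrapper provides
+    # get_nodes_by_op_type)
+    if padding:
+        assert len(new_model.get_nodes_by_op_type("FMPadding_Batch")) == 1
+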
+    new_model = new_model.transform(PrepareCppSim())
+    new_model = new_model.transform(CompileCppSim())
+    new_model = new_model.transform(SetExecMode("cppsim"))
+
+    x = gen_finn_dt_tensor(idt, input_shape)
+    inp_dict = {model.graph.input[0].name: x}
+    assert oxe.compare_execution(model, new_model, inp_dict)
diff --git a/tests/fpgadataflow/test_fpgadataflow_sameresize.py b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
similarity index 75%
rename from tests/fpgadataflow/test_fpgadataflow_sameresize.py
rename to tests/fpgadataflow/test_fpgadataflow_fmpadding.py
index ea6130c3891443595b038460233ebb85799ac461..9d6390b2673e5d2c0e72748183ac04ed222d078e 100644
--- a/tests/fpgadataflow/test_fpgadataflow_sameresize.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fmpadding.py
@@ -23,9 +23,11 @@ test_fpga_part = pynq_part_map[test_pynq_board]
 target_clk_ns = 10
 
 
-def make_single_sameresize_modelwrapper(
-    idim, odim, kdim, stride, num_ch, idt, pad_style
-):
+def make_single_fmpadding_modelwrapper(idim, padding, num_ch, idt, pad_style):
+    assert pad_style == 2, "only pad_style == 2 supported in hlslib"
+    assert padding > 0, "Output dim should be greater than input dim"
+    odim = idim + padding
+
     inp = helper.make_tensor_value_info(
         "inp", TensorProto.FLOAT, [1, idim, idim, num_ch]
     )
@@ -33,25 +35,25 @@ def make_single_sameresize_modelwrapper(
         "outp", TensorProto.FLOAT, [1, odim, odim, num_ch]
     )
 
-    SameResize_node = helper.make_node(
-        "SameResize_Batch",
+    FMPadding = helper.make_node(
+        "FMPadding_Batch",
         ["inp"],
         ["outp"],
         domain="finn",
         backend="fpgadataflow",
         ImgDim=idim,
-        KernelDim=kdim,
-        Stride=stride,
+        Padding=padding,
         NumChannels=num_ch,
         inputDataType=str(idt.name),
         PaddingStyle=pad_style,
+        numInputVectors=1,
     )
 
     graph = helper.make_graph(
-        nodes=[SameResize_node], name="sameresize_graph", inputs=[inp], outputs=[outp]
+        nodes=[FMPadding], name="fmpadding_graph", inputs=[inp], outputs=[outp]
     )
 
-    model = helper.make_model(graph, producer_name="sameresize-model")
+    model = helper.make_model(graph, producer_name="fmpadding-model")
     model = ModelWrapper(model)
 
     model.set_tensor_datatype("inp", idt)
@@ -60,34 +62,28 @@ def make_single_sameresize_modelwrapper(
     return model
 
 
-# image dimension
+# input image dimension
 @pytest.mark.parametrize("idim", [8, 16])
-# kernel dimension
-@pytest.mark.parametrize("kdim", [2, 3])
-# stride
-@pytest.mark.parametrize("stride", [1, 2])
+# number of rows and number of cols to add
+@pytest.mark.parametrize("pad", [2, 3])
 # number of channels
 @pytest.mark.parametrize("num_ch", [1, 2])
+# PaddingStyle: selects behavior when (odim-idim)%2 != 0
+@pytest.mark.parametrize("pad_style", [2])
 # FINN input datatype
 @pytest.mark.parametrize("idt", [DataType.INT2, DataType.INT4])
 # execution mode
 @pytest.mark.parametrize("mode", ["cppsim", "rtlsim"])
 @pytest.mark.slow
 @pytest.mark.vivado
-def test_fpgadataflow_sameresize(idim, kdim, stride, num_ch, idt, mode):
-    pad_style = 2
-    assert idim % stride == 0, "Stride must divide input dimension."
-    # number of "same" windows over the input data
-    same_windows = idim // stride
-    odim = kdim + stride * (same_windows - 1)
+def test_fpgadataflow_fmpadding(idim, pad, num_ch, pad_style, idt, mode):
 
     # generate input data
     x = gen_finn_dt_tensor(idt, [1, idim, idim, num_ch])
     input_dict = {"inp": x}
+    odim = idim + pad
 
-    model = make_single_sameresize_modelwrapper(
-        idim, odim, kdim, stride, num_ch, idt, pad_style
-    )
+    model = make_single_fmpadding_modelwrapper(idim, pad, num_ch, idt, pad_style)
     model = model.transform(InferShapes())
     model = model.transform(SetExecMode(mode))
     model = model.transform(GiveUniqueNodeNames())
@@ -103,8 +99,7 @@ def test_fpgadataflow_sameresize(idim, kdim, stride, num_ch, idt, mode):
     assert y_produced.shape == expected_oshape
 
     # calculate reference
-    # calculate correct padding according to parameters
-    pad = odim - idim
+    # calculate correct pad according to parameters
     if pad_style == 2:
         if pad % 2 == 0:
             pad_up = pad // 2
@@ -115,6 +110,7 @@ def test_fpgadataflow_sameresize(idim, kdim, stride, num_ch, idt, mode):
     else:
         pad_up = pad // 2
         pad_left = pad // 2
+
     pad_down = pad - pad_up
     pad_right = pad - pad_left