diff --git a/src/finn/core/remote_exec.py b/src/finn/core/remote_exec.py index e97eb19a101e83f9d9603637e131b2ec9b7d16a4..335dfec04e4abee41f914c5d912ce291a0d31a91 100644 --- a/src/finn/core/remote_exec.py +++ b/src/finn/core/remote_exec.py @@ -65,7 +65,8 @@ def remote_exec(model, execution_context): cmd = ( "sshpass -p {} ssh {}@{} -p {} " '"cd {}/{}; echo "{}" | ' - 'sudo -S python3.6 driver.py remote_pynq 1 resizer.bit input.npy output.npy"' + 'sudo -S python3.6 driver.py --exec_mode="execute" --batchsize=1 ' + '--bitfile="resizer.bit" --inputfile="input.npy" --outputfile="output.npy""' ).format( pynq_password, pynq_username, diff --git a/src/finn/core/throughput_test.py b/src/finn/core/throughput_test.py index fc929237bf6c985997e49cc3f74c7d492d79839a..c82d540e29fc59b92a22bf011e823a9f8c076843 100644 --- a/src/finn/core/throughput_test.py +++ b/src/finn/core/throughput_test.py @@ -47,8 +47,7 @@ def throughput_test(model): cmd = ( "sshpass -p {} ssh {}@{} -p {} " '"cd {}/{}; echo "{}" | ' - "sudo -S python3.6 driver.py throughput_test 1000 " - 'resizer.bit input.npy output.npy"' + 'sudo -S python3.6 driver.py --exec_mode="throughput_test" --batchsize=1000"' ).format( pynq_password, pynq_username, diff --git a/src/finn/transformation/fpgadataflow/make_pynq_driver.py b/src/finn/transformation/fpgadataflow/make_pynq_driver.py index c5b8d35dba1069ac749e0a0d92060c8216ada507..049ede5064d252bd6391184c4227e5367a8c1e2b 100644 --- a/src/finn/transformation/fpgadataflow/make_pynq_driver.py +++ b/src/finn/transformation/fpgadataflow/make_pynq_driver.py @@ -70,8 +70,8 @@ class MakePYNQDriver(Transformation): # extract HLSCustomOp instances to get folded i/o shapes first_node = getCustomOp(model.find_consumer(i_tensor_name)) last_node = getCustomOp(model.find_producer(o_tensor_name)) - i_tensor_shape_folded = first_node.get_folded_input_shape() - o_tensor_shape_folded = last_node.get_folded_output_shape() + i_tensor_shape_folded = tuple(first_node.get_folded_input_shape()) + 
o_tensor_shape_folded = tuple(last_node.get_folded_output_shape()) # generate dummy folded i/o tensors and their packed versions i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt, i_tensor_shape_folded) o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt, o_tensor_shape_folded) diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py index 55a5af2ad887e4a8cfa5e3836bef00f2defe7284..55ecb57decd2ac4fa08331b5ebbcb7fd2f0cd5c6 100644 --- a/src/finn/transformation/fpgadataflow/templates.py +++ b/src/finn/transformation/fpgadataflow/templates.py @@ -102,144 +102,141 @@ from finn.util.data_packing import ( ) from finn.core.datatype import DataType -class RemoteTest(): - def __init__( - self, - exec_mode, - N, - bitfile="resizer.bit", - inputfile="input.npy", - outputfile="output.npy"): - - self.exec_mode = exec_mode +class FINNAccelDriver(): + def __init__(self, N, bitfile): + \"\"\"Instantiate the FINN accelerator driver. + Gets batchsize (N) as integer and path to bitfile as string.\"\"\" self.N = N - self.inputfile = inputfile - self.outputfile = outputfile + # input FINN DataType + self.idt = $INPUT_FINN_DATATYPE$ + # output FINN DataType + self.odt = $OUTPUT_FINN_DATATYPE$ + # input and output shapes + self.ishape_normal = $INPUT_SHAPE_NORMAL$ + self.oshape_normal = $OUTPUT_SHAPE_NORMAL$ + self.ishape_folded = $INPUT_SHAPE_FOLDED$ + self.oshape_folded = $OUTPUT_SHAPE_FOLDED$ + self.ishape_packed = $INPUT_SHAPE_PACKED$ # datatype np.uint8 + self.oshape_packed = $OUTPUT_SHAPE_PACKED$ # datatype np.uint8 + # load bitfile and set up accelerator self.ol = Overlay(bitfile) self.dma = self.ol.axi_dma_0 self.ctrl_regs = self.ol.resize_accel_0 - self.ishape_packed = $INPUT_SHAPE_PACKED$ - self.oshape_packed = $OUTPUT_SHAPE_PACKED$ # neuron folding factor of output = iterations per sample self.itersPerSample = self.oshape_packed[-2] # AXI lite register offset for number of iterations # used by TLastMarker to 
signal end of transmission for AXI CDMA self.REG_OFFSET_NUM_ITERS = 0x10 + # set up TLastMarker with correct num. samples + self.ctrl_regs.write(self.REG_OFFSET_NUM_ITERS, self.N*self.itersPerSample) - def load_input(self): - N = self.N - ishape_normal = $INPUT_SHAPE_NORMAL$ - # load desired input .npy file - ibuf_normal = np.load(self.inputfile) - # ensure that shape is as expected - assert ibuf_normal.shape == ishape_normal - return ibuf_normal + # allocate a PYNQ buffer for the packed input and buffer + self.ibuf_packed_device = allocate(shape=self.ishape_packed, dtype=np.uint8) + self.obuf_packed_device = allocate(shape=self.oshape_packed, dtype=np.uint8) - def pack_input(self, ibuf_normal): - N = self.N - # input FINN DataType - idt = $INPUT_FINN_DATATYPE$ - ishape_folded = $INPUT_SHAPE_FOLDED$ + def fold_input(self, ibuf_normal): + \"\"\"Reshapes input in desired shape. + Gets input data (ibuf_normal), checks if data is in expected normal shape. + Returns folded input.\"\"\" + # ensure that shape is as expected + assert ibuf_normal.shape == self.ishape_normal # convert to folded form - ibuf_folded = ibuf_normal.reshape(ishape_folded) - # pack the input buffer, reversing both SIMD dim and endianness + ibuf_folded = ibuf_normal.reshape(self.ishape_folded) + return ibuf_folded + + def pack_input(self, ibuf_folded): + \"\"\"Packs folded input and reverses both SIMD dim and endianness. + Gets input data in folded shape and returns packed input data.\"\"\" ibuf_packed = finnpy_to_packed_bytearray( - ibuf_folded, idt, reverse_endian=True, reverse_inner=True + ibuf_folded, self.idt, reverse_endian=True, reverse_inner=True ) return ibuf_packed def unpack_output(self, obuf_packed): - N = self.N - # output FINN DataType - odt = $OUTPUT_FINN_DATATYPE$ - oshape_folded = $OUTPUT_SHAPE_FOLDED$ - # unpack the packed output buffer from accelerator + \"\"\"Unpacks the packed output buffer from accelerator. 
+ Gets packed output and returns output data in folded shape.\"\"\" obuf_folded = packed_bytearray_to_finnpy( - obuf_packed, odt, oshape_folded, reverse_endian=True, reverse_inner=True + obuf_packed, self.odt, self.oshape_folded, reverse_endian=True, reverse_inner=True ) return obuf_folded - def save_output(self, obuf_folded): - N = self.N - # convert to normal reshape and save - oshape_normal = $OUTPUT_SHAPE_NORMAL$ - obuf_normal = obuf_folded.reshape(oshape_normal) - np.save(self.outputfile, obuf_normal) - - def allocate_pynqbuffer(self, shape, data=None): - buf_device = allocate(shape=shape, dtype=np.uint8) - - # if necessary copy the packed data into the PYNQ buffer - # TODO optimization: pack directly into the PYNQ buffer? - if data is not None: - np.copyto(buf_device, data) - - return buf_device + def unfold_output(self, obuf_folded): + \"\"\"Unfolds output data to normal shape. + Gets folded output data and returns output data in normal shape.\"\"\" + obuf_normal = obuf_folded.reshape(self.oshape_normal) + return obuf_normal + def copy_input_data_to_device(self, data): + \"\"\"Copies given input data to PYNQ buffer.\"\"\" + np.copyto(self.ibuf_packed_device, data) - def run_nw(self): - exec_mode = self.exec_mode - if exec_mode == "remote_pynq": - ibuf_normal = self.load_input() - ibuf_packed = self.pack_input(ibuf_normal) - elif exec_mode != "throughput_test": - raise Exception("Exec mode has to be set to remote_pynq or throughput_test") - - # set up TLastMarker with correct num. 
samples - self.ctrl_regs.write(self.REG_OFFSET_NUM_ITERS, N*self.itersPerSample) - - # allocate a PYNQ buffer for the packed input buffer - if exec_mode == "remote_pynq": - ibuf_packed_device = self.allocate_pynqbuffer(self.ishape_packed, ibuf_packed) - else: - ibuf_packed_device = self.allocate_pynqbuffer(self.ishape_packed) - - # allocate a PYNQ buffer for the returned packed output buffer - obuf_packed = self.allocate_pynqbuffer(self.oshape_packed) - - if exec_mode == "throughput_test": - # measure runtime of network - start = time.time() - res={} - - # set up the DMA and wait until all transfers complete + def execute(self): + \"\"\"Executes accelerator by setting up the DMA and + waiting until all transfers complete. Uses only member variables and + returns nothing.\"\"\" dma = self.dma - dma.sendchannel.transfer(ibuf_packed_device) - dma.recvchannel.transfer(obuf_packed) + dma.sendchannel.transfer(self.ibuf_packed_device) + dma.recvchannel.transfer(self.obuf_packed_device) dma.sendchannel.wait() dma.recvchannel.wait() - if exec_mode == "throughput_test": - end = time.time() - runtime = end - start - res["runtime[ms]"] = runtime*1000 - res["throughput[images/s]"] = N / runtime - res["DRAM_in_bandwidth[Mb/s]"] = np.prod(self.ishape_packed)*0.000001 / runtime - res["DRAM_out_bandwidth[Mb/s]"] = np.prod(self.oshape_packed)*0.000001 / runtime - file = open("nw_metrics.txt", "w") - file.write(str(res)) - file.close() - else: - obuf_folded = self.unpack_output(obuf_packed) - self.save_output(obuf_folded) - - if __name__ == "__main__": parser = argparse.ArgumentParser(description='Set exec mode, batchsize N, bitfile name, inputfile name and outputfile name') - parser.add_argument('exec_mode', help='Please select functional verification ("remote_pynq") or throughput test ("throughput_test")') - parser.add_argument('N', help='number of samples for inference', type=int) - parser.add_argument('bitfile', default="resizer.bit") - parser.add_argument('inputfile', 
default="input.npy") - parser.add_argument('outputfile', default="output.npy") + parser.add_argument('--exec_mode', help='Please select functional verification ("execute") or throughput test ("throughput_test")', default="execute") + parser.add_argument('--batchsize', help='number of samples for inference', type=int, default=1) + parser.add_argument('--bitfile', help='name of bitfile (i.e. "resizer.bit")', default="resizer.bit") + parser.add_argument('--inputfile', help='name of input npy file (i.e. "input.npy")', default="input.npy") + parser.add_argument('--outputfile', help='name of output npy file (i.e. "output.npy")', default="output.npy") + # parse arguments args = parser.parse_args() exec_mode = args.exec_mode - N = args.N + N = args.batchsize bitfile = args.bitfile inputfile = args.inputfile outputfile = args.outputfile - Test = RemoteTest(exec_mode, N, bitfile, inputfile, outputfile) - Test.run_nw() + # instantiate FINN accelerator driver and pass batchsize and bitfile + finnDriver = FINNAccelDriver(N, bitfile) + + # for the remote execution the data from the input npy file has to be loaded, + # packed and copied to the PYNQ buffer + if exec_mode == "execute": + # load desired input .npy file + ibuf_normal = np.load(inputfile) + ibuf_folded = finnDriver.fold_input(ibuf_normal) + ibuf_packed = finnDriver.pack_input(ibuf_folded) + finnDriver.copy_input_data_to_device(ibuf_packed) + elif exec_mode != "throughput_test": + raise Exception("Exec mode has to be set to execute or throughput_test") + + # for the throughput test the runtime of the network has to be measured + if exec_mode == "throughput_test": + # measure runtime of network + start = time.time() + # dictionary for results of throughput test + res={} + + # execute accelerator + finnDriver.execute() + + # measure run time and fill dictionary with results of the throughput test + if exec_mode == "throughput_test": + end = time.time() + runtime = end - start + res["runtime[ms]"] = runtime*1000 + 
res["throughput[images/s]"] = N / runtime + res["DRAM_in_bandwidth[Mb/s]"] = np.prod(finnDriver.ishape_packed)*0.000001 / runtime + res["DRAM_out_bandwidth[Mb/s]"] = np.prod(finnDriver.oshape_packed)*0.000001 / runtime + file = open("nw_metrics.txt", "w") + file.write(str(res)) + file.close() + + # if execution is selected unpack, unfold and save output to output npy file + else: + obuf_folded = finnDriver.unpack_output(finnDriver.obuf_packed_device) + obuf_normal = finnDriver.unfold_output(obuf_folded) + np.save(outputfile, obuf_normal) + """