diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh
index 1ed8875e886ea78511f1992d95be4417b3af80df..a8e05114c312028d18a006d10d5b210b44afb9d3 100644
--- a/docker/finn_entrypoint.sh
+++ b/docker/finn_entrypoint.sh
@@ -12,7 +12,7 @@ gecho () {
 
 # checkout the correct dependency repo commits
 # the repos themselves are cloned in the Dockerfile
-FINN_BASE_COMMIT=1363981654009067790d5f2d0c3dd303b5fa05cb
+FINN_BASE_COMMIT=91fb6066927d965471e66e103fd5201ac217c755
 BREVITAS_COMMIT=aff49758ec445d77c75721c7de3091a2a1797ca8
 CNPY_COMMIT=4e8810b1a8637695171ed346ce68f6984e585ef4
 HLSLIB_COMMIT=2e49322d1bbc4969ca293843bda1f3f9c05456fc
diff --git a/docs/finn/faq.rst b/docs/finn/faq.rst
new file mode 100644
index 0000000000000000000000000000000000000000..093344e70331572a425a09d34c5a68d7313bc521
--- /dev/null
+++ b/docs/finn/faq.rst
@@ -0,0 +1,71 @@
+.. _faq:
+
+**************************
+Frequently Asked Questions
+**************************
+
+.. note:: **This page is under construction.**
+
+Can I install FINN outside of the Docker container?
+===================================================
+
+We do not support running FINN outside of the Docker container at the moment.
+This is due to the high complexity of the FINN project dependencies.
+
+Since FINN uses ONNX, can I compile any model from the ONNX Model Zoo to an FPGA accelerator?
+=============================================================================================
+
+The short answer is no. FINN uses ONNX in a specific (non-standard) way, including custom layer
+types and quantization annotations. Networks must first be quantized using Brevitas and exported
+to FINN-ONNX before they can be converted to FPGA accelerators.
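+
+As a minimal sketch of that flow (a toy model for illustration; the
+``export_finn_onnx`` call follows the Brevitas version pinned by this repo):
+
+.. code-block:: python
+
+    import torch.nn as nn
+    import brevitas.onnx as bo
+    from brevitas.nn import QuantLinear, QuantReLU
+
+    # toy 2-bit quantized MLP built from Brevitas quantized layers
+    model = nn.Sequential(
+        QuantLinear(784, 64, bias=True, weight_bit_width=2),
+        QuantReLU(bit_width=2),
+        QuantLinear(64, 10, bias=True, weight_bit_width=2),
+    )
+    # export to FINN-ONNX, the dialect the FINN compiler consumes
+    bo.export_finn_onnx(model, (1, 784), "model_for_finn.onnx")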
+
+
+Can I deploy custom NNs with arbitrary precisions and layers using FINN? 
+=========================================================================
+
+Yes, though the effort required and the quality of results will vary.
+Although we do support arbitrary precision, the way we create the hardware
+isn't typically practical for more than 4 bits, or for very large networks
+on a single FPGA.
+In terms of layers, only the subset of quantized layers covered by the various
+FINN examples is currently supported.
+It is possible to add support for new layers, though we don't have tutorials
+for this in place just yet.
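+
+For instance, Brevitas lets you pick per-layer bit widths freely (a hedged
+sketch; the layer shape below is made up for illustration):
+
+.. code-block:: python
+
+    from brevitas.nn import QuantConv2d
+
+    # 3-bit weights are expressible, but hardware cost grows quickly
+    # beyond ~4 bits, as noted above
+    conv = QuantConv2d(
+        in_channels=3,
+        out_channels=16,
+        kernel_size=3,
+        weight_bit_width=3,
+        bias=False,
+    )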
+
+Does FINN only work with the example networks?
+==============================================
+
+FINN isn't restricted to the example networks; 
+rather, it's restricted to certain patterns (e.g. certain layer types and their combinations). 
+The current best practice for custom networks is to take a working network and gradually modify it. 
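+
+One way to start is from a pretrained example network; the helper below is
+the same one used by the FINN test suite and notebooks:
+
+.. code-block:: python
+
+    from finn.util.test import get_test_model_trained
+
+    # known-working TFC network with 1-bit weights and activations;
+    # modify it gradually and re-run the flow after each change
+    tfc = get_test_model_trained("TFC", 1, 1)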
+
+What is the expected background for using FINN?
+===============================================
+
+Some general knowledge of Python, Docker, machine learning with neural networks and Jupyter notebooks
+is expected.
+Our goal is to shape the tool so that no hardware/FPGA background is necessary,
+although having some FPGA knowledge will generally give better results.
+
+What operating systems are supported by FINN?
+=============================================
+
+FINN should work fine under any Linux-based OS capable of running Vivado/Vitis, as long
+as you install Docker (``docker-ce``) on your machine.
+
+
+I am getting DocNav and Model_Composer errors when launching the Docker image.
+==============================================================================
+
+We do not mount those particular directories into the Docker container because they are not
+used. The errors are Vivado-related, but you can safely ignore them.
+
+What board do you recommend to start working with FINN?
+=======================================================
+
+Our preferred target platforms are those supported by `PYNQ <http://www.pynq.io/board.html>`_.
+For those boards we can offer end-to-end (DNN-to-bitstream) deployment;
+see the `finn-examples <https://github.com/Xilinx/finn-examples>`_ repository for some examples.
+However, FINN also supports Vivado IP Integrator designs. The IPs connect using
+AXI-Stream (FIFO) in-and-out interfaces, so the generated accelerator can be
+integrated onto any Xilinx FPGA board, though you will have to do the system
+integration manually.
diff --git a/docs/finn/index.rst b/docs/finn/index.rst
index fa7ed30205da5b9c63c469ca600211e7865a9730..320cd88fe91af857c5a3948ef36a587ea305040f 100644
--- a/docs/finn/index.rst
+++ b/docs/finn/index.rst
@@ -48,5 +48,6 @@ More FINN Resources
    example_networks
    internals
    developers
+   faq
    source_code/finn
    genindex
diff --git a/notebooks/basics/1_brevitas_network_import.ipynb b/notebooks/basics/1_brevitas_network_import.ipynb
index 3c9cad615e168e19c7f5dfef45e7c7c60965d1e3..ad2b3db8ffcf3ae99e2a3ca13a2c002685e2df92 100644
--- a/notebooks/basics/1_brevitas_network_import.ipynb
+++ b/notebooks/basics/1_brevitas_network_import.ipynb
@@ -356,7 +356,7 @@
     "from pkgutil import get_data\n",
     "import onnx\n",
     "import onnx.numpy_helper as nph\n",
-    "raw_i = get_data(\"finn\", \"data/onnx/mnist-conv/test_data_set_0/input_0.pb\")\n",
+    "raw_i = get_data(\"finn.data\", \"onnx/mnist-conv/test_data_set_0/input_0.pb\")\n",
     "input_tensor = onnx.load_tensor_from_string(raw_i)\n",
     "input_tensor_npy = nph.to_array(input_tensor)\n",
     "input_tensor_pyt = torch.from_numpy(input_tensor_npy).float()\n",
diff --git a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
index 795f7f22fef033381aed00375e6bd1bd45affce8..4130f35d7a371711fe1f6bf494358e3c93d8c136 100644
--- a/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/cnv_end2end_example.ipynb
@@ -701,16 +701,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<matplotlib.image.AxesImage at 0x7f25af026da0>"
+       "<matplotlib.image.AxesImage at 0x7f89a07e6eb8>"
       ]
      },
-     "execution_count": 40,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -732,7 +732,7 @@
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
     "\n",
-    "fn = pk.resource_filename(\"finn\", \"data/cifar10/cifar10-test-data-class3.npz\")\n",
+    "fn = pk.resource_filename(\"finn.qnn-data\", \"cifar10/cifar10-test-data-class3.npz\")\n",
     "x = np.load(fn)[\"arr_0\"]\n",
     "x = x.reshape(3, 32,32).transpose(1, 2, 0)\n",
     "plt.imshow(x)"
diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
index a067c6f6f8af1ef9e26384e1b2d92458c93b97fb..8cbff4fcea58d452b1e35c0dab647a8f922dc2c0 100644
--- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_example.ipynb
@@ -1468,16 +1468,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "<matplotlib.image.AxesImage at 0x7fe2dd62bf98>"
+       "<matplotlib.image.AxesImage at 0x7fcb96004cc0>"
       ]
      },
-     "execution_count": 53,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -1499,7 +1499,7 @@
     "import onnx.numpy_helper as nph\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
-    "raw_i = get_data(\"finn\", \"data/onnx/mnist-conv/test_data_set_0/input_0.pb\")\n",
+    "raw_i = get_data(\"finn.data\", \"onnx/mnist-conv/test_data_set_0/input_0.pb\")\n",
     "x = nph.to_array(onnx.load_tensor_from_string(raw_i))\n",
     "plt.imshow(x.reshape(28,28), cmap='gray')"
    ]
diff --git a/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb b/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb
index 54738c3725c0141fddc3497dee024ca90db3f3ce..4a5d3dd07a2f6719b51e75d672790ed44883138f 100644
--- a/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb
+++ b/notebooks/end2end_example/bnn-pynq/tfc_end2end_verification.ipynb
@@ -71,7 +71,7 @@
     "from finn.util.test import get_test_model_trained\n",
     "\n",
     "fc = get_test_model_trained(\"TFC\", 1, 1)\n",
-    "raw_i = get_data(\"finn\", \"data/onnx/mnist-conv/test_data_set_0/input_0.pb\")\n",
+    "raw_i = get_data(\"finn.data\", \"onnx/mnist-conv/test_data_set_0/input_0.pb\")\n",
     "input_tensor = onnx.load_tensor_from_string(raw_i)\n",
     "input_brevitas = torch.from_numpy(nph.to_array(input_tensor)).float()\n",
     "output_golden = fc.forward(input_brevitas).detach().numpy()\n",
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index 94305b861cbe0c5e6b641c9dccee7976c73c236f..a221b510ab8d22f4daca1c32e717a9b482246712 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -65,8 +65,17 @@ class InferConvInpGen(Transformation):
                     continue
                 i2c_inst = getCustomOp(n)
                 stride = i2c_inst.get_nodeattr("stride")
-                k = i2c_inst.get_nodeattr("kernel_size")
-                pad = i2c_inst.get_nodeattr("pad_amount")
+                k_attr = i2c_inst.get_nodeattr("kernel_size")
+                k_h = k_attr[0]
+                k_w = k_attr[1]
+                pad_attr = i2c_inst.get_nodeattr("pad_amount")
+                pad_h = pad_attr[0] + pad_attr[2]
+                pad_w = pad_attr[1] + pad_attr[3]
+                # temporary checks until non-square conv support is finalized
+                assert pad_h == pad_w, "Unequal padding along H and W not yet supported."
+                assert k_h == k_w, "Non-square kernels not yet supported."
+                k = k_h
+                pad = pad_attr[0]
                 pad_val = i2c_inst.get_nodeattr("pad_value")
                 depthwise = i2c_inst.get_nodeattr("depthwise")
                 ifm_ch = i2c_in_shape[-1]
@@ -330,8 +339,8 @@ class InferPool_Batch(Transformation):
                     [im2col_out],
                     domain="finn.custom_op.general",
                     stride=stride,
-                    kernel_size=k,
-                    pad_amount=pad,
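+                    # Im2Col attributes are now lists:
+                    # [k_h, k_w] and [pad_H_begin, pad_W_begin, pad_H_end, pad_W_end]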
+                    kernel_size=[k, k],
+                    pad_amount=[pad, pad, pad, pad],
                     pad_value=pad_value,
                     depthwise=1,
                     input_shape="(1,{},{},{})".format(ifm_dim, ifm_dim, ifm_ch),
@@ -557,7 +566,7 @@ class InferQuantizedStreamingFCLayer(Transformation):
                     wmem = mw * mh // (pe * simd)
                     assert (
                         mw * mh == wmem * pe * simd
-                    ), """Requirement (MW * MH) divisiable by
+                    ), """Requirement (MW * MH) divisible by
                     (WMEM * PE * SIMD) is violated."""
                     # see if we have any following thresholds
                     consumer = model.find_consumer(mm_output)
@@ -574,20 +583,27 @@ class InferQuantizedStreamingFCLayer(Transformation):
                         thresholds neither 1 nor MH."""
                         odt = model.get_tensor_datatype(mt_output)
                         scale = getCustomOp(consumer).get_nodeattr("out_scale")
-                        bipolar_ok = odt == DataType.BIPOLAR and scale == 2.0
-                        assert (
-                            scale == 1.0 or bipolar_ok
-                        ), "out_scale must be equal to 1.0 for HLS conversion."
                         actval = getCustomOp(consumer).get_nodeattr("out_bias")
                         assert (
                             int(actval) == actval
                         ), "out_bias must be integer for HLS conversion."
                         actval = int(actval)
+                        odt_is_bipolar = odt == DataType.BIPOLAR
+                        bipolar_ok = (
+                            odt_is_bipolar and (scale == 2.0) and (actval == -1)
+                        )
+                        assert (
+                            scale == 1.0 or bipolar_ok
+                        ), "out_scale = 1.0 or bipolar output needed for conversion."
                         assert (not odt.signed()) or (
                             actval < 0
                         ), "Signed output requres actval < 0"
                         model.set_tensor_shape(mm_input, mm_in_shape)
                         model.set_tensor_shape(mt_output, mt_out_shape)
+                        if bipolar_ok:
+                            # remove bias for bipolar, since
+                            # binary->bipolar is achieved by reinterpretation
+                            actval = 0
                         # create and insert new StreamingFCLayer node
                         new_node = helper.make_node(
                             "StreamingFCLayer_Batch",
diff --git a/src/finn/transformation/fpgadataflow/templates.py b/src/finn/transformation/fpgadataflow/templates.py
index 2d1c680338eec199908c305a42988403cb3645aa..73beb62f06a6b625a992bd2a7401a91ed09789f3 100644
--- a/src/finn/transformation/fpgadataflow/templates.py
+++ b/src/finn/transformation/fpgadataflow/templates.py
@@ -111,6 +111,9 @@ set_param board.repoPaths $paths_param
 if {$BOARD == "ZCU104"} {
     set_property board_part xilinx.com:zcu104:part0:1.1 [current_project]
     set ZYNQ_TYPE "zynq_us+"
+} elseif {$BOARD == "ZCU102"} {
+    set_property board_part xilinx.com:zcu102:part0:3.3 [current_project]
+    set ZYNQ_TYPE "zynq_us+"
 } elseif {$BOARD == "Ultra96"} {
     set_property board_part em.avnet.com:ultra96v1:part0:1.2 [current_project]
     set ZYNQ_TYPE "zynq_us+"
diff --git a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
index 9d350a9342e3de56cbbb5b3fc4abec69bfc254dc..d88576583eaacb7579b02bc00e4e0f9b77b16f7e 100644
--- a/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
+++ b/tests/fpgadataflow/test_convert_to_hls_conv_layer.py
@@ -77,7 +77,10 @@ def test_convert_to_hls_conv_layer(conv_config, depthwise, exec_mode):
         out_chn = 20
         conv_param_shape = [out_chn, in_chn, kernel_size, kernel_size]
 
-    out_feature_dim = compute_conv_output_dim(in_feature_dim, kernel_size, stride, pad)
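+    # compute_conv_output_dim now takes the total padding across both sides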
+    total_pad = 2 * pad
+    out_feature_dim = compute_conv_output_dim(
+        in_feature_dim, kernel_size, stride, total_pad
+    )
 
     input_shape = [1, in_chn, in_feature_dim, in_feature_dim]
     output_shape = [1, out_chn, out_feature_dim, out_feature_dim]
diff --git a/tests/fpgadataflow/test_depthwise_convolution.py b/tests/fpgadataflow/test_depthwise_convolution.py
index 7c608fc3863ab72d1097f49b793af73664b2be48..c406d78158c52226fea881c48bc178139653fea5 100644
--- a/tests/fpgadataflow/test_depthwise_convolution.py
+++ b/tests/fpgadataflow/test_depthwise_convolution.py
@@ -57,7 +57,8 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
 
     # set up reference model consisting of Im2Col + MatMul (+ MultiThreshold)
     ofm_ch = ifm_ch
-    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad=padding)
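+    # padding is symmetric, so the total padding is twice the per-side amount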
+    total_pad = 2 * padding
+    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, total_pad=total_pad)
 
     if act is None:
         odt = DataType.INT32
@@ -96,9 +97,9 @@ def set_up_reference_model(act, idt, wdt, k, ifm_dim, ifm_ch, stride, padding):
         domain="finn.custom_op.general",
         inputs=["inp"],
         outputs=["im2col_out"],
-        kernel_size=k,
+        kernel_size=[k, k],
         stride=stride,
-        pad_amount=padding,
+        pad_amount=[padding, padding, padding, padding],
         input_shape="(1, {}, {}, {})".format(ifm_dim, ifm_dim, ifm_ch),
         depthwise=1,
     )
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
index 0e2e60534bcc871592128fdbbd5ca52b3cc0fe4f..4e0e8c7c35a8fc8a30e0ba4c27a7c0d637e24d1f 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
@@ -63,9 +63,9 @@ def make_single_im2col_modelwrapper(k, ifm_ch, ifm_dim, ofm_dim, simd, stride, i
         domain="finn.custom_op.general",
         backend="fpgadataflow",
         stride=stride,
-        kernel_size=k,
+        kernel_size=[k, k],
         input_shape=str((1, ifm_dim, ifm_dim, ifm_ch)),
-        pad_amount=0,
+        pad_amount=[0, 0, 0, 0],
         pad_value=0,
     )
     graph = helper.make_graph(
diff --git a/tests/transformation/streamline/test_move_mul_past_dw_conv.py b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
index 5e96d15867b087fbb5f4f1b467aea34cb33e3ff4..ce0cbcd0405f8a09efabbadd5555de1bd6b89e43 100644
--- a/tests/transformation/streamline/test_move_mul_past_dw_conv.py
+++ b/tests/transformation/streamline/test_move_mul_past_dw_conv.py
@@ -32,8 +32,8 @@ def test_move_mul_past_dw_conv(ifm_dim, ifm_ch, k, stride, pad_amt, dw):
         ofm_ch = ifm_ch + 2
         groups = 1
         W_shape = [ofm_ch, ifm_ch, k, k]
-
-    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, pad_amt)
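+    # compute_conv_output_dim expects the total (both-sides) padding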
+    total_pad = 2 * pad_amt
+    ofm_dim = compute_conv_output_dim(ifm_dim, k, stride, total_pad)
 
     # set up onnx model
     inp = helper.make_tensor_value_info(