diff --git a/docs/img/finn-examples-header.png b/docs/img/finn-examples-header.png
new file mode 100644
index 0000000000000000000000000000000000000000..50f8fa7761e10a958ed3567f268ef675cf1814f7
Binary files /dev/null and b/docs/img/finn-examples-header.png differ
diff --git a/docs/img/imagenet.jpg b/docs/img/imagenet.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5cdd5aa303d9add5fbe6270936da0e152eca0135
Binary files /dev/null and b/docs/img/imagenet.jpg differ
diff --git a/finn-rtllib/memstream/hdl/axilite_if.v b/finn-rtllib/memstream/hdl/axilite_if.v
index 93b2227de1b51d4fca145e8b61e6ed6dc2ed3121..bdd4de288ed3a5de859cbb20c3157d7f21f8239c 100644
--- a/finn-rtllib/memstream/hdl/axilite_if.v
+++ b/finn-rtllib/memstream/hdl/axilite_if.v
@@ -127,7 +127,7 @@ always @(posedge aclk or negedge aresetn)
 always @(*) begin
     internal_waddr = awaddr >> $clog2(DATA_WIDTH/8);
     internal_wdata = wdata;
-    internal_wen = (state == STATE_IDLE) & awvalid & wvalid; 
+    internal_wen = (state == STATE_IDLE) & awvalid & wvalid;
 end
 
 always @(posedge aclk) begin
@@ -208,4 +208,3 @@ always @(posedge aclk or negedge aresetn)
     end
 
 endmodule
-
diff --git a/finn-rtllib/memstream/hdl/memstream_singleblock.v b/finn-rtllib/memstream/hdl/memstream_singleblock.v
index 54ee56764e187520997e03bdcb291b4183e6ecf0..6bb3a97115325d81d4292c5af3c33921c2680a30 100644
--- a/finn-rtllib/memstream/hdl/memstream_singleblock.v
+++ b/finn-rtllib/memstream/hdl/memstream_singleblock.v
@@ -98,7 +98,7 @@ wire strm1_incr_en;
 assign strm0_incr_en = m_axis_0_tready | ~m_axis_0_tvalid;
 assign strm1_incr_en = m_axis_1_tready | ~m_axis_1_tvalid;
 
-reg rack_shift[1:0]; 
+reg rack_shift[1:0];
 
 generate
 if(MEM_DEPTH > 1) begin: use_ram
diff --git a/finn-rtllib/memstream/sim/tb_memstream_writes.v b/finn-rtllib/memstream/sim/tb_memstream_writes.v
index 867acfe813280cc3c9a473fb2a7e6bc9d7c05b23..a6ac747e967e594ac010f25a2827ebf7a7fcaa0f 100644
--- a/finn-rtllib/memstream/sim/tb_memstream_writes.v
+++ b/finn-rtllib/memstream/sim/tb_memstream_writes.v
@@ -179,7 +179,6 @@ task axi_read;
                     data = data | (rdata<<(32*j));
                 end
             join
-            
             @(posedge clk);
         end
     end
@@ -270,7 +269,6 @@ memstream
     MEM_WIDTH,
     ".",
     "auto",
-    
     //widths per stream
     STRM0_WIDTH,
     STRM1_WIDTH,
@@ -278,7 +276,6 @@ memstream
     STRM3_WIDTH,
     STRM4_WIDTH,
     STRM5_WIDTH,
-    
     //depths per stream
     STRM0_DEPTH,
     STRM1_DEPTH,
@@ -286,7 +283,6 @@ memstream
     STRM3_DEPTH,
     STRM4_DEPTH,
     STRM5_DEPTH,
-    
     //offsets for each stream
     STRM0_OFFSET,
     STRM1_OFFSET,
@@ -332,32 +328,26 @@ dut
     m_axis_0_tready,
     m_axis_0_tvalid,
     m_axis_0_tdata,
-    
     m_axis_1_afull,
     m_axis_1_tready,
     m_axis_1_tvalid,
     m_axis_1_tdata,
-    
     m_axis_2_afull,
     m_axis_2_tready,
     m_axis_2_tvalid,
     m_axis_2_tdata,
-    
     m_axis_3_afull,
     m_axis_3_tready,
     m_axis_3_tvalid,
     m_axis_3_tdata,
-    
     m_axis_4_afull,
     m_axis_4_tready,
     m_axis_4_tvalid,
     m_axis_4_tdata,
-    
     m_axis_5_afull,
     m_axis_5_tready,
     m_axis_5_tvalid,
     m_axis_5_tdata
-    
 
 );
 
@@ -406,7 +396,6 @@ initial begin
 				end
 			end
 		end
-		
 		//check stream 2
 	    begin
 		    $display("Starting stream 2 checker");
diff --git a/finn-rtllib/memstream/sim/test.sh b/finn-rtllib/memstream/sim/test.sh
index 3348e64b715ccbba17a38ac3bdf2c2c4173c3956..7cb0497d261ac41a763bad8e58afabb204887d39 100755
--- a/finn-rtllib/memstream/sim/test.sh
+++ b/finn-rtllib/memstream/sim/test.sh
@@ -30,4 +30,3 @@
 
 iverilog ../hdl/*.v tb_memstream_writes.v -o sim
 ./sim
-
diff --git a/finn-rtllib/memstream/xgui/memstream_v1_0.tcl b/finn-rtllib/memstream/xgui/memstream_v1_0.tcl
index b8be5e0a2f5c960cc5cb47ff9b348efffad98762..87565bc5613ce783d6a8067e8323d2358adb8061 100644
--- a/finn-rtllib/memstream/xgui/memstream_v1_0.tcl
+++ b/finn-rtllib/memstream/xgui/memstream_v1_0.tcl
@@ -38,7 +38,6 @@ proc init_gui { IPINST } {
 
 proc update_PARAM_VALUE.AXILITE_ADDR_WIDTH { PARAM_VALUE.AXILITE_ADDR_WIDTH PARAM_VALUE.MEM_DEPTH PARAM_VALUE.MEM_WIDTH } {
 	# Procedure called to update AXILITE_ADDR_WIDTH when any of the dependent parameters in the arguments change
-	
 	set AXILITE_ADDR_WIDTH ${PARAM_VALUE.AXILITE_ADDR_WIDTH}
 	set MEM_DEPTH ${PARAM_VALUE.MEM_DEPTH}
 	set MEM_WIDTH ${PARAM_VALUE.MEM_WIDTH}
@@ -393,4 +392,3 @@ proc update_MODELPARAM_VALUE.AXILITE_ADDR_WIDTH { MODELPARAM_VALUE.AXILITE_ADDR_
 	# Procedure called to set VHDL generic/Verilog parameter value(s) based on TCL parameter value
 	set_property value [get_property value ${PARAM_VALUE.AXILITE_ADDR_WIDTH}] ${MODELPARAM_VALUE.AXILITE_ADDR_WIDTH}
 }
-
diff --git a/src/finn/analysis/fpgadataflow/floorplan_params.py b/src/finn/analysis/fpgadataflow/floorplan_params.py
index 8ea68eda3a8050d7068630083a61f7622be619c4..4c8cbf53de1ae7dc951911678a3f118bd3506dfe 100644
--- a/src/finn/analysis/fpgadataflow/floorplan_params.py
+++ b/src/finn/analysis/fpgadataflow/floorplan_params.py
@@ -36,7 +36,12 @@ def floorplan_params(model):
     Returns {node name : {slr, device id, partition id, memory port}}."""
 
     ret_dict = {
-        "Defaults": {"slr": [-1, ["all"]], "partition_id": [0, ["all"]], "device_id": [0, ["all"]], "mem_port": ["", ["all"]]}
+        "Defaults": {
+            "slr": [-1, ["all"]],
+            "partition_id": [0, ["all"]],
+            "device_id": [0, ["all"]],
+            "mem_port": ["", ["all"]],
+        }
     }
     for node in model.graph.node:
         if is_fpgadataflow_node(node) is True:
diff --git a/src/finn/analysis/fpgadataflow/res_estimation.py b/src/finn/analysis/fpgadataflow/res_estimation.py
index 2c714b1f12b75e9789f1865d6737422f4d9d9a97..31cfeb76a6d4f411808af5dcd265e4f07352ae02 100644
--- a/src/finn/analysis/fpgadataflow/res_estimation.py
+++ b/src/finn/analysis/fpgadataflow/res_estimation.py
@@ -61,7 +61,10 @@ def res_estimation_complete(model):
         if is_fpgadataflow_node(node) is True:
             op_type = node.op_type
             inst = registry.getCustomOp(node)
-            if op_type == "StreamingFCLayer_Batch" or op_type == "Vector_Vector_Activate_Batch":
+            if (
+                op_type == "StreamingFCLayer_Batch"
+                or op_type == "Vector_Vector_Activate_Batch"
+            ):
                 orig_restype = inst.get_nodeattr("resType")
                 res_dict[node.name] = []
                 inst.set_nodeattr("resType", "dsp")
diff --git a/src/finn/transformation/fpgadataflow/floorplan.py b/src/finn/transformation/fpgadataflow/floorplan.py
index 32b21cda55a3c9fb08cf2bd18155d1cdd5f140f8..c6bedd466e31efb622640cbd203d344ff9b3d88f 100644
--- a/src/finn/transformation/fpgadataflow/floorplan.py
+++ b/src/finn/transformation/fpgadataflow/floorplan.py
@@ -87,7 +87,6 @@ class Floorplan(Transformation):
                     narrow_neighbour = model.find_consumer(node.output[0])
                 else:
                     narrow_neighbour = model.find_producer(node.input[0])
-                    
                 node_slr = getCustomOp(narrow_neighbour).get_nodeattr("slr")
                 node_inst.set_nodeattr("slr", node_slr)
             if node.op_type == "StreamingFIFO":
@@ -98,7 +97,6 @@ class Floorplan(Transformation):
                 node_slr = getCustomOp(srcnode).get_nodeattr("slr")
                 node_inst.set_nodeattr("slr", node_slr)
 
-
         if unassigned_nodes > 0:
             warnings.warn(
                 str(unassigned_nodes)
@@ -106,7 +104,6 @@ class Floorplan(Transformation):
                 + "and no default value was set"
             )
 
-
         # partition id generation
         partition_cnt = 0
 
diff --git a/tests/fpgadataflow/test_set_folding.py b/tests/fpgadataflow/test_set_folding.py
index 7f7e3f3abd8df3e489816a2175d62393c2a37b21..fe3a1db8a476e33bfc0d76996917fab9ae6ed98b 100644
--- a/tests/fpgadataflow/test_set_folding.py
+++ b/tests/fpgadataflow/test_set_folding.py
@@ -28,8 +28,6 @@
 
 import pytest
 import numpy as np
-import math
-import random
 from onnx import TensorProto, helper
 
 from finn.custom_op.registry import getCustomOp
@@ -43,45 +41,53 @@ from finn.transformation.fpgadataflow.create_dataflow_partition import (
 )
 from finn.util.test import load_test_checkpoint_or_skip
 
+
 def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes):
 
-    W = np.random.randint(wdt.min(), wdt.max()+1, size=(ch, ch))
+    W = np.random.randint(wdt.min(), wdt.max() + 1, size=(ch, ch))
     W = W.astype(np.float32)
 
-    T = np.random.randint(tdt.min(), tdt.max()+1, size=(ch, 2**adt.bitwidth()-1))
+    T = np.random.randint(tdt.min(), tdt.max() + 1, size=(ch, 2 ** adt.bitwidth() - 1))
     T = T.astype(np.float32)
 
     tensors = []
     tensors.append(helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, ch]))
     for i in range(1, nnodes):
-        inter = helper.make_tensor_value_info("inter_"+str(i), TensorProto.FLOAT, [1, ch])
+        inter = helper.make_tensor_value_info(
+            "inter_" + str(i), TensorProto.FLOAT, [1, ch]
+        )
         tensors.append(inter)
     tensors.append(helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, ch]))
-        
+
     FCLayer_nodes = []
     for i in range(nnodes):
         pe = 1
         simd = 1
-        FCLayer_nodes += [helper.make_node(
-            "StreamingFCLayer_Batch",
-            [tensors[i].name, "weights_"+str(i), "thresh_"+str(i)],
-            [tensors[i+1].name],
-            domain="finn.custom_op.fpgadataflow",
-            backend="fpgadataflow",
-            MW=ch,
-            MH=ch,
-            SIMD=simd,
-            PE=pe,
-            inputDataType=adt.name,
-            weightDataType=wdt.name,
-            outputDataType=adt.name,
-            ActVal=0,
-            binaryXnorMode=0,
-            noActivation=0,
-        )]
+        FCLayer_nodes += [
+            helper.make_node(
+                "StreamingFCLayer_Batch",
+                [tensors[i].name, "weights_" + str(i), "thresh_" + str(i)],
+                [tensors[i + 1].name],
+                domain="finn.custom_op.fpgadataflow",
+                backend="fpgadataflow",
+                MW=ch,
+                MH=ch,
+                SIMD=simd,
+                PE=pe,
+                inputDataType=adt.name,
+                weightDataType=wdt.name,
+                outputDataType=adt.name,
+                ActVal=0,
+                binaryXnorMode=0,
+                noActivation=0,
+            )
+        ]
 
     graph = helper.make_graph(
-        nodes=FCLayer_nodes, name="fclayer_graph", inputs=[tensors[0]], outputs=[tensors[-1]]
+        nodes=FCLayer_nodes,
+        name="fclayer_graph",
+        inputs=[tensors[0]],
+        outputs=[tensors[-1]],
     )
 
     model = helper.make_model(graph, producer_name="fclayer-model")
@@ -89,24 +95,27 @@ def make_multi_fclayer_model(ch, wdt, adt, tdt, nnodes):
 
     model.set_tensor_datatype("inp", adt)
     model.set_tensor_datatype("outp", adt)
-    
-    for i in range(1, nnodes+1):
+
+    for i in range(1, nnodes + 1):
         model.graph.value_info.append(tensors[i])
-        model.set_initializer("weights_"+str(i-1), W)
-        model.set_initializer("thresh_"+str(i-1), T)
-        model.set_tensor_datatype("weights_"+str(i-1), wdt)
-        model.set_tensor_datatype("thresh_"+str(i-1), tdt)
+        model.set_initializer("weights_" + str(i - 1), W)
+        model.set_initializer("thresh_" + str(i - 1), T)
+        model.set_tensor_datatype("weights_" + str(i - 1), wdt)
+        model.set_tensor_datatype("thresh_" + str(i - 1), tdt)
 
     return model
 
+
 # desired frames per second
 @pytest.mark.parametrize("target_fps", [30, 10 ** 5, 10 ** 7])
 # target chip or board
 @pytest.mark.parametrize("platform", ["Pynq-Z1", "Ultra96", "U200"])
 def test_set_folding(target_fps, platform):
 
-    model = make_multi_fclayer_model(128, DataType.INT4, DataType.INT2, DataType.INT16, 5)
-    
+    model = make_multi_fclayer_model(
+        128, DataType.INT4, DataType.INT2, DataType.INT16, 5
+    )
+
     model = model.transform(GiveUniqueNodeNames())
     parent_model = model.transform(CreateDataflowPartition())
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
@@ -125,7 +134,6 @@ def test_set_folding(target_fps, platform):
     min_cycles["Pynq-Z1"] = 128
     min_cycles["Ultra96"] = 64
     min_cycles["U200"] = 1
-    
 
     assert achieved_cycles_per_frame <= max(
         min_cycles[platform], target_cycles_per_frame