diff --git a/Dockerfile b/Dockerfile
index c220e6ac6f4f4b24f2a10af778a0740137ee949f..9cbb364b936c27c09bcfab71ef3866dff23f95ee 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -49,6 +49,30 @@ RUN echo "StrictHostKeyChecking no" >> /etc/ssh/ssh_config
 RUN pip install sphinx
 RUN pip install sphinx_rtd_theme
 
+# cloning dependency repos
+# Brevitas
+RUN git clone --branch feature/finn_onnx_export https://github.com/Xilinx/brevitas.git /workspace/brevitas  
+RUN git -C /workspace/brevitas checkout ed1a3b70a14a91853066ece630421e89660d93e9
+
+# Brevitas examples
+RUN git clone https://github.com/maltanar/brevitas_cnv_lfc.git /workspace/brevitas_cnv_lfc
+RUN git -C /workspace/brevitas_cnv_lfc checkout a443708b382cbcfd69d19c9fc3fe94b2a2c03d71
+
+# CNPY
+RUN git clone https://github.com/rogersce/cnpy.git /workspace/cnpy
+RUN git -C /workspace/cnpy checkout 4e8810b1a8637695171ed346ce68f6984e585ef4
+
+# FINN hlslib
+RUN git clone https://github.com/Xilinx/finn-hlslib.git /workspace/finn-hlslib
+RUN git -C /workspace/finn-hlslib checkout b5dc957a16017b8356a7010144b0a4e2f8cfd124
+
+# PyVerilator
+RUN git clone https://github.com/maltanar/pyverilator /workspace/pyverilator
+RUN git -C /workspace/pyverilator checkout 307fc5c82db748620836307a2002fdc9fe170226
+
+# PYNQ-HelloWorld
+RUN git clone https://github.com/maltanar/PYNQ-HelloWorld.git /workspace/PYNQ-HelloWorld
+RUN git -C /workspace/PYNQ-HelloWorld checkout ef4c438dff4bd346e5f6b8d4eddfd1c8a3999c03
 
 # Note that we expect the cloned finn directory on the host to be
 # mounted on /workspace/finn -- see run-docker.sh for an example
diff --git a/README.md b/README.md
index 0a70f27b675c105d76259edcacb78251419a5205..b408b1a69d6833382763795f35002e2b3322f09d 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ Please see the [Getting Started](https://finn.readthedocs.io/en/latest/getting_s
 
 ## What's New in FINN?
 
-* **2020-02-27:** FINN v0.2b (beta) is released, which is a clean-slate reimplementation of the framework. Currently only fully-connected networks are supported for the end-to-end flow. Please see the release blog post for a summary of the key features.
+* **2020-02-28:** FINN v0.2b (beta) is released, which is a clean-slate reimplementation of the framework. Currently only fully-connected networks are supported for the end-to-end flow. Please see the release blog post for a summary of the key features.
 
 ## Documentation
 
diff --git a/docs/_posts/2020-02-28-finn-v02b-beta-is-released.md b/docs/_posts/2020-02-28-finn-v02b-beta-is-released.md
new file mode 100644
index 0000000000000000000000000000000000000000..319c03e14229f4866279cb09a4b70419ce2fcdc7
--- /dev/null
+++ b/docs/_posts/2020-02-28-finn-v02b-beta-is-released.md
@@ -0,0 +1,33 @@
+---
+layout: post
+title:  "FINN v0.2b (beta) is released"
+author: "Yaman Umuroglu"
+---
+
+We've been working on the new version of the FINN compiler for a while, and today we are excited to announce our first beta release to 
+give you a taste of how things are shaping up! 
+
+Here's a quick overview of the key features:
+
+* <b>Train and export highly-quantized networks in PyTorch using Brevitas.</b> You can use <a href="https://github.com/Xilinx/brevitas">Brevitas</a>,
+  our PyTorch library for quantization-aware training, to train networks with few-bit weights and activations, then export them into
+  FINN-ONNX for use by the FINN compiler (see the short export sketch after this list).
+
+* <b>Fully transparent end-to-end flow.</b> We support taking quantized networks (with limitations, see bottom of post) all the way down to a 
+  customized FPGA bitstream. This happens across many steps ranging from streamlining to Vivado IPI stitching, and each step is fully 
+  visible to the user. So if you are happy with just the threshold-activation (streamlined) QNN in ONNX, or if you want to take the 
+  generated Vivado IP block and integrate it into your own IPI design, it's easy to break out of the flow at any step. 
+  We also provide a variety of mechanisms to verify the design at different steps.
+
+* <b>ONNX-based intermediate representation.</b> We use ONNX with some custom nodes and annotations as our intermediate representation. As the 
+  FINN compiler transforms the network across many steps to produce an FPGA bitstream, you can view and explore the transformed network 
+  using the excellent <a href="https://www.lutzroeder.com/ai/netron">Netron</a> viewer from the comfort of your web browser.
+
+* <b>Tutorials and documentation.</b> We have prepared a set of <a href="https://github.com/Xilinx/finn/tree/master/notebooks">Jupyter notebooks</a>
+  to let you experiment with some of the things FINN can do, covering the basics, demonstrating the end-to-end flow on an example network, 
+  and discussing some of the internals for more advanced users and developers. We also have Sphinx-generated documentation on 
+  <a href="http://finn.readthedocs.io/">readthedocs</a> for more information on the FINN compiler and its API.
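+
+As a quick illustration of the export step mentioned in the first bullet above, here is a minimal sketch (assuming an already-trained Brevitas model named `model` and an MNIST-like input shape; the exact export call may differ between Brevitas versions):
+
+```Python
+import brevitas.onnx as bo
+
+# export the trained quantized network to FINN-ONNX for use with the FINN compiler
+# (the model variable and input shape are illustrative placeholders)
+bo.export_finn_onnx(model, (1, 1, 28, 28), "model_finn.onnx")
+```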
+
+The release (tagged 0.2b) is now available on GitHub. Currently it's a beta release and only supports fully-connected layers in linear 
+(non-branching) topologies, but we're actively working on end-to-end convolution support for the next release. Further down the
+road, we hope to support more advanced topologies and provide end-to-end examples for MobileNet and ResNet-50.
diff --git a/docs/_posts/2020-03-11-rn50-released.md b/docs/_posts/2020-03-11-rn50-released.md
new file mode 100644
index 0000000000000000000000000000000000000000..baa924410cf56a07e22a6c85450205d18a4d45bb
--- /dev/null
+++ b/docs/_posts/2020-03-11-rn50-released.md
@@ -0,0 +1,75 @@
+---
+layout: post
+title:  "ResNet50 for Alveo released"
+author: "Lucian Petrica"
+---
+
+We're pleased to announce as part of the FINN project our release of the first fully quantized, all-dataflow ResNet50 inference accelerator for Xilinx Alveo boards. The source code is available on [GitHub](https://github.com/Xilinx/ResNet50-PYNQ) and we provide a Python [package](https://pypi.org/project/resnet50-pynq/) and Jupyter Notebook to get you started and show how the accelerator is controlled using [PYNQ](http://www.pynq.io/) for Alveo.
+Built using a custom [FINN](https://xilinx.github.io/finn/about.html) streamlining flow, which is not yet public, 
+this accelerator showcases the advantage of deep quantization for FPGA acceleration of DNN workloads in the datacenter. 
+The key performance metrics are:
+
+FPGA Device | ImageNet Accuracy     | Max FPS    | Min Latency | Power @ Max FPS | Power @ Min Latency
+----------  |----------             |----------  |----------   |----------       |----------
+Alveo U250  | 65% Top-1 / 85% Top-5 | 2000       | 2 ms      | 70 W            | 40 W
+
+In addition to demonstrating the achievable performance of low-precision dataflow acceleration on Alveo, the ResNet50 design
+serves as a proof of concept for two key features of future FINN releases:
+modular build flows based on Vivado IP Integrator, and a pure Python interface to the accelerator.
+
+## Modular build flow
+
+FINN accelerators targeting embedded parts, such as the [BNN-PYNQ](https://github.com/Xilinx/BNN-PYNQ) accelerators, have in the past implemented the
+entire acceleration functionality in a single monolithic HLS C++ description.
+For large datacenter-class designs this approach is not feasible, as HLS simulation and synthesis times become very long.
+
+Instead, here we identify the key computational pattern, the residual block, which we implement as an HLS C++ IP block by assembling multiple Matrix-Vector-Activation Units from the [FINN HLS Library](https://github.com/Xilinx/finn-hlslib).
+We then construct the accelerator by instantiating multiple residual blocks and connecting them in a Vivado IPI block design; the blocks are synthesized in parallel and the assembled design is exported as a netlist IP.
+
+<img align="left" src="https://xilinx.github.io/finn/img/rn50-ipi.png" alt="drawing" style="margin-right: 20px" width="300"/>
+
+
+In our flow, this IP is linked by Vitis into an Alveo platform, but users are free to integrate the ResNet50 IP in their own Vivado-based flows and augment it with other HLS or RTL IP. See our build scripts and documentation for more information.
+
+## Pure Python host interface
+
+Using PYNQ for Alveo, users can interface directly with the ResNet50 accelerator in Python.
+To program the accelerator, an Overlay object is created from an XCLBin file produced by Vitis.
+
+```Python
+import pynq
+import numpy as np  # used below for the weight and image buffers
+
+# program the Alveo card with the accelerator and get a handle to the kernel
+ol = pynq.Overlay("resnet50.xclbin")
+accelerator = ol.resnet50_1
+```
+
+Before using the accelerator, we must configure the weights of the fully-connected layer in DDR Bank 0.
+Assuming the weights are already loaded in the NumPy array `fcweights`, we allocate a buffer 
+of appropriate size, copy the weights into it, and flush it to the Alveo DDR Bank 0.
+
+```Python
+# allocate a device buffer for the FC layer weights in Alveo DDR bank 0
+fcbuf = pynq.allocate((1000, 2048), dtype=np.int8, target=ol.bank0)
+# copy the weights in and flush the buffer to device memory
+fcbuf[:] = fcweights
+fcbuf.sync_to_device()
+```
+
+To perform inference we first allocate input and output buffers for one image, and copy the contents of the NumPy array `img` into the input buffer.
+We then flush the input data to the Alveo DDR Bank 0, and call the accelerator providing as arguments
+the input and output buffers, the FC layer weights buffer, and the number of images to process, in this case just one.
+After the call finishes, we pull the output buffer data from the accelerator DDR to host memory and copy its 
+contents to user memory in a NumPy array.
+
+```Python
+# buffers for one 224x224x3 input image and its 5-element output, both in DDR bank 0
+inbuf = pynq.allocate((224, 224, 3), dtype=np.int8, target=ol.bank0)
+outbuf = pynq.allocate((5,), dtype=np.uint32, target=ol.bank0)
+
+# copy the image into the input buffer and flush it to device memory
+inbuf[:] = img
+inbuf.sync_to_device()
+
+# run inference on one image
+accelerator.call(inbuf, outbuf, fcbuf, 1)
+
+# pull the results back to host memory and copy them into a NumPy array
+outbuf.sync_from_device()
+results = np.copy(outbuf)
+```
+
+It's that easy! See our Jupyter Notebook demo and application examples for more details.
diff --git a/docs/_posts/2020-03-27-brevitas-quartznet-release.md b/docs/_posts/2020-03-27-brevitas-quartznet-release.md
new file mode 100644
index 0000000000000000000000000000000000000000..0940f754815c834662919404860b8a7b00d08e64
--- /dev/null
+++ b/docs/_posts/2020-03-27-brevitas-quartznet-release.md
@@ -0,0 +1,92 @@
+---
+layout: post
+title:  "Quantized QuartzNet with Brevitas for efficient speech recognition"
+author: "Giuseppe Franco"
+---
+
+*Although not yet supported in FINN, we are excited to show you how Brevitas and quantized neural network training techniques can be applied to models beyond image classification.*
+
+We are pleased to announce the release of quantized pre-trained models of [QuartzNet](https://arxiv.org/abs/1910.10261) for efficient speech recognition.
+They can be found at the [following link](https://github.com/Xilinx/brevitas/tree/master/examples/speech_to_text), with a brief
+explanation on how to test them.
+The quantized version of QuartzNet has been trained using [Brevitas](https://github.com/Xilinx/brevitas), an experimental library for quantization-aware training.
+
+QuartzNet, whose structure can be seen in Fig. 1, is a convolution-based speech-to-text network with a structure similar to [Jasper](https://arxiv.org/abs/1904.03288).
+
+| <img src="https://xilinx.github.io/finn/img/QuartzNet.jpg" alt="QuartzNet Structure" title="QuartzNet Structure" width="450" height="500" align="center"/>|
+| :---:|
+| *Fig. 1 QuartzNet Model, [source](https://arxiv.org/abs/1910.10261)* |
+
+The starting point is the mel-spectrogram representation of the input audio file.
+Through repeated base building blocks of 1D Convolutions (1D-Conv), Batch Normalization (BN), and ReLU with residual connections,
+QuartzNet is able to recover the underlying text.
+The main difference with respect to Jasper is the use of Depthwise and Pointwise 1D-Conv (Fig. 2a) instead of plain 1D-Conv (Fig. 2b).
+Thanks to this structure, QuartzNet achieves a better Word Error Rate (WER) than Jasper,
+using *only* 19.9M parameters, compared to Jasper's 333M.
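+
+As a rough illustration (a minimal PyTorch sketch with made-up channel sizes, not the actual QuartzNet code), the depthwise + pointwise 1D-Conv pattern looks like this:
+
+```Python
+import torch.nn as nn
+
+# illustrative time-channel separable 1D convolution block
+# (hypothetical sizes; not the released QuartzNet implementation)
+def separable_block(channels, kernel_size):
+    return nn.Sequential(
+        # depthwise: one filter per channel (groups == channels)
+        nn.Conv1d(channels, channels, kernel_size,
+                  padding=kernel_size // 2, groups=channels),
+        # pointwise: kernel-size-1 convolution mixing channels
+        nn.Conv1d(channels, channels, kernel_size=1),
+        nn.BatchNorm1d(channels),
+        nn.ReLU(),
+    )
+```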
+
+Moreover, the authors proposed a grouped-pointwise convolution strategy that greatly reduces the number of parameters,
+down to 8.7M, at the cost of a small degradation in accuracy.
+
+| <img src="https://xilinx.github.io/finn/img/quartzPic1.jpg" alt="QuartzNet block" title="QuartzNet block" width="130" height="220" align="center"/> | <img src="https://xilinx.github.io/finn/img/JasperVertical4.jpg" alt="Jasper block" title="Jasper block" width="130" height="220" align="center"/>|
+| :---:|:---:|
+| *Fig. 2a QuartzNet Block, [source](https://arxiv.org/abs/1910.10261)* | *Fig. 2b Jasper Block [source](https://arxiv.org/abs/1904.03288)*  |
+
+
+The authors of QuartzNet propose different BxR configurations. Each B<sub>i</sub> block consists of the same base building block described above,
+repeated R times.
+Different BxR configurations have been trained on several different datasets (Wall Street Journal,
+LibriSpeech + Mozilla Common Voice, LibriSpeech only).
+
+For our quantization experiments, we focus on the 15x5 variant trained on LibriSpeech with spec-augmentation and without grouped convolutions.
+More details about this configuration can be found in the paper and in a [related discussion with the authors](https://github.com/NVIDIA/NeMo/issues/230).
+
+Starting from the [official implementation](https://github.com/NVIDIA/NeMo/blob/master/examples/asr/quartznet.py),
+the first step was to implement a quantized version of the topology in Brevitas, using quantized convolutions and activations.
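+
+For instance, a single quantized convolution and activation pair in Brevitas could be sketched as follows (illustrative bit widths and sizes; the exact layer arguments depend on the Brevitas version):
+
+```Python
+from brevitas.nn import QuantConv1d, QuantReLU
+
+# illustrative sketch of a quantized 1D convolution followed by a quantized ReLU
+# (hypothetical channel sizes; not the released QuartzNet training code)
+conv = QuantConv1d(256, 256, kernel_size=33, padding=16, weight_bit_width=8)
+act = QuantReLU(bit_width=8)
+# per-channel weight scaling variants were also trained; the corresponding
+# Brevitas arguments are omitted here
+```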
+
+After implementing the quantized version, the second step was to re-train the model, starting
+from the [pre-trained models](https://ngc.nvidia.com/catalog/models/nvidia:quartznet_15x5_ls_sp)
+kindly released by the authors.
+
+We focused on three main quantization configurations: two configurations at 8 bit, with per-tensor and per-channel scaling respectively,
+and one configuration at 4 bit, with per-channel scaling.
+
+We compare our results with those achieved by the authors, not only in terms of pure WER, but also in terms of the parameters' memory footprint
+and the number of operations performed. Note that the WER is always based on greedy decoding. The results can be seen in Fig. 3 and Fig. 4,
+and are summarized in Table 1.
+
+| Configuration | Word Error Rate (WER) | Memory Footprint (MegaByte) | Mega MACs |
+| :-----------: | :-------------------: | :-------------------------: | :-------: |
+| FP 300E, 1G   | 11.58%                | 37.69                       | 1658.54   |
+| FP 400E, 1G   | 11.08%                | 37.69                       | 1658.54   |
+| FP 1500E, 1G  | 10.78%                | 37.69                       | 1658.54   |
+| FP 300E, 2G   | 12.52%                | 24.06                       | 1058.75   |
+| FP 300E, 4G   | 13.48%                | 17.25                       |  758.86   |
+| 8 bit, 1G Per-Channel scaling| 10.98% | 18.58                       |  414.63   |
+| 8 bit, 1G Per-Tensor scaling | 11.03% | 18.58                       |  414.63   |
+| 4 bit, 1G Per-Channel scaling| 12.00% |  9.44                       |  104.18   |
+
+| <img src="https://xilinx.github.io/finn/img/WERMB.jpg" alt="WERvsMB" title="WERvsMB" width="500" height="300" align="center"/> |
+| :---:|
+| *Fig. 3 Memory footprint over WER on LibriSpeech dev-other* |
+
+| <img src="https://xilinx.github.io/finn/img/WERNops.jpg" alt="WERvsMACs" title="WERvsMACs" width="500" height="300" align="center"/> |
+| :---: |
+| *Fig. 4 Number of MAC operations over WER on LibriSpeech dev-other*  |
+
+In evaluating the memory footprint, we consider half-precision (16 bit) Floating Point (FP) numbers for the original QuartzNet.
+As we can see in Fig. 3, the quantized implementations achieve accuracy comparable to the corresponding floating-point version,
+while greatly reducing the memory occupation. In the graph, the term <em>E</em> stands for Epochs, while <em>G</em> stands for Groups, referring
+to the number of groups used in the grouped convolutions.
+In the case of our 4 bit implementation, the first and last layers are left at 8 bit, but this is taken into account both in the computation
+of the memory occupation and of the number of operations.
+Notice how the 4 bit version greatly reduces the memory footprint of the network compared to the grouped-convolution variants, while still delivering better accuracy.
+
+
+For comparing accuracy against the number of multiply-accumulate operations (MACs), we consider 16 bit floating-point multiplications as 16 bit integer multiplications.
+This means that we are greatly underestimating the complexity of the operations performed in the original floating-point QuartzNet model.
+Assuming an n^2 growth in the cost of integer multiplication, we consider a 4 bit MAC 16x less expensive than a 16 bit one.
+The number of MACs in Fig. 4 is normalized with respect to 16 bit.
+Also in this case, it is clear that the quantized versions greatly reduce the number of operations required,
+with little to no degradation in accuracy. In particular, the 8 bit versions already achieve a better WER and a lower number
+of MACs than the grouped-convolution variants, and this is confirmed by the 4 bit version as well, with only a small degradation in terms of
+WER.
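+
+As a small worked example of the normalization described above (illustrative only, not the exact accounting used for the table):
+
+```Python
+# relative cost of a b-bit MAC, assuming quadratic growth with bit width,
+# normalized to a 16 bit MAC
+def relative_mac_cost(bit_width):
+    return (bit_width / 16.0) ** 2
+
+print(relative_mac_cost(8))   # 0.25   -> an 8 bit MAC counts as 1/4 of a 16 bit MAC
+print(relative_mac_cost(4))   # 0.0625 -> a 4 bit MAC counts as 1/16 of a 16 bit MAC
+
+# weight memory footprint in megabytes for a given parameter count and bit width
+def footprint_mb(num_params, bit_width):
+    return num_params * bit_width / 8 / 1e6
+```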
diff --git a/docs/img/JasperVertical4.jpg b/docs/img/JasperVertical4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d7364ec8a99f51e77b421c85a8da4eebe2883751
Binary files /dev/null and b/docs/img/JasperVertical4.jpg differ
diff --git a/docs/img/QuartzNet.jpg b/docs/img/QuartzNet.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ce258fcd5f458caae606af0973c2eb14aea0af27
Binary files /dev/null and b/docs/img/QuartzNet.jpg differ
diff --git a/docs/img/WERMB.jpg b/docs/img/WERMB.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3c1ce7d6bc3e378f6e75c204a01538f02a9cb007
Binary files /dev/null and b/docs/img/WERMB.jpg differ
diff --git a/docs/img/WERNops.jpg b/docs/img/WERNops.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e539bb26077fb98f9a0f7b554ed63a18d57207a1
Binary files /dev/null and b/docs/img/WERNops.jpg differ
diff --git a/docs/img/quartzPic1.jpg b/docs/img/quartzPic1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cec4829f2187d720be8589d075c83443eaaef69c
Binary files /dev/null and b/docs/img/quartzPic1.jpg differ
diff --git a/docs/img/rn50-ipi.png b/docs/img/rn50-ipi.png
new file mode 100644
index 0000000000000000000000000000000000000000..504b011c9660b446ae39d407a8ce3d824bd2cd6a
Binary files /dev/null and b/docs/img/rn50-ipi.png differ
diff --git a/run-docker.sh b/run-docker.sh
index f5c9f64b7d89e7def72c5b39131f37c22fcf57bf..e1ccb2a2a1c3270a97ee044013b8a267c905b5a3 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -65,44 +65,15 @@ SCRIPT=$(readlink -f "$0")
 # Absolute path this script is in, thus /home/user/bin
 SCRIPTPATH=$(dirname "$SCRIPT")
 
-BREVITAS_REPO=https://github.com/Xilinx/brevitas.git
-EXAMPLES_REPO=https://github.com/maltanar/brevitas_cnv_lfc.git
-CNPY_REPO=https://github.com/rogersce/cnpy.git
-#FINN_HLS_REPO=https://github.com/Xilinx/finn-hlslib.git
-FINN_HLS_REPO=https://github.com/Tobi-Alonso/finn-hlslib.git
-PYVERILATOR_REPO=https://github.com/maltanar/pyverilator
-PYNQSHELL_REPO=https://github.com/maltanar/PYNQ-HelloWorld.git
-
-BREVITAS_LOCAL=$SCRIPTPATH/brevitas
-EXAMPLES_LOCAL=$SCRIPTPATH/brevitas_cnv_lfc
-CNPY_LOCAL=$SCRIPTPATH/cnpy
-FINN_HLS_LOCAL=$SCRIPTPATH/finn-hlslib
-PYVERILATOR_LOCAL=$SCRIPTPATH/pyverilator
-PYNQSHELL_LOCAL=$SCRIPTPATH/PYNQ-HelloWorld
 BUILD_LOCAL=/tmp/$DOCKER_INST_NAME
 VIVADO_HLS_LOCAL=$VIVADO_PATH
 VIVADO_IP_CACHE=$BUILD_LOCAL/vivado_ip_cache
 
-# clone dependency repos
-git clone --branch feature/finn_onnx_export $BREVITAS_REPO $BREVITAS_LOCAL ||  git -C "$BREVITAS_LOCAL" pull
-git clone $EXAMPLES_REPO $EXAMPLES_LOCAL ||  git -C "$EXAMPLES_LOCAL" checkout feature/rework_scaling_clipping; git -C "$EXAMPLES_LOCAL" pull
-git clone $CNPY_REPO $CNPY_LOCAL ||  git -C "$CNPY_LOCAL" pull
-git clone $FINN_HLS_REPO $FINN_HLS_LOCAL ||  git -C "$FINN_HLS_LOCAL" checkout master; git -C "$FINN_HLS_LOCAL" pull
-git clone $PYVERILATOR_REPO $PYVERILATOR_LOCAL ||  git -C "$PYVERILATOR_LOCAL" pull
-git clone $PYNQSHELL_REPO $PYNQSHELL_LOCAL ||  git -C "$PYNQSHELL_LOCAL" checkout feature/synth_rpt; git -C "$PYNQSHELL_LOCAL" pull
-
 # ensure build dir exists locally
 mkdir -p $BUILD_LOCAL
 mkdir -p $VIVADO_IP_CACHE
 
 echo "Instance is named as $DOCKER_INST_NAME"
-echo "Mounting $SCRIPTPATH into /workspace/finn"
-echo "Mounting $SCRIPTPATH/brevitas into /workspace/brevitas"
-echo "Mounting $SCRIPTPATH/brevitas_cnv_lfc into /workspace/brevitas_cnv_lfc"
-echo "Mounting $SCRIPTPATH/cnpy into /workspace/cnpy"
-echo "Mounting $SCRIPTPATH/finn-hlslib into /workspace/finn-hlslib"
-echo "Mounting $SCRIPTPATH/pyverilator into /workspace/pyverilator"
-echo "Mounting $SCRIPTPATH/PYNQ-HelloWorld into /workspace/PYNQ-HelloWorld"
 echo "Mounting $BUILD_LOCAL into $BUILD_LOCAL"
 echo "Mounting $VIVADO_PATH into $VIVADO_PATH"
 echo "Port-forwarding for Jupyter $JUPYTER_PORT:$JUPYTER_PORT"
@@ -137,12 +108,6 @@ docker run -t --rm --name $DOCKER_INST_NAME -it \
 -e "XILINX_VIVADO=$VIVADO_PATH" \
 -e "SHELL=/bin/bash" \
 -v $SCRIPTPATH:/workspace/finn \
--v $SCRIPTPATH/brevitas:/workspace/brevitas \
--v $SCRIPTPATH/brevitas_cnv_lfc:/workspace/brevitas_cnv_lfc \
--v $SCRIPTPATH/cnpy:/workspace/cnpy \
--v $SCRIPTPATH/finn-hlslib:/workspace/finn-hlslib \
--v $SCRIPTPATH/pyverilator:/workspace/pyverilator \
--v $SCRIPTPATH/PYNQ-HelloWorld:/workspace/PYNQ-HelloWorld \
 -v $BUILD_LOCAL:$BUILD_LOCAL \
 -v $VIVADO_PATH:$VIVADO_PATH \
 -e VIVADO_PATH=$VIVADO_PATH \