diff --git a/docker/Dockerfile.finn_ci b/docker/Dockerfile.finn_ci
index 41f6a88f5dd4c9b0822a74cf4a0e7b4663dce910..0d610ec66a5f433d156f4e8da976767ce6458aef 100644
--- a/docker/Dockerfile.finn_ci
+++ b/docker/Dockerfile.finn_ci
@@ -73,6 +73,8 @@ RUN mkdir -p $VIVADO_IP_CACHE
 WORKDIR /workspace/finn
 
 COPY finn_entrypoint.sh /usr/local/bin/
+COPY quicktest.sh /usr/local/bin/
 RUN chmod 755 /usr/local/bin/finn_entrypoint.sh
+RUN chmod 755 /usr/local/bin/quicktest.sh
 ENTRYPOINT ["finn_entrypoint.sh"]
 CMD ["bash"]
diff --git a/docker/Dockerfile.finn_dev b/docker/Dockerfile.finn_dev
index b7cfc299a2999662672225aa5f8912653d189559..1c2cb19d14137b866b55417522fdebb8e0d7ad90 100644
--- a/docker/Dockerfile.finn_dev
+++ b/docker/Dockerfile.finn_dev
@@ -95,7 +95,9 @@ RUN echo "source \$VIVADO_PATH/settings64.sh" >> /home/$UNAME/.bashrc
 # copy entrypoint script
 USER root
 COPY docker/finn_entrypoint.sh /usr/local/bin/
+COPY docker/quicktest.sh /usr/local/bin/
 RUN chmod 755 /usr/local/bin/finn_entrypoint.sh
+RUN chmod 755 /usr/local/bin/quicktest.sh
 USER $UNAME
 
 ENTRYPOINT ["finn_entrypoint.sh"]
diff --git a/docker/quicktest.sh b/docker/quicktest.sh
new file mode 100755
index 0000000000000000000000000000000000000000..4f6a2d3e230de9fcbb947d794722294880a7730d
--- /dev/null
+++ b/docker/quicktest.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+cd "$FINN_ROOT" || exit 1
+python setup.py test --addopts "-m 'not (vivado or slow)'"
diff --git a/run-docker.sh b/run-docker.sh
index b7f844d314c5fb67e11e0933f42b3edfa4d96036..e1f17e728204217ff3caa6e486b2daae16d6d271 100755
--- a/run-docker.sh
+++ b/run-docker.sh
@@ -27,13 +27,27 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+NC='\033[0m' # No Color
+
+# green echo
+gecho () {
+  echo -e "${GREEN}$1${NC}"
+}
+
+# red echo
+recho () {
+  echo -e "${RED}$1${NC}"
+}
+
 if [ -z "$VIVADO_PATH" ];then
-        echo "For correct implementation please set an environment variable VIVADO_PATH that contains the path to your vivado installation directory"
-        exit 1
+        recho "Please set the VIVADO_PATH that contains the path to your Vivado installation directory."
+        recho "FINN functionality depending on Vivado or Vivado HLS will not be available."
 fi
 
 if [ -z "$PYNQ_IP" ];then
-        echo "Please set the PYNQ_IP env.var. to enable PYNQ deployment tests."
+        recho "Please set the PYNQ_IP env.var. to enable PYNQ deployment tests."
 fi
 
 DOCKER_GID=$(id -g)
@@ -74,22 +88,25 @@ VIVADO_IP_CACHE=$BUILD_LOCAL/vivado_ip_cache
 mkdir -p $BUILD_LOCAL
 mkdir -p $VIVADO_IP_CACHE
 
-echo "Instance is named as $DOCKER_INST_NAME"
-echo "Mounting $BUILD_LOCAL into $BUILD_LOCAL"
-echo "Mounting $VIVADO_PATH into $VIVADO_PATH"
-echo "Port-forwarding for Jupyter $JUPYTER_PORT:$JUPYTER_PORT"
-echo "Port-forwarding for Netron $NETRON_PORT:$NETRON_PORT"
-echo "Vivado IP cache dir is at $VIVADO_IP_CACHE"
-echo "Using default PYNQ board $PYNQ_BOARD"
+gecho "Instance is named as $DOCKER_INST_NAME"
+gecho "Mounting $BUILD_LOCAL into $BUILD_LOCAL"
+gecho "Mounting $VIVADO_PATH into $VIVADO_PATH"
+gecho "Port-forwarding for Jupyter $JUPYTER_PORT:$JUPYTER_PORT"
+gecho "Port-forwarding for Netron $NETRON_PORT:$NETRON_PORT"
+gecho "Vivado IP cache dir is at $VIVADO_IP_CACHE"
+gecho "Using default PYNQ board $PYNQ_BOARD"
 
 if [ "$1" = "test" ]; then
-        echo "Running test suite"
+        gecho "Running test suite (all tests)"
         DOCKER_CMD="python setup.py test"
+elif [ "$1" = "quicktest" ]; then
+        gecho "Running test suite (non-Vivado, non-slow tests)"
+        DOCKER_CMD="quicktest.sh"
 elif [ "$1" = "notebook" ]; then
-        echo "Running Jupyter notebook server"
+        gecho "Running Jupyter notebook server"
         DOCKER_CMD="jupyter notebook --ip=0.0.0.0 --port $JUPYTER_PORT notebooks"
 else
-        echo "Running container only"
+        gecho "Running container only"
         DOCKER_CMD="bash"
 fi
 
diff --git a/setup.cfg b/setup.cfg
index 5974cda20e37449a879f7528516895fb7cea4264..1d7dcf247636b486e35d6320669eae706c2b7a72 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -101,6 +101,9 @@ extras = True
 # in order to write a coverage file that can be read by Jenkins.
 addopts =
     --verbose
+markers =
+    slow: marks tests as slow (deselect with '-m "not slow"')
+    vivado: marks tests that require Vivado or Vivado HLS (deselect with '-m "not vivado"')
 norecursedirs =
     dist
     build
diff --git a/src/finn/transformation/fpgadataflow/make_deployment.py b/src/finn/transformation/fpgadataflow/make_deployment.py
index a185f5392c4b5ec848cd463e02ebab4be9c56a46..a4c751b844a2796447930eb74adad6321454ac09 100644
--- a/src/finn/transformation/fpgadataflow/make_deployment.py
+++ b/src/finn/transformation/fpgadataflow/make_deployment.py
@@ -68,10 +68,10 @@ class DeployToPYNQ(Transformation):
         for file in os.listdir(vivado_pynq_proj):
             if file.endswith(".bit"):
                 bitfile = os.path.join(vivado_pynq_proj, file)
+                copy(bitfile, deployment_dir)
             elif file.endswith(".hwh"):
                 hwhfile = os.path.join(vivado_pynq_proj, file)
-        copy(bitfile, deployment_dir)
-        copy(hwhfile, deployment_dir)
+                copy(hwhfile, deployment_dir)
 
         # driver.py and python libraries
         pynq_driver_dir = model.get_metadata_prop("pynq_driver_dir")
diff --git a/src/finn/util/test.py b/src/finn/util/test.py
index 4cad01b1f7ec58da7ba6d5460c072faa01202c55..f29af66f52e18f8aeed87e19bfb52354ca1b73a7 100644
--- a/src/finn/util/test.py
+++ b/src/finn/util/test.py
@@ -27,6 +27,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from brevitas_examples import bnn_pynq
+import pytest
+import warnings
+from finn.core.modelwrapper import ModelWrapper
 
 # map of (wbits,abits) -> model
 example_map = {
@@ -61,3 +64,13 @@ def get_test_model_trained(netname, wbits, abits):
 def get_test_model_untrained(netname, wbits, abits):
     "get_test_model with pretrained=False"
     return get_test_model(netname, wbits, abits, pretrained=False)
+
+
+def load_test_checkpoint_or_skip(filename):
+    "Try to load given .onnx and return ModelWrapper, else skip current test."
+    try:
+        model = ModelWrapper(filename)
+        return model
+    except FileNotFoundError:
+        warnings.warn(filename + " not found from previous test step, skipping")
+        pytest.skip(filename + " not found from previous test step, skipping")
diff --git a/tests/end2end/test_end2end_cnv_w1a1.py b/tests/end2end/test_end2end_cnv_w1a1.py
index 7dd45cbc732a50f8f41c1932601308f0dfd77c20..e6d1fc4efd61c01654ee88638698215d23a82eb3 100644
--- a/tests/end2end/test_end2end_cnv_w1a1.py
+++ b/tests/end2end/test_end2end_cnv_w1a1.py
@@ -36,7 +36,6 @@ import onnx  # NOQA
 
 import pytest
 import pkg_resources as pk
-from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.registry import getCustomOp
 from finn.core.onnx_exec import execute_onnx
 from finn.transformation.double_to_single_float import DoubleToSingleFloat
@@ -69,7 +68,7 @@ from finn.transformation.fpgadataflow.make_pynq_proj import MakePYNQProject
 from finn.transformation.fpgadataflow.synth_pynq_proj import SynthPYNQProject
 from finn.transformation.fpgadataflow.make_deployment import DeployToPYNQ
 from finn.util.basic import pynq_part_map
-from finn.util.test import get_test_model_trained
+from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
 from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO
@@ -91,7 +90,7 @@ def test_end2end_cnv_w1a1_export():
 
 
 def test_end2end_cnv_w1a1_import_and_tidy():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_export.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w1a1_export.onnx")
     model = model.transform(DoubleToSingleFloat())
     model = model.transform(InferShapes())
     model = model.transform(FoldConstants())
@@ -101,7 +100,7 @@ def test_end2end_cnv_w1a1_import_and_tidy():
 
 
 def test_end2end_cnv_w1a1_streamline():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_tidy.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w1a1_tidy.onnx")
     model = model.transform(Streamline())
     model = model.transform(LowerConvsToMatMul())
     model = model.transform(MakeMaxPoolNHWC())
@@ -112,7 +111,9 @@ def test_end2end_cnv_w1a1_streamline():
 
 
 def test_end2end_cnv_w1a1_convert_to_hls_layers():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_streamlined.onnx"
+    )
     model = model.transform(to_hls.InferBinaryStreamingFCLayer(mem_mode))
     model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
     model = model.transform(to_hls.InferConvInpGen())
@@ -122,18 +123,22 @@ def test_end2end_cnv_w1a1_convert_to_hls_layers():
 
 
 def test_end2end_cnv_w1a1_create_dataflow_partition():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_hls_layers.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_hls_layers.onnx"
+    )
     parent_model = model.transform(CreateDataflowPartition())
     parent_model.save(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
     dataflow_model_filename = sdp_node.get_nodeattr("model")
-    dataflow_model = ModelWrapper(dataflow_model_filename)
+    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
     dataflow_model.save(build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx")
 
 
 def test_end2end_cnv_w1a1_fold_and_tlastmarker():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_dataflow_model.onnx"
+    )
     fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
     # each tuple is (PE, SIMD, in_fifo_depth) for a layer
     folding = [
@@ -167,23 +172,27 @@ def test_end2end_cnv_w1a1_fold_and_tlastmarker():
     model.save(build_dir + "/end2end_cnv_w1a1_folded.onnx")
 
 
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_end2end_cnv_w1a1_gen_hls_ip():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_folded.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w1a1_folded.onnx")
     model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
     model = model.transform(HLSSynthIP())
     model = model.transform(AnnotateResources("hls"))
     model.save(build_dir + "/end2end_cnv_w1a1_ipgen.onnx")
 
 
+@pytest.mark.vivado
 def test_end2end_cnv_w1a1_ip_stitch():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_ipgen.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w1a1_ipgen.onnx")
     model = model.transform(ReplaceVerilogRelPaths())
     model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
     model.save(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
 
 
+@pytest.mark.vivado
 def test_end2end_cnv_w1a1_verify_dataflow_part():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
     x = np.zeros((1, 32, 32, 3), dtype=np.float32)
     inp_name = model.graph.input[0].name
     out_name = model.graph.output[0].name
@@ -212,9 +221,12 @@ def test_end2end_cnv_w1a1_verify_dataflow_part():
     assert np.isclose(res_cppsim, res_rtlsim_whole).all()
 
 
+@pytest.mark.vivado
 def test_end2end_cnv_w1a1_verify_all():
     # use the streamlined model as the "golden" model for right answers
-    golden = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_streamlined.onnx"
+    )
     iname = golden.graph.input[0].name
     oname = golden.graph.output[0].name
     # load one of the test vectors
@@ -228,22 +240,31 @@ def test_end2end_cnv_w1a1_verify_all():
     y_golden = ret_golden[oname]
     # set up parent+child graph to test
     # we'll use models from the previous step as the child model
-    parent_model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx"
+    )
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
     # produce results with cppsim
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
+    load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
     sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_ipgen_cppsim.onnx")
     ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
     y_cppsim = ret_cppsim[oname]
     # produce results with node-by-node rtlsim
+    load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx"
+    )
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_cnv_w1a1_ipgen_nodebynode_rtlsim.onnx"
     )
     ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True)
     y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname]
     # produce results with whole-network (stitched ip) rtlsim
+    load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx"
+    )
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_cnv_w1a1_ipstitch_whole_rtlsim.onnx"
     )
@@ -257,27 +278,34 @@ def test_end2end_cnv_w1a1_verify_all():
     assert np.argmax(y_golden) == 3
 
 
+@pytest.mark.vivado
 def test_end2end_cnv_w1a1_make_pynq_proj():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w1a1_ipstitch.onnx")
     model = model.transform(MakePYNQProject(test_pynq_board))
     model.save(build_dir + "/end2end_cnv_w1a1_pynq_project.onnx")
 
 
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_end2end_cnv_w1a1_synth_pynq_project():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_pynq_project.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_pynq_project.onnx"
+    )
     model = model.transform(SynthPYNQProject())
     model = model.transform(AnnotateResources("synth"))
     model.save(build_dir + "/end2end_cnv_w1a1_synth.onnx")
 
 
 def test_end2end_cnv_w1a1_make_driver():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_synth.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w1a1_synth.onnx")
     model = model.transform(MakePYNQDriver())
     model.save(build_dir + "/end2end_cnv_w1a1_pynq_driver.onnx")
 
 
 def test_end2end_cnv_w1a1_deploy_on_pynq():
-    model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_pynq_driver.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_pynq_driver.onnx"
+    )
     try:
         ip = os.environ["PYNQ_IP"]  # no fault for this one; skip if not defined
         if ip == "":
@@ -295,7 +323,9 @@ def test_end2end_cnv_w1a1_deploy_on_pynq():
 
 def test_end2end_cnv_w1a1_run_on_pynq():
     # use the streamlined model as the "golden" model for right answers
-    golden = ModelWrapper(build_dir + "/end2end_cnv_w1a1_streamlined.onnx")
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_streamlined.onnx"
+    )
     iname = golden.graph.input[0].name
     oname = golden.graph.output[0].name
     # load one of the test vectors
@@ -309,7 +339,9 @@ def test_end2end_cnv_w1a1_run_on_pynq():
     y_golden = ret_golden[oname]
     # set up parent+child graph to test
     # we'll use models from the previous step as the child model
-    parent_model = ModelWrapper(build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx")
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_cnv_w1a1_dataflow_parent.onnx"
+    )
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
     try:
@@ -319,6 +351,7 @@ def test_end2end_cnv_w1a1_run_on_pynq():
         # produce results with cppsim
         sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
         sdp_node = getCustomOp(sdp_node)
+        load_test_checkpoint_or_skip(build_dir + "/end2end_cnv_w1a1_pynq_deploy.onnx")
         sdp_node.set_nodeattr("model", build_dir + "/end2end_cnv_w1a1_pynq_deploy.onnx")
         ret = execute_onnx(parent_model, {iname: x}, True)
         y = ret[oname]
diff --git a/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py b/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py
index 74cd46549f45b7512a03da450e011c4f2e80e16e..1ba149687bb80a0f977115bd380a09f70eef23f1 100644
--- a/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py
+++ b/tests/end2end/test_end2end_tfc_w1a1_throughput_test.py
@@ -40,7 +40,6 @@ import onnx.numpy_helper as nph
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
 from finn.core.throughput_test import throughput_test
 from finn.custom_op.registry import getCustomOp
@@ -71,7 +70,7 @@ from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import Streamline
 from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds
 from finn.util.basic import pynq_part_map
-from finn.util.test import get_test_model_trained
+from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
 from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 
@@ -92,7 +91,7 @@ def test_end2end_tfc_w1a1_export():
 
 
 def test_end2end_tfc_w1a1_import_and_tidy():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_export.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a1_export.onnx")
     model = model.transform(InferShapes())
     model = model.transform(FoldConstants())
     model = model.transform(GiveUniqueNodeNames())
@@ -102,13 +101,15 @@ def test_end2end_tfc_w1a1_import_and_tidy():
 
 
 def test_end2end_tfc_w1a1_streamline():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_tidy.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a1_tidy.onnx")
     model = model.transform(Streamline())
     model.save(build_dir + "/end2end_tfc_w1a1_streamlined.onnx")
 
 
 def test_end2end_tfc_w1a1_convert_to_hls_layers():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_streamlined.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_streamlined.onnx"
+    )
     model = model.transform(ConvertBipolarMatMulToXnorPopcount())
     model = model.transform(absorb.AbsorbAddIntoMultiThreshold())
     model = model.transform(absorb.AbsorbMulIntoMultiThreshold())
@@ -118,18 +119,22 @@ def test_end2end_tfc_w1a1_convert_to_hls_layers():
 
 
 def test_end2end_tfc_w1a1_create_dataflow_partition():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_hls_layers.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_hls_layers.onnx"
+    )
     parent_model = model.transform(CreateDataflowPartition())
     parent_model.save(build_dir + "/end2end_tfc_w1a1_dataflow_parent.onnx")
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
     dataflow_model_filename = sdp_node.get_nodeattr("model")
-    dataflow_model = ModelWrapper(dataflow_model_filename)
+    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
     dataflow_model.save(build_dir + "/end2end_tfc_w1a1_dataflow_model.onnx")
 
 
 def test_end2end_tfc_w1a1_fold_and_tlastmarker():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_dataflow_model.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_dataflow_model.onnx"
+    )
     fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
     # (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer
     config = [
@@ -153,23 +158,27 @@ def test_end2end_tfc_w1a1_fold_and_tlastmarker():
     model.save(build_dir + "/end2end_tfc_w1a1_folded.onnx")
 
 
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_end2end_tfc_w1a1_gen_hls_ip():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_folded.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a1_folded.onnx")
     model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
     model = model.transform(HLSSynthIP())
     model = model.transform(AnnotateResources("hls"))
     model.save(build_dir + "/end2end_tfc_w1a1_ipgen.onnx")
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w1a1_ip_stitch():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_ipgen.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a1_ipgen.onnx")
     model = model.transform(ReplaceVerilogRelPaths())
     model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
     model.save(build_dir + "/end2end_tfc_w1a1_ipstitch.onnx")
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w1a1_verify_dataflow_part():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_ipstitch.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a1_ipstitch.onnx")
     x = np.zeros((1, 784), dtype=np.float32)
     inp_name = model.graph.input[0].name
     out_name = model.graph.output[0].name
@@ -196,9 +205,12 @@ def test_end2end_tfc_w1a1_verify_dataflow_part():
     assert np.isclose(res_cppsim, res_rtlsim_whole).all()
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w1a1_verify_all():
     # use the streamlined model as the "golden" model for right answers
-    golden = ModelWrapper(build_dir + "/end2end_tfc_w1a1_streamlined.onnx")
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_streamlined.onnx"
+    )
     iname = golden.graph.input[0].name
     oname = golden.graph.output[0].name
     raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
@@ -209,22 +221,31 @@ def test_end2end_tfc_w1a1_verify_all():
     y_golden = ret_golden[oname]
     # set up parent+child graph to test
     # we'll use models from the previous step as the child model
-    parent_model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_dataflow_parent.onnx")
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_dataflow_parent.onnx"
+    )
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
     # produce results with cppsim
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
+    load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a1_ipstitch_cppsim.onnx")
     sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a1_ipstitch_cppsim.onnx")
     ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
     y_cppsim = ret_cppsim[oname]
     # produce results with node-by-node rtlsim
+    load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_ipstitch_nodebynode_rtlsim.onnx"
+    )
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_tfc_w1a1_ipstitch_nodebynode_rtlsim.onnx"
     )
     ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True)
     y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname]
     # produce results with whole-network (stitched ip) rtlsim
+    load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_ipstitch_whole_rtlsim.onnx"
+    )
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_tfc_w1a1_ipstitch_whole_rtlsim.onnx"
     )
@@ -235,27 +256,34 @@ def test_end2end_tfc_w1a1_verify_all():
     assert np.isclose(y_golden, y_whole_rtlsim).all()
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w1a1_make_pynq_proj():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_ipstitch.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a1_ipstitch.onnx")
     model = model.transform(MakePYNQProject(test_pynq_board))
     model.save(build_dir + "/end2end_tfc_w1a1_pynq_project.onnx")
 
 
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_end2end_tfc_w1a1_synth_pynq_project():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_pynq_project.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_pynq_project.onnx"
+    )
     model = model.transform(SynthPYNQProject())
     model = model.transform(AnnotateResources("synth"))
     model.save(build_dir + "/end2end_tfc_w1a1_synth.onnx")
 
 
 def test_end2end_tfc_w1a1_make_driver():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_synth.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a1_synth.onnx")
     model = model.transform(MakePYNQDriver())
     model.save(build_dir + "/end2end_tfc_w1a1_pynq_driver.onnx")
 
 
 def test_end2end_tfc_w1a1_deploy_on_pynq():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_pynq_driver.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_pynq_driver.onnx"
+    )
     try:
         ip = os.environ["PYNQ_IP"]  # no fault for this one; skip if not defined
         if ip == "":
@@ -273,7 +301,9 @@ def test_end2end_tfc_w1a1_deploy_on_pynq():
 
 def test_end2end_tfc_w1a1_run_on_pynq():
     # use the streamlined model as the "golden" model for right answers
-    golden = ModelWrapper(build_dir + "/end2end_tfc_w1a1_streamlined.onnx")
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_streamlined.onnx"
+    )
     iname = golden.graph.input[0].name
     oname = golden.graph.output[0].name
     raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
@@ -285,7 +315,9 @@ def test_end2end_tfc_w1a1_run_on_pynq():
     y_golden = ret_golden[oname]
     # set up parent+child graph to test
     # we'll use models from the previous step as the child model
-    parent_model = ModelWrapper(build_dir + "/end2end_tfc_w1a1_dataflow_parent.onnx")
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a1_dataflow_parent.onnx"
+    )
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
     try:
@@ -295,11 +327,12 @@ def test_end2end_tfc_w1a1_run_on_pynq():
         # produce results with cppsim
         sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
         sdp_node = getCustomOp(sdp_node)
+        load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a1_pynq_deploy.onnx")
         sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a1_pynq_deploy.onnx")
         ret = execute_onnx(parent_model, {iname: x}, True)
         y = ret[oname]
         assert np.isclose(y, y_golden).all()
-        child_model = ModelWrapper(sdp_node.get_nodeattr("model"))
+        child_model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
         res = throughput_test(child_model)
         assert res is not None
 
diff --git a/tests/end2end/test_end2end_tfc_w1a2.py b/tests/end2end/test_end2end_tfc_w1a2.py
index 5ee2942845c41f4c6705b4ee3ecee89154d9faa9..d4c005a86580fb36e735beb00717fcfdffff21e5 100644
--- a/tests/end2end/test_end2end_tfc_w1a2.py
+++ b/tests/end2end/test_end2end_tfc_w1a2.py
@@ -39,7 +39,6 @@ import onnx  # NOQA
 import onnx.numpy_helper as nph
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.fold_constants import FoldConstants
@@ -67,7 +66,7 @@ from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import Streamline
 from finn.util.basic import pynq_part_map
-from finn.util.test import get_test_model_trained
+from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
 from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 
@@ -88,7 +87,7 @@ def test_end2end_tfc_w1a2_export():
 
 
 def test_end2end_tfc_w1a2_import_and_tidy():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_export.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a2_export.onnx")
     model = model.transform(InferShapes())
     model = model.transform(FoldConstants())
     model = model.transform(GiveUniqueNodeNames())
@@ -98,30 +97,36 @@ def test_end2end_tfc_w1a2_import_and_tidy():
 
 
 def test_end2end_tfc_w1a2_streamline():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_tidy.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a2_tidy.onnx")
     model = model.transform(Streamline())
     model.save(build_dir + "/end2end_tfc_w1a2_streamlined.onnx")
 
 
 def test_end2end_tfc_w1a2_convert_to_hls_layers():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_streamlined.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_streamlined.onnx"
+    )
     model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
     model.save(build_dir + "/end2end_tfc_w1a2_hls_layers.onnx")
 
 
 def test_end2end_tfc_w1a2_create_dataflow_partition():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_hls_layers.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_hls_layers.onnx"
+    )
     parent_model = model.transform(CreateDataflowPartition())
     parent_model.save(build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx")
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
     dataflow_model_filename = sdp_node.get_nodeattr("model")
-    dataflow_model = ModelWrapper(dataflow_model_filename)
+    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
     dataflow_model.save(build_dir + "/end2end_tfc_w1a2_dataflow_model.onnx")
 
 
 def test_end2end_tfc_w1a2_fold_and_tlastmarker():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_model.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_dataflow_model.onnx"
+    )
     fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
     # (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer
     config = [
@@ -145,23 +150,27 @@ def test_end2end_tfc_w1a2_fold_and_tlastmarker():
     model.save(build_dir + "/end2end_tfc_w1a2_folded.onnx")
 
 
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_end2end_tfc_w1a2_gen_hls_ip():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_folded.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a2_folded.onnx")
     model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
     model = model.transform(HLSSynthIP())
     model = model.transform(AnnotateResources("hls"))
     model.save(build_dir + "/end2end_tfc_w1a2_ipgen.onnx")
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w1a2_ip_stitch():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipgen.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a2_ipgen.onnx")
     model = model.transform(ReplaceVerilogRelPaths())
     model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
     model.save(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w1a2_verify_dataflow_part():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
     x = np.zeros((1, 784), dtype=np.float32)
     inp_name = model.graph.input[0].name
     out_name = model.graph.output[0].name
@@ -188,9 +197,12 @@ def test_end2end_tfc_w1a2_verify_dataflow_part():
     assert np.isclose(res_cppsim, res_rtlsim_whole).all()
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w1a2_verify_all():
     # use the streamlined model as the "golden" model for right answers
-    golden = ModelWrapper(build_dir + "/end2end_tfc_w1a2_streamlined.onnx")
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_streamlined.onnx"
+    )
     iname = golden.graph.input[0].name
     oname = golden.graph.output[0].name
     raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
@@ -201,22 +213,31 @@ def test_end2end_tfc_w1a2_verify_all():
     y_golden = ret_golden[oname]
     # set up parent+child graph to test
     # we'll use models from the previous step as the child model
-    parent_model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx")
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx"
+    )
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
     # produce results with cppsim
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
+    load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a2_ipstitch_cppsim.onnx")
     sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a2_ipstitch_cppsim.onnx")
     ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
     y_cppsim = ret_cppsim[oname]
     # produce results with node-by-node rtlsim
+    load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx"
+    )
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_tfc_w1a2_ipstitch_nodebynode_rtlsim.onnx"
     )
     ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True)
     y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname]
     # produce results with whole-network (stitched ip) rtlsim
+    load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx"
+    )
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_tfc_w1a2_ipstitch_whole_rtlsim.onnx"
     )
@@ -227,27 +248,34 @@ def test_end2end_tfc_w1a2_verify_all():
     assert np.isclose(y_golden, y_whole_rtlsim).all()
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w1a2_make_pynq_proj():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a2_ipstitch.onnx")
     model = model.transform(MakePYNQProject(test_pynq_board))
     model.save(build_dir + "/end2end_tfc_w1a2_pynq_project.onnx")
 
 
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_end2end_tfc_w1a2_synth_pynq_project():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_pynq_project.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_pynq_project.onnx"
+    )
     model = model.transform(SynthPYNQProject())
     model = model.transform(AnnotateResources("synth"))
     model.save(build_dir + "/end2end_tfc_w1a2_synth.onnx")
 
 
 def test_end2end_tfc_w1a2_make_driver():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_synth.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a2_synth.onnx")
     model = model.transform(MakePYNQDriver())
     model.save(build_dir + "/end2end_tfc_w1a2_pynq_driver.onnx")
 
 
 def test_end2end_tfc_w1a2_deploy_on_pynq():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_pynq_driver.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_pynq_driver.onnx"
+    )
     try:
         ip = os.environ["PYNQ_IP"]  # no fault for this one; skip if not defined
         if ip == "":
@@ -265,7 +293,9 @@ def test_end2end_tfc_w1a2_deploy_on_pynq():
 
 def test_end2end_tfc_w1a2_run_on_pynq():
     # use the streamlined model as the "golden" model for right answers
-    golden = ModelWrapper(build_dir + "/end2end_tfc_w1a2_streamlined.onnx")
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_streamlined.onnx"
+    )
     iname = golden.graph.input[0].name
     oname = golden.graph.output[0].name
     raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
@@ -277,7 +307,9 @@ def test_end2end_tfc_w1a2_run_on_pynq():
     y_golden = ret_golden[oname]
     # set up parent+child graph to test
     # we'll use models from the previous step as the child model
-    parent_model = ModelWrapper(build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx")
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w1a2_dataflow_parent.onnx"
+    )
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
     try:
@@ -287,6 +319,7 @@ def test_end2end_tfc_w1a2_run_on_pynq():
         # produce results with cppsim
         sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
         sdp_node = getCustomOp(sdp_node)
+        load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w1a2_pynq_deploy.onnx")
         sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w1a2_pynq_deploy.onnx")
         ret = execute_onnx(parent_model, {iname: x}, True)
         y = ret[oname]
diff --git a/tests/end2end/test_end2end_tfc_w2a2.py b/tests/end2end/test_end2end_tfc_w2a2.py
index 2477318efd1e02b0865dadb40bad1a74ac8ea0b4..19d3f86e046658c4080d71984df1cff74008adab 100644
--- a/tests/end2end/test_end2end_tfc_w2a2.py
+++ b/tests/end2end/test_end2end_tfc_w2a2.py
@@ -39,7 +39,6 @@ import onnx  # NOQA
 import onnx.numpy_helper as nph
 
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
-from finn.core.modelwrapper import ModelWrapper
 from finn.core.onnx_exec import execute_onnx
 from finn.custom_op.registry import getCustomOp
 from finn.transformation.fold_constants import FoldConstants
@@ -67,7 +66,7 @@ from finn.transformation.infer_datatypes import InferDataTypes
 from finn.transformation.infer_shapes import InferShapes
 from finn.transformation.streamline import Streamline
 from finn.util.basic import pynq_part_map
-from finn.util.test import get_test_model_trained
+from finn.util.test import get_test_model_trained, load_test_checkpoint_or_skip
 from finn.transformation.fpgadataflow.annotate_resources import AnnotateResources
 from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
 
@@ -88,7 +87,7 @@ def test_end2end_tfc_w2a2_export():
 
 
 def test_end2end_tfc_w2a2_import_and_tidy():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_export.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w2a2_export.onnx")
     model = model.transform(InferShapes())
     model = model.transform(FoldConstants())
     model = model.transform(GiveUniqueNodeNames())
@@ -98,30 +97,36 @@ def test_end2end_tfc_w2a2_import_and_tidy():
 
 
 def test_end2end_tfc_w2a2_streamline():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_tidy.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w2a2_tidy.onnx")
     model = model.transform(Streamline())
     model.save(build_dir + "/end2end_tfc_w2a2_streamlined.onnx")
 
 
 def test_end2end_tfc_w2a2_convert_to_hls_layers():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_streamlined.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_streamlined.onnx"
+    )
     model = model.transform(to_hls.InferQuantizedStreamingFCLayer(mem_mode))
     model.save(build_dir + "/end2end_tfc_w2a2_hls_layers.onnx")
 
 
 def test_end2end_tfc_w2a2_create_dataflow_partition():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_hls_layers.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_hls_layers.onnx"
+    )
     parent_model = model.transform(CreateDataflowPartition())
     parent_model.save(build_dir + "/end2end_tfc_w2a2_dataflow_parent.onnx")
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
     dataflow_model_filename = sdp_node.get_nodeattr("model")
-    dataflow_model = ModelWrapper(dataflow_model_filename)
+    dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename)
     dataflow_model.save(build_dir + "/end2end_tfc_w2a2_dataflow_model.onnx")
 
 
 def test_end2end_tfc_w2a2_fold_and_tlastmarker():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_dataflow_model.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_dataflow_model.onnx"
+    )
     fc_layers = model.get_nodes_by_op_type("StreamingFCLayer_Batch")
     # (PE, SIMD, in_fifo_depth, out_fifo_depth, ramstyle) for each layer
     config = [
@@ -145,23 +150,27 @@ def test_end2end_tfc_w2a2_fold_and_tlastmarker():
     model.save(build_dir + "/end2end_tfc_w2a2_folded.onnx")
 
 
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_end2end_tfc_w2a2_gen_hls_ip():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_folded.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w2a2_folded.onnx")
     model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
     model = model.transform(HLSSynthIP())
     model = model.transform(AnnotateResources("hls"))
     model.save(build_dir + "/end2end_tfc_w2a2_ipgen.onnx")
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w2a2_ip_stitch():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_ipgen.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w2a2_ipgen.onnx")
     model = model.transform(ReplaceVerilogRelPaths())
     model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns))
     model.save(build_dir + "/end2end_tfc_w2a2_ipstitch.onnx")
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w2a2_verify_dataflow_part():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_ipstitch.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w2a2_ipstitch.onnx")
     x = np.zeros((1, 784), dtype=np.float32)
     inp_name = model.graph.input[0].name
     out_name = model.graph.output[0].name
@@ -188,9 +197,12 @@ def test_end2end_tfc_w2a2_verify_dataflow_part():
     assert np.isclose(res_cppsim, res_rtlsim_whole).all()
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w2a2_verify_all():
     # use the streamlined model as the "golden" model for right answers
-    golden = ModelWrapper(build_dir + "/end2end_tfc_w2a2_streamlined.onnx")
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_streamlined.onnx"
+    )
     iname = golden.graph.input[0].name
     oname = golden.graph.output[0].name
     raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
@@ -201,22 +213,31 @@ def test_end2end_tfc_w2a2_verify_all():
     y_golden = ret_golden[oname]
     # set up parent+child graph to test
     # we'll use models from the previous step as the child model
-    parent_model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_dataflow_parent.onnx")
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_dataflow_parent.onnx"
+    )
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
     # produce results with cppsim
     sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
     sdp_node = getCustomOp(sdp_node)
+    load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w2a2_ipstitch_cppsim.onnx")
     sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w2a2_ipstitch_cppsim.onnx")
     ret_cppsim = execute_onnx(parent_model, {iname: x}, True)
     y_cppsim = ret_cppsim[oname]
     # produce results with node-by-node rtlsim
+    load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_ipstitch_nodebynode_rtlsim.onnx"
+    )
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_tfc_w2a2_ipstitch_nodebynode_rtlsim.onnx"
     )
     ret_nodebynode_rtlsim = execute_onnx(parent_model, {iname: x}, True)
     y_nodebynode_rtlsim = ret_nodebynode_rtlsim[oname]
     # produce results with whole-network (stitched ip) rtlsim
+    load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_ipstitch_whole_rtlsim.onnx"
+    )
     sdp_node.set_nodeattr(
         "model", build_dir + "/end2end_tfc_w2a2_ipstitch_whole_rtlsim.onnx"
     )
@@ -227,27 +248,34 @@ def test_end2end_tfc_w2a2_verify_all():
     assert np.isclose(y_golden, y_whole_rtlsim).all()
 
 
+@pytest.mark.vivado
 def test_end2end_tfc_w2a2_make_pynq_proj():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_ipstitch.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w2a2_ipstitch.onnx")
     model = model.transform(MakePYNQProject(test_pynq_board))
     model.save(build_dir + "/end2end_tfc_w2a2_pynq_project.onnx")
 
 
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_end2end_tfc_w2a2_synth_pynq_project():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_pynq_project.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_pynq_project.onnx"
+    )
     model = model.transform(SynthPYNQProject())
     model = model.transform(AnnotateResources("synth"))
     model.save(build_dir + "/end2end_tfc_w2a2_synth.onnx")
 
 
 def test_end2end_tfc_w2a2_make_driver():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_synth.onnx")
+    model = load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w2a2_synth.onnx")
     model = model.transform(MakePYNQDriver())
     model.save(build_dir + "/end2end_tfc_w2a2_pynq_driver.onnx")
 
 
 def test_end2end_tfc_w2a2_deploy_on_pynq():
-    model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_pynq_driver.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_pynq_driver.onnx"
+    )
     try:
         ip = os.environ["PYNQ_IP"]  # no fault for this one; skip if not defined
         if ip == "":
@@ -265,7 +293,9 @@ def test_end2end_tfc_w2a2_deploy_on_pynq():
 
 def test_end2end_tfc_w2a2_run_on_pynq():
     # use the streamlined model as the "golden" model for right answers
-    golden = ModelWrapper(build_dir + "/end2end_tfc_w2a2_streamlined.onnx")
+    golden = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_streamlined.onnx"
+    )
     iname = golden.graph.input[0].name
     oname = golden.graph.output[0].name
     raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb")
@@ -277,7 +307,9 @@ def test_end2end_tfc_w2a2_run_on_pynq():
     y_golden = ret_golden[oname]
     # set up parent+child graph to test
     # we'll use models from the previous step as the child model
-    parent_model = ModelWrapper(build_dir + "/end2end_tfc_w2a2_dataflow_parent.onnx")
+    parent_model = load_test_checkpoint_or_skip(
+        build_dir + "/end2end_tfc_w2a2_dataflow_parent.onnx"
+    )
     iname = parent_model.graph.input[0].name
     oname = parent_model.graph.output[0].name
     try:
@@ -287,6 +319,7 @@ def test_end2end_tfc_w2a2_run_on_pynq():
         # produce results with cppsim
         sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0]
         sdp_node = getCustomOp(sdp_node)
+        load_test_checkpoint_or_skip(build_dir + "/end2end_tfc_w2a2_pynq_deploy.onnx")
         sdp_node.set_nodeattr("model", build_dir + "/end2end_tfc_w2a2_pynq_deploy.onnx")
         ret = execute_onnx(parent_model, {iname: x}, True)
         y = ret[oname]
diff --git a/tests/fpgadataflow/test_code_gen_trafo.py b/tests/fpgadataflow/test_code_gen_trafo.py
index 1228a9c79608a1c7eb44900ddb7df54ed900a3c2..24933759830535dfcec768d47a6020b4f3e2de35 100644
--- a/tests/fpgadataflow/test_code_gen_trafo.py
+++ b/tests/fpgadataflow/test_code_gen_trafo.py
@@ -29,13 +29,14 @@
 import os
 
 from onnx import TensorProto, helper
-
+import pytest
 import finn.util.basic as util
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
 from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 
 
+@pytest.mark.vivado
 def test_code_gen_trafo():
     idt = wdt = odt = DataType.BIPOLAR
     mw = 8
diff --git a/tests/fpgadataflow/test_compilation_trafo.py b/tests/fpgadataflow/test_compilation_trafo.py
index 35eed02f4e71a96f9f4e8957c372f93e6cd7927c..65894e02e490f6931e5b03a9aa67b8f22e32583a 100644
--- a/tests/fpgadataflow/test_compilation_trafo.py
+++ b/tests/fpgadataflow/test_compilation_trafo.py
@@ -30,6 +30,7 @@ import os
 
 from onnx import TensorProto, helper
 
+import pytest
 import finn.util.basic as util
 from finn.core.datatype import DataType
 from finn.core.modelwrapper import ModelWrapper
@@ -37,6 +38,7 @@ from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
 from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
 
 
+@pytest.mark.vivado
 def test_compilation_trafo():
     idt = wdt = odt = DataType.BIPOLAR
     mw = 8
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
index 220f8a7966a146f954a7fcb3f32058e231b83e23..e03090f0581eebf68cac7baffb6888a6992df68d 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_cnv.py
@@ -31,7 +31,7 @@ import pkg_resources as pk
 
 import brevitas.onnx as bo
 import numpy as np
-
+import pytest
 import finn.core.onnx_exec as oxe
 import finn.transformation.streamline.absorb as absorb
 from finn.transformation.streamline.reorder import MakeMaxPoolNHWC
@@ -53,6 +53,7 @@ from finn.custom_op.registry import getCustomOp
 export_onnx_path_cnv = "test_output_cnv.onnx"
 
 
+@pytest.mark.vivado
 def test_convert_to_hls_layers_cnv_w1a1():
     cnv = get_test_model_trained("CNV", 1, 1)
     bo.export_finn_onnx(cnv, (1, 3, 32, 32), export_onnx_path_cnv)
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
index b7dea03797bc5de5e7517d0d8b816c438027008b..e261a3114853bf24bdb4c931c46ff92eea4150dd 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
@@ -34,7 +34,7 @@ import numpy as np
 import onnx
 import onnx.numpy_helper as nph
 import torch
-
+import pytest
 import finn.core.onnx_exec as oxe
 import finn.transformation.fpgadataflow.convert_to_hls_layers as to_hls
 import finn.transformation.streamline.absorb as absorb
@@ -56,6 +56,7 @@ export_onnx_path = "test_output_tfc.onnx"
 export_onnx_path_cnv = "test_output_cnv.onnx"
 
 
+@pytest.mark.vivado
 def test_convert_to_hls_layers_tfc_w1a1():
     tfc = get_test_model_trained("TFC", 1, 1)
     bo.export_finn_onnx(tfc, (1, 1, 28, 28), export_onnx_path)
@@ -125,6 +126,7 @@ def test_convert_to_hls_layers_tfc_w1a1():
     assert np.isclose(produced, expected, atol=1e-3).all()
 
 
+@pytest.mark.vivado
 def test_convert_to_hls_layers_tfc_w1a2():
     tfc = get_test_model_trained("TFC", 1, 2)
     bo.export_finn_onnx(tfc, (1, 1, 28, 28), export_onnx_path)
diff --git a/tests/fpgadataflow/test_create_dataflow_partition.py b/tests/fpgadataflow/test_create_dataflow_partition.py
index 77e0ddeebf6080e1840d6014978a4c9b4a10b5c1..c4f748051ff038371353574298580f3bf9e05e9f 100644
--- a/tests/fpgadataflow/test_create_dataflow_partition.py
+++ b/tests/fpgadataflow/test_create_dataflow_partition.py
@@ -29,7 +29,6 @@
 import os.path
 from pkgutil import get_data
 
-import pytest
 
 from finn.core.modelwrapper import ModelWrapper
 from finn.custom_op.registry import getCustomOp
@@ -38,11 +37,11 @@ from finn.transformation.fpgadataflow.create_dataflow_partition import (
 )
 from finn.transformation.fpgadataflow.insert_tlastmarker import InsertTLastMarker
 from finn.util.basic import make_build_dir
+from finn.util.test import load_test_checkpoint_or_skip
 
 build_dir = make_build_dir("test_dataflow_partition_")
 
 
-@pytest.mark.dependency()
 def test_dataflow_partition_create():
     # load the onnx model
     raw_m = get_data(
@@ -57,9 +56,10 @@ def test_dataflow_partition_create():
     model.save(build_dir + "/test_dataflow_partition_create.onnx")
 
 
-@pytest.mark.dependency(depends=["test_dataflow_partition_create"])
 def test_dataflow_partition_tlastmarker():
-    model = ModelWrapper(build_dir + "/test_dataflow_partition_create.onnx")
+    model = load_test_checkpoint_or_skip(
+        build_dir + "/test_dataflow_partition_create.onnx"
+    )
     model_path = getCustomOp(model.graph.node[2]).get_nodeattr("model")
     model = ModelWrapper(model_path)
     model = model.transform(InsertTLastMarker())
diff --git a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
index 02a9acae5e0e90d2a8dfa7d4d4afb03aa11f4239..5051bf34dc690daf8b6186859d3717cc8e217eee 100644
--- a/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
+++ b/tests/fpgadataflow/test_fpgadataflow_convinputgenerator.py
@@ -137,6 +137,8 @@ def prepare_inputs(input_tensor):
 @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"])
 # input channel parallelism ("SIMD")
 @pytest.mark.parametrize("simd", [1, 2])
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_fpgadataflow_slidingwindow(idt, k, ifm_dim, ifm_ch, stride, exec_mode, simd):
     ofm_dim = int(((ifm_dim - k) / stride) + 1)
 
diff --git a/tests/fpgadataflow/test_fpgadataflow_dwc.py b/tests/fpgadataflow/test_fpgadataflow_dwc.py
index a230fb4201673e3bf0a31cf9ec82f21250fd9e40..1d83f7a23cd3bad757e772055d242799cf22b0da 100644
--- a/tests/fpgadataflow/test_fpgadataflow_dwc.py
+++ b/tests/fpgadataflow/test_fpgadataflow_dwc.py
@@ -55,6 +55,8 @@ def prepare_inputs(input_tensor, dt):
 @pytest.mark.parametrize("OUTWidth", [2, 4])
 # finn_dtype
 @pytest.mark.parametrize("finn_dtype", [DataType.BIPOLAR, DataType.INT2])
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_fpgadataflow_dwc_rtlsim(Shape, INWidth, OUTWidth, finn_dtype):
 
     # generate input data
diff --git a/tests/fpgadataflow/test_fpgadataflow_fclayer.py b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
index 416d96d5dbfa1125d878eb8339ae38f5d572d1ce..fc5cdb7745945bee99564ba9ab19423a66d8e035 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fclayer.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fclayer.py
@@ -149,6 +149,8 @@ def prepare_inputs(input_tensor, idt, wdt):
 @pytest.mark.parametrize("mw", [16])
 # HLS matrix height (output features)
 @pytest.mark.parametrize("mh", [16])
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
     if nf == -1:
         nf = mh
@@ -234,6 +236,8 @@ def test_fpgadataflow_fclayer_cppsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
 @pytest.mark.parametrize("mw", [16])
 # HLS matrix height (output features)
 @pytest.mark.parametrize("mh", [16])
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
     if nf == -1:
         nf = mh
@@ -324,6 +328,7 @@ def test_fpgadataflow_fclayer_rtlsim(mem_mode, idt, wdt, act, nf, sf, mw, mh):
 @pytest.mark.parametrize("mw", [128])
 # HLS matrix height (output features)
 @pytest.mark.parametrize("mh", [128])
+@pytest.mark.vivado
 def test_fpgadataflow_fclayer_large_depth_decoupled_mode(
     mem_mode, idt, wdt, act, nf, sf, mw, mh
 ):
diff --git a/tests/fpgadataflow/test_fpgadataflow_fifo.py b/tests/fpgadataflow/test_fpgadataflow_fifo.py
index 9158a0b0e72017b2468627e4f30fd3432c418d38..94090a47ad64fc377530e6e21d35661e1d92b5a6 100644
--- a/tests/fpgadataflow/test_fpgadataflow_fifo.py
+++ b/tests/fpgadataflow/test_fpgadataflow_fifo.py
@@ -76,6 +76,8 @@ def prepare_inputs(input_tensor, dt):
 @pytest.mark.parametrize("depth", [16])
 # finn_dtype
 @pytest.mark.parametrize("finn_dtype", [DataType.BIPOLAR])  # , DataType.INT2])
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_fpgadataflow_fifo_rtlsim(Shape, folded_shape, depth, finn_dtype):
 
     # generate input data
diff --git a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py b/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py
index 30b86d639ae52143320dfdfeb25488bae865b4d2..16100522aa94fd25d234efa1d03edfdc866ca1bb 100644
--- a/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py
+++ b/tests/fpgadataflow/test_fpgadataflow_ip_stitch.py
@@ -52,6 +52,7 @@ import finn.transformation.fpgadataflow.replace_verilog_relpaths as rvp
 from finn.transformation.general import GiveUniqueNodeNames
 from finn.util.basic import gen_finn_dt_tensor, pynq_part_map
 from finn.util.fpgadataflow import pyverilate_stitched_ip
+from finn.util.test import load_test_checkpoint_or_skip
 
 test_pynq_board = os.getenv("PYNQ_BOARD", default="Pynq-Z1")
 test_fpga_part = pynq_part_map[test_pynq_board]
@@ -198,13 +199,14 @@ def create_two_fc_model():
 
 # exec_mode of StreamingDataflowPartition
 # @pytest.mark.parametrize("exec_mode", ["remote_pynq"]) #, "rtlsim"])
+@pytest.mark.vivado
 def test_fpgadataflow_ipstitch_gen_model():  # exec_mode):
     model = create_one_fc_model()
     if model.graph.node[0].op_type == "StreamingDataflowPartition":
         sdp_node = getCustomOp(model.graph.node[0])
         assert sdp_node.__class__.__name__ == "StreamingDataflowPartition"
         assert os.path.isfile(sdp_node.get_nodeattr("model"))
-        model = ModelWrapper(sdp_node.get_nodeattr("model"))
+        model = load_test_checkpoint_or_skip(sdp_node.get_nodeattr("model"))
         model.set_metadata_prop("exec_mode", "remote_pynq")
     model = model.transform(InsertTLastMarker())
     model = model.transform(GiveUniqueNodeNames())
@@ -215,8 +217,9 @@ def test_fpgadataflow_ipstitch_gen_model():  # exec_mode):
     model.save(ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx")
 
 
+@pytest.mark.vivado
 def test_fpgadataflow_ipstitch_do_stitch():
-    model = ModelWrapper(
+    model = load_test_checkpoint_or_skip(
         ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_gen_model.onnx"
     )
     model = model.transform(rvp.ReplaceVerilogRelPaths())
@@ -231,8 +234,11 @@ def test_fpgadataflow_ipstitch_do_stitch():
     model.save(ip_stitch_model_dir + "/test_fpgadataflow_ip_stitch.onnx")
 
 
+@pytest.mark.vivado
 def test_fpgadataflow_ipstitch_rtlsim():
-    model = ModelWrapper(ip_stitch_model_dir + "/test_fpgadataflow_ip_stitch.onnx")
+    model = load_test_checkpoint_or_skip(
+        ip_stitch_model_dir + "/test_fpgadataflow_ip_stitch.onnx"
+    )
     model.set_metadata_prop("rtlsim_trace", "whole_trace.vcd")
     sim = pyverilate_stitched_ip(model)
     exp_io = [
@@ -275,8 +281,11 @@ def test_fpgadataflow_ipstitch_rtlsim():
     assert (rtlsim_res == x).all()
 
 
+@pytest.mark.vivado
 def test_fpgadataflow_ipstitch_pynq_projgen():
-    model = ModelWrapper(ip_stitch_model_dir + "/test_fpgadataflow_ip_stitch.onnx")
+    model = load_test_checkpoint_or_skip(
+        ip_stitch_model_dir + "/test_fpgadataflow_ip_stitch.onnx"
+    )
     model = model.transform(MakePYNQProject(test_pynq_board))
     vivado_pynq_proj_dir = model.get_metadata_prop("vivado_pynq_proj")
     assert vivado_pynq_proj_dir is not None
@@ -284,8 +293,12 @@ def test_fpgadataflow_ipstitch_pynq_projgen():
     model.save(ip_stitch_model_dir + "/test_fpgadataflow_pynq_projgen.onnx")
 
 
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_fpgadataflow_ipstitch_pynq_synth():
-    model = ModelWrapper(ip_stitch_model_dir + "/test_fpgadataflow_pynq_projgen.onnx")
+    model = load_test_checkpoint_or_skip(
+        ip_stitch_model_dir + "/test_fpgadataflow_pynq_projgen.onnx"
+    )
     model = model.transform(SynthPYNQProject())
     bitfile = model.get_metadata_prop("vivado_pynq_bitfile")
     assert bitfile is not None
@@ -294,7 +307,9 @@ def test_fpgadataflow_ipstitch_pynq_synth():
 
 
 def test_fpgadataflow_ipstitch_pynq_driver():
-    model = ModelWrapper(ip_stitch_model_dir + "/test_fpgadataflow_pynq_projgen.onnx")
+    model = load_test_checkpoint_or_skip(
+        ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_synth.onnx"
+    )
     model = model.transform(MakePYNQDriver())
     driver_dir = model.get_metadata_prop("pynq_driver_dir")
     assert driver_dir is not None
@@ -303,13 +318,13 @@ def test_fpgadataflow_ipstitch_pynq_driver():
 
 
 def test_fpgadataflow_ipstitch_pynq_deployment_folder():
+    model = load_test_checkpoint_or_skip(
+        ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_driver.onnx"
+    )
     try:
         ip = os.environ["PYNQ_IP"]  # no default for this one; skip if not defined
         if ip == "":
             pytest.skip("PYNQ board IP address not specified")
-        model = ModelWrapper(
-            ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_driver.onnx"
-        )
         username = os.getenv("PYNQ_USERNAME", "xilinx")
         password = os.getenv("PYNQ_PASSWORD", "xilinx")
         port = os.getenv("PYNQ_PORT", 22)
@@ -341,7 +356,7 @@ def test_fpgadataflow_ipstitch_remote_execution():
         ip = os.environ["PYNQ_IP"]  # NOQA
         if ip == "":
             pytest.skip("PYNQ board IP address not specified")
-        model = ModelWrapper(
+        model = load_test_checkpoint_or_skip(
             ip_stitch_model_dir + "/test_fpgadataflow_ipstitch_pynq_deployment.onnx"
         )
         iname = "inp"
diff --git a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
index ac4ab33469c7720c3d7b9f30f5d13be888e1439d..bda66bebbd93d346eb0026b17cbaff9a7ca5df5e 100644
--- a/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
+++ b/tests/fpgadataflow/test_layer_streaming_maxpool_batch.py
@@ -121,6 +121,8 @@ def prepare_inputs(input_tensor):
 @pytest.mark.parametrize("ifm_ch", [1, 2])  # , 2, 3, 4])
 # execution mode
 @pytest.mark.parametrize("exec_mode", ["rtlsim", "cppsim"])
+@pytest.mark.slow
+@pytest.mark.vivado
 def test_fpgadataflow_streamingmaxpool(idt, k, ifm_dim, ifm_ch, exec_mode):
     stride = k
     ofm_dim = int(((ifm_dim - k) / stride) + 1)
diff --git a/tests/transformation/test_infer_datatypes.py b/tests/transformation/test_infer_datatypes.py
index 77b6a94f8ed891a4fe761fe864a6e18d35e84382..e3db40289c4318894cf5ad41c2f67b3bff501db9 100644
--- a/tests/transformation/test_infer_datatypes.py
+++ b/tests/transformation/test_infer_datatypes.py
@@ -54,8 +54,8 @@ def test_infer_datatypes():
     assert model.get_tensor_datatype("MatMul_1_out0") == DataType.INT32
     assert model.get_tensor_datatype("MatMul_2_out0") == DataType.INT32
     assert model.get_tensor_datatype("MatMul_3_out0") == DataType.INT32
-    assert model.get_tensor_datatype("Sign_0_out0") == DataType.BIPOLAR
-    assert model.get_tensor_datatype("Sign_1_out0") == DataType.BIPOLAR
-    assert model.get_tensor_datatype("Sign_2_out0") == DataType.BIPOLAR
-    assert model.get_tensor_datatype("Sign_3_out0") == DataType.BIPOLAR
+    assert model.get_tensor_datatype("MultiThreshold_0_out0") == DataType.BIPOLAR
+    assert model.get_tensor_datatype("MultiThreshold_1_out0") == DataType.BIPOLAR
+    assert model.get_tensor_datatype("MultiThreshold_2_out0") == DataType.BIPOLAR
+    assert model.get_tensor_datatype("MultiThreshold_3_out0") == DataType.BIPOLAR
     os.remove(export_onnx_path)
diff --git a/tests/util/test_data_packing.py b/tests/util/test_data_packing.py
index 28f1d56d0dbc5451ccad3d36b4b1d4c6bed4f63e..7b77c4be20c1f41c11b53a9b65b79441c9bbbe47 100644
--- a/tests/util/test_data_packing.py
+++ b/tests/util/test_data_packing.py
@@ -47,6 +47,7 @@ from finn.util.data_packing import (
 
 @pytest.mark.parametrize("dtype", [DataType.BINARY, DataType.INT2, DataType.INT32])
 @pytest.mark.parametrize("test_shape", [(1, 2, 4), (1, 1, 64), (2, 64)])
+@pytest.mark.vivado
 def test_npy2apintstream(test_shape, dtype):
     ndarray = cutil.gen_finn_dt_tensor(dtype, test_shape)
     test_dir = cutil.make_build_dir(prefix="test_npy2apintstream_")