diff --git a/src/finn/core/datatype.py b/src/finn/core/datatype.py
index 222d11a8872f9be757fd60fbfa5f8abea683311a..df895a1ad446d6b2cc3ebb24f1179944f4cfe9ab 100644
--- a/src/finn/core/datatype.py
+++ b/src/finn/core/datatype.py
@@ -50,17 +50,69 @@ class DataType(Enum):
     UINT2 = auto()
     UINT3 = auto()
     UINT4 = auto()
+    UINT5 = auto()
+    UINT6 = auto()
+    UINT7 = auto()
     UINT8 = auto()
+    UINT9 = auto()
+    UINT10 = auto()
+    UINT11 = auto()
+    UINT12 = auto()
+    UINT13 = auto()
+    UINT14 = auto()
+    UINT15 = auto()
     UINT16 = auto()
+    UINT17 = auto()
+    UINT18 = auto()
+    UINT19 = auto()
+    UINT20 = auto()
+    UINT21 = auto()
+    UINT22 = auto()
+    UINT23 = auto()
+    UINT24 = auto()
+    UINT25 = auto()
+    UINT26 = auto()
+    UINT27 = auto()
+    UINT28 = auto()
+    UINT29 = auto()
+    UINT30 = auto()
+    UINT31 = auto()
     UINT32 = auto()
+    UINT64 = auto()
     BIPOLAR = auto()
     TERNARY = auto()
     INT2 = auto()
     INT3 = auto()
     INT4 = auto()
+    INT5 = auto()
+    INT6 = auto()
+    INT7 = auto()
     INT8 = auto()
+    INT9 = auto()
+    INT10 = auto()
+    INT11 = auto()
+    INT12 = auto()
+    INT13 = auto()
+    INT14 = auto()
+    INT15 = auto()
     INT16 = auto()
+    INT17 = auto()
+    INT18 = auto()
+    INT19 = auto()
+    INT20 = auto()
+    INT21 = auto()
+    INT22 = auto()
+    INT23 = auto()
+    INT24 = auto()
+    INT25 = auto()
+    INT26 = auto()
+    INT27 = auto()
+    INT28 = auto()
+    INT29 = auto()
+    INT30 = auto()
+    INT31 = auto()
     INT32 = auto()
+    INT64 = auto()
     FLOAT32 = auto()
 
     def bitwidth(self):
diff --git a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
index 72aa322e0e44a6f4a5c11025d94bdfeb820338a3..bc266e4934c41d6f5f1261e0a30e90cb72ba83a8 100644
--- a/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
+++ b/src/finn/custom_op/fpgadataflow/streamingfclayer_batch.py
@@ -39,6 +39,7 @@ from finn.custom_op.fpgadataflow import HLSCustomOp
 from finn.util.basic import (
     interleave_matrix_outer_dim_from_partitions,
     roundup_to_integer_multiple,
+    calculate_matvec_accumulator_range,
 )
 from finn.util.data_packing import (
     npy_to_rtlsim_input,
@@ -75,6 +76,8 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             "inputDataType": ("s", True, ""),
             "weightDataType": ("s", True, ""),
             "outputDataType": ("s", True, ""),
+            # FINN DataType for accumulator -- auto-computed and updated
+            "accDataType": ("s", False, "DataType.INT32"),
             # use xnor-popcount for binary weights/inputs, thus treating them
             # as bipolar
             "binaryXnorMode": ("i", False, 0),
@@ -444,6 +447,47 @@ class StreamingFCLayer_Batch(HLSCustomOp):
         ret = np.flip(ret, axis=-1)
         return ret
 
+    def minimize_accumulator_width(self, model):
+        """Set accDataType to the narrowest DataType covering the accumulator
+        range (and the thresholds, if any) and return that DataType.
+
+        Weights are read from input[1], optional thresholds from input[2].
+        Without thresholds, outputDataType is set to the same type. Raises
+        AssertionError if a threshold value does not fit the chosen type.
+        """
+
+        def smallest_dtype(lo, hi):
+            # narrowest DataType able to hold every value in [lo, hi]
+            if lo >= 0:
+                return DataType.get_smallest_possible(hi)
+            if abs(lo) > hi:
+                return DataType.get_smallest_possible(lo)
+            # -hi - 1, not -hi: a signed type picked for -hi may only reach hi - 1
+            return DataType.get_smallest_possible(-hi - 1)
+
+        weights = model.get_initializer(self.onnx_node.input[1])
+        thresholds = None
+        if len(self.onnx_node.input) > 2:
+            thresholds = model.get_initializer(self.onnx_node.input[2])
+        idt = self.get_input_datatype()
+        (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt)
+        if thresholds is not None:
+            threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
+            # accumulator and thresholds share one type, so cover both ranges
+            adt = smallest_dtype(
+                min(acc_min, thresholds.min()), max(acc_max, thresholds.max())
+            )
+            assert np.vectorize(adt.allowed)(
+                threshold_tensor
+            ).all(), "Thresholds can't be expressed with type %s" % str(adt)
+        else:
+            adt = smallest_dtype(acc_min, acc_max)
+        self.set_nodeattr("accDataType", adt.name)
+        if thresholds is None:
+            # for no-activation nodes, output dt = acc dt
+            self.set_nodeattr("outputDataType", adt.name)
+        return adt
+
     def get_hls_compatible_threshold_tensor(self, orig_thres_matrix):
         """Convert the original numpy weight matrix orig_weight_matrix into
         a form suitable for passing to the hlslib call:
@@ -605,7 +649,6 @@ class StreamingFCLayer_Batch(HLSCustomOp):
             thresholds = model.get_initializer(self.onnx_node.input[2])
             if thresholds is not None:
                 threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
-                tdt = DataType.INT32
                 # use UINT32 threshold export for bipolar times bipolar
                 inp_is_bipolar = self.get_input_datatype() == DataType.BIPOLAR
                 wt_is_bipolar = self.get_weight_datatype() == DataType.BIPOLAR
@@ -615,11 +658,12 @@ class StreamingFCLayer_Batch(HLSCustomOp):
                 bin_xnor_mode = self.get_nodeattr("binaryXnorMode") == 1
                 inp_is_bipolar = inp_is_bipolar or (inp_is_binary and bin_xnor_mode)
                 wt_is_bipolar = wt_is_bipolar or (wt_is_binary and bin_xnor_mode)
-                if inp_is_bipolar and wt_is_bipolar:
-                    tdt = DataType.UINT32
+                # get computed threshold datatype from attribute
+                tdt = DataType[self.get_nodeattr("accDataType")]
+
                 assert np.vectorize(tdt.allowed)(
                     threshold_tensor
-                ).all(), "Thresholds are not int"
+                ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
                 thresholds_hls_code = numpy_to_hls_code(
                     threshold_tensor, tdt, "thresholds", False, True
                 )
diff --git a/src/finn/custom_op/fpgadataflow/thresholding_batch.py b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
index 379ebd92d86d54c6bc621c7f89b01eacba2b5d3f..562bab0f18990096f7364b3a4e2bcbbbf4ce2b58 100644
--- a/src/finn/custom_op/fpgadataflow/thresholding_batch.py
+++ b/src/finn/custom_op/fpgadataflow/thresholding_batch.py
@@ -283,10 +283,25 @@ class Thresholding_Batch(HLSCustomOp):
         thresholds = model.get_initializer(self.onnx_node.input[1])
 
         threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds)
-        tdt = DataType.INT32
+
+        min_threshold = thresholds.min()
+        max_threshold = thresholds.max()
+        min_input = self.get_input_datatype().min()
+        max_input = self.get_input_datatype().max()
+        # get range required by threshold values
+        tdt_min = min(min_input, min_threshold)
+        tdt_max = max(max_input, max_threshold)
+        if tdt_min < 0:
+            if abs(tdt_min) > tdt_max:
+                tdt = DataType.get_smallest_possible(tdt_min)
+            else:
+                tdt = DataType.get_smallest_possible(0 - tdt_max - 1)
+        else:
+            tdt = DataType.get_smallest_possible(tdt_max)
         assert np.vectorize(tdt.allowed)(
             threshold_tensor
-        ).all(), "Thresholds are not int"
+        ).all(), "Thresholds can't be expressed with type %s" % str(tdt)
+
         thresholds_hls_code = numpy_to_hls_code(
             threshold_tensor, tdt, "thresholds", False, True
         )
diff --git a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
index e6dca0e4b05f943c971bc0f97af03f5038fd0dab..88f5fa926f73d5cb1919a02c83153cb8d1894711 100644
--- a/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
+++ b/src/finn/transformation/fpgadataflow/convert_to_hls_layers.py
@@ -40,6 +40,9 @@ from finn.transformation.general import SortGraph
 import finn.core.data_layout as DataLayout
 from finn.util.onnx import nchw_to_nhwc
 from finn.util.basic import get_by_name
+from finn.transformation.fpgadataflow.minimize_accumulator_width import (
+    MinimizeAccumulatorWidth,
+)
 
 
 class InferConvInpGen(Transformation):
@@ -489,6 +492,7 @@ class InferBinaryStreamingFCLayer(Transformation):
                     graph.node.remove(n)
                     graph_modified = True
         if graph_modified:
+            model = model.transform(MinimizeAccumulatorWidth())
             model = model.transform(InferShapes())
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
@@ -623,6 +627,7 @@ class InferQuantizedStreamingFCLayer(Transformation):
                         graph.node.remove(n)
                         graph_modified = True
         if graph_modified:
+            model = model.transform(MinimizeAccumulatorWidth())
             model = model.transform(InferShapes())
             model = model.transform(InferDataTypes())
         return (model, graph_modified)
diff --git a/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py b/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c54a5efbd3b28f0fbfd074b512929edab234e78
--- /dev/null
+++ b/src/finn/transformation/fpgadataflow/minimize_accumulator_width.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2020, Xilinx
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of FINN nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from finn.custom_op.registry import getCustomOp
+from finn.transformation import Transformation
+from finn.util.fpgadataflow import is_fpgadataflow_node
+
+
+class MinimizeAccumulatorWidth(Transformation):
+    """Run minimize_accumulator_width(model) on each fpgadataflow custom op
+    that defines it. Always reports the model as unmodified to the caller."""
+
+    def __init__(self):
+        super().__init__()
+
+    def apply(self, model):
+        for graph_node in model.graph.node:
+            if is_fpgadataflow_node(graph_node) is not True:
+                continue
+            custom_op = getCustomOp(graph_node)
+            if hasattr(custom_op, "minimize_accumulator_width"):
+                custom_op.minimize_accumulator_width(model)
+        return (model, False)
diff --git a/src/finn/transformation/streamline/round_thresholds.py b/src/finn/transformation/streamline/round_thresholds.py
index c33281d85449c173a4631297fd1d67ac0aed8c81..8626ef40619b067c6672c9017ddcb747998c3f2c 100644
--- a/src/finn/transformation/streamline/round_thresholds.py
+++ b/src/finn/transformation/streamline/round_thresholds.py
@@ -51,10 +51,20 @@ class RoundAndClipThresholds(Transformation):
                     model.set_tensor_datatype(n.input[1], idtype)
                     graph_modified = True
                 if idtype.is_integer() and not idtype.signed() and (Tnew < 0).any():
-                    # clip any negative thresholds
+                    # clip any negative thresholds if input is unsigned
                     Tnew = np.clip(Tnew, 0, None)
                     model.set_initializer(n.input[1], Tnew)
                     # use same datatype as inputs for thresholds
                     model.set_tensor_datatype(n.input[1], idtype)
                     graph_modified = True
+                if idtype.is_integer() and (
+                    (Tnew < (idtype.min() - 1)).any()
+                    or (Tnew > (idtype.max() + 1)).any()
+                ):
+                    # clip any large thresholds to input range + 1
+                    Tnew = np.clip(Tnew, idtype.min() - 1, idtype.max() + 1)
+                    model.set_initializer(n.input[1], Tnew)
+                    # use same datatype as inputs for thresholds
+                    model.set_tensor_datatype(n.input[1], idtype)
+                    graph_modified = True
         return (model, graph_modified)
diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py
index 6c92e9b2765b1c2be6f95ee148964bccfb3cd7be..62d5947e3b7e06375cc9d48a2cf32b4f685e7861 100644
--- a/src/finn/util/basic.py
+++ b/src/finn/util/basic.py
@@ -259,6 +259,33 @@ def pad_tensor_to_multiple_of(ndarray, pad_to_dims, val=0, distr_pad=False):
     return ret
 
 
+def calculate_matvec_accumulator_range(matrix, vec_dt):
+    """Return (acc_min, acc_max): bounds on the accumulator of a dot product
+    x * A, for matrix A of dims (MW, MH) and a (1, MW) vector x whose elements
+    have datatype vec_dt.
+
+    The bounds are conservative: each of the MW terms is assumed to reach the
+    extreme weight-times-input product, with inputs spanning the whole range of
+    vec_dt.
+    """
+    w_lo = matrix.min()
+    w_hi = matrix.max()
+    n_terms = matrix.shape[0]
+    x_lo = vec_dt.min()
+    x_hi = vec_dt.max()
+    # a product of two ranges takes its extremes at the range endpoints, so
+    # the four corner products bound every single weight * input term
+    corner_products = [
+        w_lo * x_hi,
+        w_lo * x_lo,
+        w_hi * x_hi,
+        w_hi * x_lo,
+    ]
+    acc_min = n_terms * min(corner_products)
+    acc_max = n_terms * max(corner_products)
+    return (acc_min, acc_max)
+
+
 def gen_finn_dt_tensor(finn_dt, tensor_shape):
     """Generates random tensor in given shape and with given FINN DataType."""
     if type(tensor_shape) == list:
diff --git a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
index d77065ad9396d0cc8dd57a39ed823fffcb30ee47..30d5ae64cfec84e089426d389a8cb607cd71c12f 100644
--- a/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
+++ b/tests/fpgadataflow/test_convert_to_hls_layers_fc.py
@@ -123,6 +123,7 @@ def test_convert_to_hls_layers_tfc_w1a1():
     # do forward pass in PyTorch/Brevitas
     expected = tfc.forward(input_tensor).detach().numpy()
     assert np.isclose(produced, expected, atol=1e-3).all()
+    os.remove(export_onnx_path)
 
 
 @pytest.mark.vivado