diff --git a/fetch-repos.sh b/fetch-repos.sh
index b0f6400ed142b203b1c9f6d7ea4ac6ababcf34d1..b7b616e166266e810305f543e0ca39276b363372 100755
--- a/fetch-repos.sh
+++ b/fetch-repos.sh
@@ -27,7 +27,7 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-QONNX_COMMIT="f702b17cdb9d5e57f85f43a5d33890647e063de6"
+QONNX_COMMIT="13d777a2aa0dc449dc3de7aa369c1e155d6ce2c2"
 FINN_EXP_COMMIT="9cbd2787b5160e2b44e0e8164a0df1457dbd5366"
 BREVITAS_COMMIT="a5b71d6de1389d3e7db898fef72e014842670f03"
 PYVERILATOR_COMMIT="766e457465f5c0dd315490d7b9cc5d74f9a76f4f"
@@ -38,7 +38,7 @@
 AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b"
 XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e"
 EXP_BOARD_FILES_MD5="30eecc497c31050bd46d10ea20eba232"
-QONNX_URL="https://github.com/fastmachinelearning/qonnx.git"
+QONNX_URL="https://github.com/i-colbert/qonnx.git"
 FINN_EXP_URL="https://github.com/Xilinx/finn-experimental.git"
 BREVITAS_URL="https://github.com/Xilinx/brevitas.git"
 PYVERILATOR_URL="https://github.com/maltanar/pyverilator.git"
diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
index df9d1f1e70674f7bc91460e154f4e24af08df79c..ed19b93bb2d986dff339967791f52b02c496fed7 100644
--- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py
@@ -350,13 +350,23 @@ class MatrixVectorActivation(HLSCustomOp):
         # adder tree
         addertree_luts = (W + A) * (2 * Q - 1)
         # accumulator
-        acc_bits = W + A + np.ceil(math.log(MW, 2))
+        acc_datatype = self.get_accumulator_datatype()
+        # if accDataType is not set, it defaults to INT32, which would be a
+        # large overestimate in most (if not all) cases. In that scenario,
+        # use the minimum accumulator width as determined by the data types.
+        alpha = math.log(MW, 2) + W + A - 1 - int(idt.signed())
+        phi = lambda x_: math.log(1 + pow(2, -x_), 2)
+        acc_bits = min(
+            acc_datatype.bitwidth(),
+            np.ceil(alpha + phi(alpha) + 1)
+        )
         acc_luts = acc_bits
         # thresholds and threshold comparators
         thr_luts = 0
         comp_luts = 0
         noact = self.get_nodeattr("noActivation")
-        if noact == 0:
+        tmem_style = self.get_nodeattr("ram_style_thresholds")
+        if (noact == 0) and (tmem_style == "distributed"):
             odt = self.get_output_datatype()
             B = odt.bitwidth()
             thr_luts = (2**B - 1) * acc_bits * math.ceil(self.calc_tmem() / 64)
@@ -405,6 +415,10 @@ class MatrixVectorActivation(HLSCustomOp):
         else:
             raise Exception("Undefined input ind for this layer type")
 
+    def get_accumulator_datatype(self):
+        """Returns FINN DataType of the accumulator."""
+        return DataType[self.get_nodeattr("accDataType")]
+
     def get_weight_datatype(self):
         """Returns FINN DataType of weights."""
         return DataType[self.get_nodeattr("weightDataType")]
diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
index a411d245a9a397e6d827abfa8ee4a784f207ecd5..a0b9268957910a01ab9726108ab3582ec602794c 100644
--- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
+++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py
@@ -216,6 +216,10 @@ class VectorVectorActivation(HLSCustomOp):
         """Returns FINN DataType of weights."""
         return DataType[self.get_nodeattr("weightDataType")]
 
+    def get_accumulator_datatype(self):
+        """Returns FINN DataType of the accumulator."""
+        return DataType[self.get_nodeattr("accDataType")]
+
     def get_output_datatype(self, ind=0):
         """Returns FINN DataType of output."""
         return DataType[self.get_nodeattr("outputDataType")]
@@ -1172,14 +1176,24 @@ class VectorVectorActivation(HLSCustomOp):
         else:
             mult_luts = (2 * math.ceil((W + A) / 6) - 1) * (W + A)
         # accumulator
+        acc_datatype = self.get_accumulator_datatype()
         k_h, k_w = self.get_nodeattr("Kernel")
-        acc_bits = W + A + math.ceil(math.log(k_h * k_w, 2))
+        # if accDataType is not set, it defaults to INT32, which would be a
+        # large overestimate in most (if not all) cases. In that scenario,
+        # use the minimum accumulator width as determined by the data types.
+        alpha = math.log(k_h * k_w, 2) + W + A - 1 - int(idt.signed())
+        phi = lambda x_: math.log(1 + pow(2, -x_), 2)
+        acc_bits = min(
+            acc_datatype.bitwidth(),
+            np.ceil(alpha + phi(alpha) + 1)
+        )
         acc_luts = acc_bits
         # thresholds and threshold comparators
         thr_luts = 0
         comp_luts = 0
         noact = self.get_nodeattr("noActivation")
-        if noact == 0:
+        tmem_style = self.get_nodeattr("ram_style_thresholds")
+        if (noact == 0) and (tmem_style == "distributed"):
             odt = self.get_output_datatype()
             B = odt.bitwidth()
             thr_luts = (2**B - 1) * acc_bits * math.ceil(self.calc_tmem() / 64)
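
Note on the accumulator sizing used in both LUT estimates above: the patch replaces the old heuristic W + A + ceil(log2(N)) with the closed-form bound ceil(alpha + phi(alpha) + 1), where alpha = log2(N) + W + A - 1 - signed(input) and phi(x) = log2(1 + 2^-x), capped at the bitwidth of accDataType. Below is a minimal standalone sketch of the same arithmetic; the function name and the example values (N=64, W=4, A=4, unsigned input) are illustrative assumptions, not taken from the patch.

    import math

    def min_acc_bits(num_inputs, weight_bits, act_bits, signed_input):
        # alpha bounds log2 of the worst-case magnitude of the dot product
        alpha = math.log2(num_inputs) + weight_bits + act_bits - 1 - int(signed_input)
        # phi adds the small correction so that 2**(alpha + phi) = 2**alpha + 1
        phi = math.log2(1 + 2 ** -alpha)
        return math.ceil(alpha + phi + 1)

    # 64-element dot product of signed 4-bit weights and unsigned 4-bit activations
    print(min_acc_bits(64, 4, 4, signed_input=False))  # -> 15, vs. the INT32 default

Taking min() against accDataType's bitwidth means the estimate only falls back to this bound when accDataType would otherwise force a pessimistic default such as INT32; an explicitly narrower accDataType still wins.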