diff --git a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py index 6aa26af453b61aac1784cbb5f094a9cc6cc608fd..468e660117da9c1b5ae2d4dbcb67482bfec36763 100644 --- a/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/matrixvectoractivation.py @@ -589,69 +589,73 @@ class MatrixVectorActivation(HLSCustomOp): return ret def minimize_accumulator_width(self, model): - weights = model.get_initializer(self.onnx_node.input[1]) - # since in the calculation the values of the weight matrix are used, - # for the bipolar case they need to be converted to bipolar - if self.get_nodeattr("binaryXnorMode"): - weights = 2 * weights - 1 - if len(self.onnx_node.input) > 2: - thresholds = model.get_initializer(self.onnx_node.input[2]) - else: - thresholds = None - idt = self.get_input_datatype() - # calculate minimum and maximum values of accumulator - (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt) - if thresholds is not None: - threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds) - # set threshold datatype (and accumulator datatype implicitly) - min_threshold = thresholds.min() - max_threshold = thresholds.max() - # clip threshold values - clip_upper = None - clip_lower = None - if max_threshold > acc_max + 1: - clip_upper = acc_max + 1 - if min_threshold < acc_min: - clip_lower = acc_min - if (clip_lower is not None) or (clip_upper is not None): - warnings.warn("Clipping some thresholds in %s" % self.onnx_node.name) - thresholds = np.clip(thresholds, clip_lower, clip_upper) - model.set_initializer(self.onnx_node.input[2], thresholds) + """Minimize the accumulator bit width according to the weight values, + input data types, and size of dot product""" + if not self.get_nodeattr("runtime_writeable_weights"): + weights = model.get_initializer(self.onnx_node.input[1]) + # since in the calculation the values of the weight matrix are used, + # for the bipolar case they need to be converted to bipolar + if self.get_nodeattr("binaryXnorMode"): + weights = 2 * weights - 1 + if len(self.onnx_node.input) > 2: + thresholds = model.get_initializer(self.onnx_node.input[2]) + else: + thresholds = None + idt = self.get_input_datatype() + # calculate minimum and maximum values of accumulator according to the + # weight values using the bounds derived in https://arxiv.org/abs/2301.13376 + (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt) + if thresholds is not None: threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds) + # set threshold datatype (and accumulator datatype implicitly) min_threshold = thresholds.min() max_threshold = thresholds.max() - # get range required by threshold values - tdt_min = min(acc_min, min_threshold) - tdt_max = max(acc_max, max_threshold) - if tdt_min < 0: - if abs(tdt_min) > tdt_max: - tdt = DataType.get_smallest_possible(tdt_min) + # clip threshold values + clip_upper = None + clip_lower = None + if max_threshold > acc_max + 1: + clip_upper = acc_max + 1 + if min_threshold < acc_min: + clip_lower = acc_min + if (clip_lower is not None) or (clip_upper is not None): + warnings.warn("Clipping some thresholds in %s" % self.onnx_node.name) + thresholds = np.clip(thresholds, clip_lower, clip_upper) + model.set_initializer(self.onnx_node.input[2], thresholds) + threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds) + min_threshold = thresholds.min() + max_threshold = thresholds.max() + # get range required by threshold values + tdt_min = min(acc_min, min_threshold) + tdt_max = max(acc_max, max_threshold) + if tdt_min < 0: + if abs(tdt_min) > tdt_max: + tdt = DataType.get_smallest_possible(tdt_min) + else: + tdt = DataType.get_smallest_possible(-tdt_max - 1) else: - tdt = DataType.get_smallest_possible(-tdt_max - 1) + tdt = DataType.get_smallest_possible(tdt_max) + assert np.vectorize(tdt.allowed)( + threshold_tensor + ).all(), "Thresholds in %s can't be expressed with type %s" % ( + self.onnx_node.name, + str(tdt), + ) + self.set_nodeattr("accDataType", tdt.name) else: - tdt = DataType.get_smallest_possible(tdt_max) - assert np.vectorize(tdt.allowed)( - threshold_tensor - ).all(), "Thresholds in %s can't be expressed with type %s" % ( - self.onnx_node.name, - str(tdt), - ) - self.set_nodeattr("accDataType", tdt.name) - else: - if acc_min < 0: - if abs(acc_min) > acc_max: - adt = DataType.get_smallest_possible(acc_min) + if acc_min < 0: + if abs(acc_min) > acc_max: + adt = DataType.get_smallest_possible(acc_min) + else: + adt = DataType.get_smallest_possible(-acc_max - 1) else: - adt = DataType.get_smallest_possible(-acc_max - 1) - else: - adt = DataType.get_smallest_possible(acc_max) - # ensure a datatype divisible by 8-bits in case this is the last node - bw = roundup_to_integer_multiple(adt.bitwidth(), 8) - new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw)) - adt = DataType[new_adt_name] - self.set_nodeattr("accDataType", adt.name) - # for no-activation nodes, output dt = acc dt - self.set_nodeattr("outputDataType", adt.name) + adt = DataType.get_smallest_possible(acc_max) + # ensure a datatype divisible by 8-bits in case this is the last node + bw = roundup_to_integer_multiple(adt.bitwidth(), 8) + new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw)) + adt = DataType[new_adt_name] + self.set_nodeattr("accDataType", adt.name) + # for no-activation nodes, output dt = acc dt + self.set_nodeattr("outputDataType", adt.name) return DataType[self.get_nodeattr("accDataType")] def minimize_weight_bit_width(self, model): diff --git a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py index 796225a712203c5cb311590e478367835d5b15f5..f38abcc7632320746178b3dfac59815f49d67501 100644 --- a/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py +++ b/src/finn/custom_op/fpgadataflow/vectorvectoractivation.py @@ -104,69 +104,72 @@ class VectorVectorActivation(HLSCustomOp): return my_attrs def minimize_accumulator_width(self, model): - weights = model.get_initializer(self.onnx_node.input[1]) - k_h, k_w = self.get_nodeattr("Kernel") - fm = self.get_nodeattr("Channels") - # put weights into the shape expected by calculate_matvec_accumulator_range - weights = weights.reshape(fm, k_h * k_w).transpose() - if len(self.onnx_node.input) > 2: - thresholds = model.get_initializer(self.onnx_node.input[2]) - else: - thresholds = None - idt = self.get_input_datatype() - # calculate minimum and maximum values of accumulator - (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt) - if thresholds is not None: - threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds) - # set threshold datatype (and accumulator datatype implicitly) - min_threshold = thresholds.min() - max_threshold = thresholds.max() - # clip threshold values - clip_upper = None - clip_lower = None - if max_threshold > acc_max + 1: - clip_upper = acc_max + 1 - if min_threshold < acc_min: - clip_lower = acc_min - if (clip_lower is not None) or (clip_upper is not None): - warnings.warn("Clipping some thresholds in %s" % self.onnx_node.name) - thresholds = np.clip(thresholds, clip_lower, clip_upper) - model.set_initializer(self.onnx_node.input[2], thresholds) + """Minimize the accumulator bit width according to the weight values, + input data types, and size of dot product""" + if not self.get_nodeattr("runtime_writeable_weights"): + weights = model.get_initializer(self.onnx_node.input[1]) + k_h, k_w = self.get_nodeattr("Kernel") + fm = self.get_nodeattr("Channels") + # put weights into the shape expected by calculate_matvec_accumulator_range + weights = weights.reshape(fm, k_h * k_w).transpose() + if len(self.onnx_node.input) > 2: + thresholds = model.get_initializer(self.onnx_node.input[2]) + else: + thresholds = None + idt = self.get_input_datatype() + # calculate minimum and maximum values of accumulator + (acc_min, acc_max) = calculate_matvec_accumulator_range(weights, idt) + if thresholds is not None: threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds) + # set threshold datatype (and accumulator datatype implicitly) min_threshold = thresholds.min() max_threshold = thresholds.max() - # get range required by threshold values - tdt_min = min(acc_min, min_threshold) - tdt_max = max(acc_max, max_threshold) - if tdt_min < 0: - if abs(tdt_min) > tdt_max: - tdt = DataType.get_smallest_possible(tdt_min) + # clip threshold values + clip_upper = None + clip_lower = None + if max_threshold > acc_max + 1: + clip_upper = acc_max + 1 + if min_threshold < acc_min: + clip_lower = acc_min + if (clip_lower is not None) or (clip_upper is not None): + warnings.warn("Clipping some thresholds in %s" % self.onnx_node.name) + thresholds = np.clip(thresholds, clip_lower, clip_upper) + model.set_initializer(self.onnx_node.input[2], thresholds) + threshold_tensor = self.get_hls_compatible_threshold_tensor(thresholds) + min_threshold = thresholds.min() + max_threshold = thresholds.max() + # get range required by threshold values + tdt_min = min(acc_min, min_threshold) + tdt_max = max(acc_max, max_threshold) + if tdt_min < 0: + if abs(tdt_min) > tdt_max: + tdt = DataType.get_smallest_possible(tdt_min) + else: + tdt = DataType.get_smallest_possible(-tdt_max - 1) else: - tdt = DataType.get_smallest_possible(-tdt_max - 1) + tdt = DataType.get_smallest_possible(tdt_max) + assert np.vectorize(tdt.allowed)( + threshold_tensor + ).all(), "Thresholds in %s can't be expressed with type %s" % ( + self.onnx_node.name, + str(tdt), + ) + self.set_nodeattr("accDataType", tdt.name) else: - tdt = DataType.get_smallest_possible(tdt_max) - assert np.vectorize(tdt.allowed)( - threshold_tensor - ).all(), "Thresholds in %s can't be expressed with type %s" % ( - self.onnx_node.name, - str(tdt), - ) - self.set_nodeattr("accDataType", tdt.name) - else: - if acc_min < 0: - if abs(acc_min) > acc_max: - adt = DataType.get_smallest_possible(acc_min) + if acc_min < 0: + if abs(acc_min) > acc_max: + adt = DataType.get_smallest_possible(acc_min) + else: + adt = DataType.get_smallest_possible(-acc_max - 1) else: - adt = DataType.get_smallest_possible(-acc_max - 1) - else: - adt = DataType.get_smallest_possible(acc_max) - # ensure a datatype divisible by 8-bits in case this is the last node - bw = roundup_to_integer_multiple(adt.bitwidth(), 8) - new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw)) - adt = DataType[new_adt_name] - self.set_nodeattr("accDataType", adt.name) - # for no-activation nodes, output dt = acc dt - self.set_nodeattr("outputDataType", adt.name) + adt = DataType.get_smallest_possible(acc_max) + # ensure a datatype divisible by 8-bits in case this is the last node + bw = roundup_to_integer_multiple(adt.bitwidth(), 8) + new_adt_name = adt.name.replace(str(adt.bitwidth()), str(bw)) + adt = DataType[new_adt_name] + self.set_nodeattr("accDataType", adt.name) + # for no-activation nodes, output dt = acc dt + self.set_nodeattr("outputDataType", adt.name) return DataType[self.get_nodeattr("accDataType")] def minimize_weight_bit_width(self, model):