def overwrite_range(in_ranges, out_ranges, in_dtypes=None, out_dtypes=None):
    """Create a handler that fills quantization records from fixed ranges.

    A prototype QType is built for every entry of ``in_ranges`` and
    ``out_ranges`` by unpacking the entry into ``QType.from_min_max_sq``.

    Args:
        in_ranges: One range entry per input.
        out_ranges: One range entry per output.
        in_dtypes: dtype for each input range; ``np.int8`` for all of them
            when ``None``.
        out_dtypes: dtype for each output range; ``np.int8`` for all of them
            when ``None``.

    Returns:
        ``handler(G, qrec, node, in_qs=None, out_qs=None)`` which returns a
        ``QRec.scaled`` record.
    """
    if in_dtypes is None:
        in_dtypes = [np.int8] * len(in_ranges)
    if out_dtypes is None:
        out_dtypes = [np.int8] * len(out_ranges)

    def make_prototypes(ranges, dtypes):
        return [
            QType.from_min_max_sq(*bounds, dtype=dtype)
            for bounds, dtype in zip(ranges, dtypes)
        ]

    proto_in_qs = make_prototypes(in_ranges, in_dtypes)
    proto_out_qs = make_prototypes(out_ranges, out_dtypes)

    def fill_gaps(current, prototypes, fallback):
        # A falsy list on the record means "use the list passed to the
        # handler"; otherwise every falsy slot is taken from the prototype at
        # the same position (zip_longest pads the shorter side with None).
        if not current:
            return fallback
        return [
            have or proto for have, proto in zip_longest(current, prototypes)
        ]

    def handler(G, qrec, node, in_qs=None, out_qs=None):
        if qrec is None:
            # No record yet: hand out copies of the prototype lists.
            return QRec.scaled(in_qs=list(proto_in_qs),
                               out_qs=list(proto_out_qs))
        merged_in_qs = fill_gaps(qrec.in_qs, proto_in_qs, in_qs)
        merged_out_qs = fill_gaps(qrec.out_qs, proto_out_qs, out_qs)
        return QRec.scaled(in_qs=merged_in_qs, out_qs=merged_out_qs)

    return handler
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Build the quantization record for an activation node.

    For hard-swish / hard-sigmoid parameters the input is widened to the
    range [-6, 6] when ``scale * 2**(bits - 1)`` of the first input is below
    6. The output qtype is a copy of the forced output qtype when one is
    given, otherwise it is derived from ``stats['range_out'][0]``.

    Returns:
        A ``MultQuantizationRecord`` with the chosen input and output qtypes.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]

    hard_activations = (HSwishActivationParameters,
                        HSigmoidActivationParameters)
    if isinstance(params, hard_activations):
        first_in_q = in_qs[0]
        representable_max = first_in_q.scale * pow(2, first_in_q.bits - 1)
        if representable_max < 6:
            in_qs = [QType.from_min_max_sq(-6, 6, dtype=first_in_q.dtype)]

    if not force_out_q:
        out_range = stats['range_out'][0]
        o_q = QType.from_min_max_sq(out_range['min'],
                                    out_range['max'],
                                    dtype=out_dtype)
        return MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])

    fusion = kwargs.get('fusion', None)
    if fusion and fusion.fusion_type in ['conv_active_pool', 'conv_active']:
        keeps_own_input = isinstance(
            params,
            (SigmoidActivationParameters, TanHActivationParameters,
             HSwishActivationParameters, HSigmoidActivationParameters))
        if not keeps_own_input:
            # Inside a convolution fusion the remaining activation types
            # take the forced output qtype as their input qtype too.
            in_qs = [deepcopy(force_out_q)]
    o_q = deepcopy(force_out_q)
    return MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, out_dtype = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] if force_out_q: return None in_qs = [QType.from_min_max_sq(-8, 8, dtype=np.int8, forced=True)] o_q = QType.from_min_max_sq(min_val=-1.0, max_val=1.0, dtype=out_dtype, forced=True) return QRec.scaled(in_qs=in_qs, out_qs=[o_q])
def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs): return [ QType.from_min_max_sq(stats['range_in'][idx]['min'], stats['range_in'][idx]['max'], dtype=np.int8) if dim is not None else None for idx, dim in enumerate(params.in_dims) ]
def _common(cls, node: TFLiteNode, **kwargs):
    """Create an ``SSDDetectorParameters`` node from a TFLite node.

    The node's inputs are wired into the graph, and up to four output slots
    are checked: any slot without a known tensor gets a new graph output and
    a provisional ``Detect_<idx>`` tensor. When ``opts['load_quantization']``
    is set, a scaled quantization record is stored in ``G.quantization``.

    Args:
        node: TFLite node being imported.
        **kwargs: Must contain ``G``, ``opts``, ``all_nodes`` and
            ``importer``.

    Returns:
        The created ``SSDDetectorParameters`` instance.
    """
    custom_opts = node.get_custom_options()
    G = kwargs['G']
    opts = kwargs['opts']
    all_nodes = kwargs['all_nodes']
    importer = kwargs['importer']

    inputs = [all_nodes[tensor] for tensor in node.input]
    outputs = []
    for slot in range(4):
        if slot < len(node.output):
            outputs.append(all_nodes.get(node.output[slot]))
        else:
            outputs.append(None)

    if 'max_bb_before_nms' not in custom_opts:
        custom_opts['max_bb_before_nms'] = 300
    params = SSDDetectorParameters(node.name, parameters=custom_opts)

    # Covers the case where not all outputs are generated by the conversion
    # tool: missing ones are replaced by provisional tensors.
    overriden_outputs = []
    for idx, existing in enumerate(outputs):
        if existing:
            overriden_outputs.append(node.output[idx])
        else:
            oparams = G.add_output()
            otensor = TensorBase("Detect_%s" % idx)
            overriden_outputs.append(otensor)
            importer.provisional_outputs[otensor] = (oparams, 0, None)
    node.override_outputs(overriden_outputs)

    for to_idx, source in enumerate(inputs):
        edge = NNEdge(from_node=source[0],
                      to_node=params,
                      from_idx=source[1],
                      to_idx=to_idx)
        G.add_edge(edge)

    if not opts.get('load_quantization'):
        return params

    in_qtypes = []
    for tensor in node.input:
        tensor_q = tensor.qtype
        if tensor_q.is_asymmetric or not tensor_q.signed:
            # Rebuild asymmetric or unsigned input qtypes from their range.
            in_qtypes.append(
                QType.from_min_max_sq(tensor.qtype.min_val,
                                      tensor.qtype.max_val))
        else:
            in_qtypes.append(tensor_q)

    o_boxes_qtype = QType(min_val=-2,
                          max_val=2,
                          dtype=np.int16,
                          scale=2**(-14))
    o_scores_qtype = node.input[1].qtype
    o_class_qtype = QType(scale=1, dtype=np.int8)
    out_qtypes = [o_boxes_qtype, o_class_qtype, o_scores_qtype, o_class_qtype]
    G.quantization[NodeId(params)] = QRec.scaled(in_qs=in_qtypes,
                                                 out_qs=out_qtypes)
    return params
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, out_dtype = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] if force_out_q: return None o_q = QType.from_min_max_sq(min_val=-1.0, max_val=1.0, dtype=out_dtype) return MultQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
def get_weights_qtype_by_channel(cls, filter_shape, out_idx, weights_node):
    """Compute a per-channel weights qtype along axis ``out_idx``.

    Minimum and maximum are reduced over every axis except ``out_idx`` and
    clamped so that the range always contains 0. Channels whose resulting
    scale is below ``QType.kInt8NearZeroTolerance`` are zeroed in the weights
    and their scale is set to 1.

    Args:
        filter_shape: Shape of the filter; must have 2 or 4 entries.
        out_idx: Index of the axis that is quantized per channel.
        weights_node: Node providing ``dqvalue``; its ``value`` and ``qtype``
            are overwritten when tiny channels are zeroed.

    Returns:
        The QType created by ``QType.from_min_max_sq``.
    """
    assert len(filter_shape) == 4 or len(filter_shape) == 2
    dqweights = weights_node.dqvalue
    filter_axis = tuple(
        idx for idx in range(len(filter_shape)) if idx != out_idx)
    # get the minimums and maximums above and below 0
    w_mins = np.minimum(np.min(dqweights, axis=filter_axis), 0)
    w_maxes = np.maximum(np.max(dqweights, axis=filter_axis), 0)

    wqtype = QType.from_min_max_sq(w_mins,
                                   w_maxes,
                                   quantized_dimension=out_idx,
                                   narrow_range=True,
                                   scale_zero_as_one=True)
    tiny_weight_scales = wqtype.scale < QType.kInt8NearZeroTolerance
    if np.count_nonzero(tiny_weight_scales):
        # Sets weights scaled under a very small value to zero to avoid
        # silly mult biases.
        # The selector mixes slice objects with a boolean mask, so it must
        # only ever be used as an index. It is not passed to np.any(): that
        # would make NumPy build an array from a ragged sequence, and the
        # count_nonzero() test above already guarantees a selected channel.
        selector = tuple(
            tiny_weight_scales if idx == out_idx else slice(None)
            for idx in range(len(dqweights.shape)))
        dqweights[selector] = 0
        wqtype.scale = np.where(tiny_weight_scales, 1, wqtype.scale)
        weights_node.value = dqweights
        weights_node.qtype = None
    return wqtype
def _update_qrecs(self, G, qrecs, all_nodes, ranges_dict): for node, idx, _, qtype in all_nodes.values(): if qtype is None and node.name not in ranges_dict.keys(): continue if node.name not in G: continue nid = NodeId(node) qrec = qrecs.get(nid) if not qrec: in_qs = [None] * G.num_in_edges(node) out_qs = [None] * len(G.indexed_out_edges(node)) qrec = QRec.scaled(in_qs=in_qs, out_qs=out_qs) qrecs[nid] = qrec if node.name in ranges_dict.keys(): out_min, out_max = ranges_dict[node.name]["range"] dtype = ranges_dict[node.name].get("dtype", np.int8) bits = ranges_dict[node.name].get("n_bits", 8) channel = ranges_dict[node.name].get("per_channel", None) qtype = QType.from_min_max_sq(out_min, out_max, dtype=dtype, bits=bits, quantized_dimension=channel) qrec.out_qs[idx] = qtype
def match(self, G: GraphView, set_identity: bool = True):
    """Propagate a suitable input qtype upwards from sigmoid/swish nodes.

    Does nothing when the graph has no quantization. For every sigmoid,
    hard-sigmoid and hard-swish node a new input qtype is created and passed
    to ``propagate_qtype_up`` along the edge feeding input 0.

    Args:
        G: Graph to process.
        set_identity: When true, ``self.set_identity(G)`` is called at the
            end.

    Returns:
        ``None`` when the graph has no quantization, otherwise ``False``.
    """
    if not G.quantization:
        return

    wanted = (SigmoidActivationParameters, HSigmoidActivationParameters,
              HSwishActivationParameters)
    activations = [node for node in G.nodes() if isinstance(node, wanted)]
    # All records are looked up before anything is propagated.
    records = [G.quantization[NodeId(node)] for node in activations]

    for activation, qrec in zip(activations, records):
        first_input_edges = [
            edge for edge in G.in_edges(activation.name) if edge.to_idx == 0
        ]
        in_edge = first_input_edges[0]
        in_q = qrec.in_qs[0]
        min_val, max_val = in_q.min_val, in_q.max_val
        if isinstance(activation, (HSigmoidActivationParameters,
                                   SigmoidActivationParameters)):
            # Hard sigmoid implements a RELU, be sure 6 can be representable
            min_val, max_val = 0, 6
        elif isinstance(activation, HSwishActivationParameters):
            min_val, max_val = 0, in_q.max_val * 6
        widened_q = QType.from_min_max_sq(min_val=min_val,
                                          max_val=max_val,
                                          dtype=in_q.dtype)
        propagate_qtype_up(G, widened_q, in_edge)

    if set_identity:
        self.set_identity(G)
    return False
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize a node with symmetric inputs and a symmetric output.

    Inputs are passed through ``cls.force_symmetric_and_dtype``. The output
    is a copy of the forced output qtype when one is given, otherwise it is
    derived from ``stats['range_out'][0]``. ``dtype`` and ``zero_point`` are
    then marked as forced on the output and on every input.

    Returns:
        ``None`` when the inputs cannot be made symmetric or the forced
        output is asymmetric, otherwise a ``QRec.scaled`` record.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    # NOTE: The autotiler kernel scales and clips after the operation and
    # before the activation so there is no change if this is in a fusion or
    # not
    if isinstance(params, MatrixAddParameters):
        scaled_idx = params.force_quantized_index
    else:
        scaled_idx = None

    in_qs = cls.force_symmetric_and_dtype(in_qs)
    if in_qs is None:
        return None

    if force_out_q:
        o_q = deepcopy(force_out_q)
        if o_q.is_asymmetric:
            return None
    else:
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        out_range = stats['range_out'][0]
        o_q = QType.from_min_max_sq(out_range['min'],
                                    out_range['max'],
                                    dtype=out_dtype)

    o_q.set_forced(flags=['dtype', 'zero_point'])
    forced_in_qs = []
    for in_q in in_qs:
        forced_in_qs.append(in_q.set_forced(flags=['dtype', 'zero_point']))
    return QRec.scaled(in_qs=forced_in_qs,
                       out_qs=[o_q],
                       scaled_idx=scaled_idx)
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, out_dtype = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] in_q = in_qs[0] if params.lower_bound != 0: raise NotImplementedError( 'relu with non zero lower bound is not implemented for NE16 quantizer' ) cls.check_valid_ranges(params, stats, idx=0, dirs='out') if force_out_q: # since the relu is done by setting 0 zero point and scaling to the upper bound # we cannot be forced to something that does not meet this requirement if not force_out_q.zero_point_asymmetric_zero: return None if params.upper_bound is not None and not np.isclose( force_out_q.max, params.upper_bound, atol=0.01): return None # if the output has been forced then propagate it in_q = force_out_q else: upper = params.upper_bound if params.upper_bound is not None else stats[ 'range_out'][0]['max'] in_q = QType.from_min_max_sq(0, upper, dtype=in_q.dtype, asymmetric=True, ne16=True, dont_copy_attr=['ne16']) o_q = deepcopy(in_q) o_q.set_forced() qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q], ne16=True) compute_in_out_scale(qrec) return qrec
def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs): return [QType.from_min_max_sq( stats['range_in'][idx]['min'], stats['range_in'][idx]['max'], dtype=np.int8, asymmetric=in_qs and in_qs[idx].asymmetric and cls.can_handle_asymmetric_input(params, **kwargs)) if dim is not None else None for idx, dim in enumerate(params.in_dims)]
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize a filter node (optionally fused with an activation).

    Weights are quantized to int8 from the weights node's ``dqvalue``. In a
    ``conv_active`` / ``conv_active_pool`` fusion the output range depends on
    the second contained node:

    * sigmoid, tanh or hard-swish: statistics of the first contained node;
    * hard-sigmoid: fixed range [0, 6];
    * anything else: statistics of that second node.

    Returns:
        A ``MultScalableFilterQuantizationRecord`` with input, weights and
        biases qtypes, the output qtype and the multiplicative bias qtype.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    opts = kwargs['opts']
    fusion = kwargs.get('fusion', None)
    G = kwargs['G']
    weights_node = cls.get_weights_node(G, fusion if fusion else params)

    weights_q = QType.from_array_sq(
        arr=weights_node.dqvalue,
        quantized_dimension=cls.get_quantized_dimension(params, opts),
        dtype=np.int8,
        narrow_range=opts['narrow_weights'])

    fixed_bounds = None
    if fusion and fusion.fusion_type in ['conv_active_pool', 'conv_active']:
        all_stats = kwargs['all_stats']
        stats = all_stats[NodeId(fusion, fusion.contained_nodes()[0])]
        activation = fusion.contained_nodes()[1]
        if isinstance(activation,
                      (SigmoidActivationParameters, TanHActivationParameters,
                       HSwishActivationParameters)):
            # Keep the statistics of the first contained node.
            pass
        elif isinstance(activation, HSigmoidActivationParameters):
            # Hard sigmoid implements a RELU, be sure 6 can be representable
            fixed_bounds = (0, 6)
        else:
            # Take stats from activation after the convolution
            stats = all_stats[NodeId(fusion, fusion.contained_nodes()[1])]

    if fixed_bounds is None:
        out_range = stats['range_out'][0]
        min_val, max_val = out_range['min'], out_range['max']
    else:
        min_val, max_val = fixed_bounds

    if force_out_q:
        o_q = force_out_q
    else:
        o_q = QType.from_min_max_sq(min_val=min_val,
                                    max_val=max_val,
                                    dtype=out_dtype)

    first_in_q = in_qs[0]
    biases_q = QType(dtype=np.int32, scale=weights_q.scale * first_in_q.scale)
    mul_biases_q = MultMulBiasScaleQType.from_filter(in_qs[0], weights_q, o_q,
                                                     params)
    # returning the new weights and biases qs will force backprop
    # TODO - ACC_Q LOOKS WRONG AFTER THIS
    return MultScalableFilterQuantizationRecord(
        in_qs=[in_qs[0], weights_q, biases_q],
        out_qs=[o_q],
        acc_q=biases_q,
        calc_q=biases_q,
        mul_biases_q=mul_biases_q)
def _common(cls, node, **kwargs):
    """Import a logistic node as a sigmoid or hard-sigmoid activation.

    ``opts['use_hard_sigmoid']`` selects ``HSigmoidActivationParameters``;
    otherwise ``SigmoidActivationParameters`` is used and, when quantization
    is being loaded, the input qtype is overridden with an int8 qtype for the
    range [-8, 8].

    Returns:
        Whatever the parent class ``_common`` returns.
    """
    opts = kwargs['opts']
    use_hard = opts.get('use_hard_sigmoid')
    if opts.get('load_quantization') and not use_hard:
        kwargs['in_qs'] = [QType.from_min_max_sq(-8, 8, dtype=np.int8)]

    if use_hard:
        params_class = HSigmoidActivationParameters
    else:
        params_class = SigmoidActivationParameters
    return super(Logistic, cls)._common(node,
                                        params_class=params_class,
                                        **kwargs)
def _common(cls, node, **kwargs):
    """Import a tanh node as a tanh or hard-tanh activation.

    ``opts['use_lut_tanh']`` selects ``TanHActivationParameters`` and, when
    quantization is being loaded, overrides the input qtype with an int8
    qtype for the range [-8, 8]; otherwise ``HTanHActivationParameters`` is
    used.

    Returns:
        Whatever the parent class ``_common`` returns.
    """
    opts = kwargs['opts']
    use_lut = opts.get('use_lut_tanh')
    if opts.get('load_quantization') and use_lut:
        kwargs['in_qs'] = [QType.from_min_max_sq(-8, 8, dtype=np.int8)]

    if use_lut:
        params_class = TanHActivationParameters
    else:
        params_class = HTanHActivationParameters
    return super(Tanh, cls)._common(node,
                                    params_class=params_class,
                                    **kwargs)
def _load_quantization(qrecs, node_recs): for tensor in node_recs: qtype = tensor.qtype if qtype: if qtype.is_sq and qtype.is_asymmetric: qtype = QType.from_min_max_sq(qtype.min_val, qtype.max_val, quantized_dimension=qtype.quantized_dimension) qrecs[NodeId(node_recs[tensor][0])] = MultConstantQuantizationRecord( in_qs=[qtype], out_qs=[qtype])
def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs): opts = kwargs['opts'] in_dtype = np.uint8 if opts.get( 'force_input_size', 8) == 8 else np.uint16 return [QType.from_min_max_sq(stats['range_in'][idx]['min'], stats['range_in'][idx]['max'], dtype=in_dtype, asymmetric=len(stats['range_in'][idx]) == 1) if dim is not None and stats['range_in'][idx] else None for idx, dim in enumerate(params.in_dims)]
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Create a float quantization record for an RNN node.

    The qtypes of the ``i_2_i_w`` and ``r_2_i_w`` inputs are replaced, in a
    deep copy of ``in_qs``, by qtypes of the float dtype built from the old
    min/max values. The ``i_2_i_w`` qtype is created with
    ``dont_generate_value=True``; the ``r_2_i_w`` qtype lists the node
    feeding ``i_2_i_w`` in ``concatenated_nodes``.

    Returns:
        A ``QRec.float`` record with a single output qtype of the float
        dtype.
    """
    _, dtype = cls.get_float_opts(**kwargs)
    names = {val: idx for idx, val in enumerate(RNNParameters.INPUT_NAMES)}
    edges = kwargs['G'].indexed_in_edges(params.name)
    in_qs = deepcopy(in_qs)

    input_w_idx = names['i_2_i_w']
    old_input_w_q = in_qs[input_w_idx]
    in_qs[input_w_idx] = QType.from_min_max_sq(old_input_w_q.min_val,
                                               old_input_w_q.max_val,
                                               dtype=dtype,
                                               dont_generate_value=True)

    recurrent_w_idx = names['r_2_i_w']
    old_recurrent_w_q = in_qs[recurrent_w_idx]
    input_w_source = edges[names['i_2_i_w']].from_node.name
    in_qs[recurrent_w_idx] = QType.from_min_max_sq(
        old_recurrent_w_q.min_val,
        old_recurrent_w_q.max_val,
        dtype=dtype,
        concatenated_nodes=[input_w_source])

    return QRec.float(in_qs=in_qs,
                      out_qs=[QType(dtype=dtype)],
                      float_dtype=dtype)
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, out_dtype = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] fusion = kwargs.get('fusion', None) in_q = in_qs[0] if not fusion and in_q.dtype == np.int32: return None if isinstance(params, (HSwishActivationParameters, HSigmoidActivationParameters)): max_val = in_q.scale * pow(2, in_q.bits - 1) if max_val < 6: in_q = QType.from_min_max_sq(-6, 6, dtype=in_q.dtype, forced=True) elif isinstance(params, SigmoidActivationParameters): in_q = QType.from_min_max_sq(-8, 8, dtype=in_q.dtype, forced=True) if force_out_q: if force_out_q.signed != in_q.signed: return None if fusion and fusion.fusion_type in ['conv_active_pool', 'conv_active']: if not isinstance(params, (SigmoidActivationParameters, HTanHActivationParameters, HSwishActivationParameters, HSigmoidActivationParameters)): in_q = deepcopy(force_out_q) o_q = deepcopy(force_out_q) # activation cannot move zeropoint unless it is a reduction step if o_q.zero_point != in_q.zero_point and in_q.dtype != np.int32: return None else: cls.check_valid_ranges(params, stats, idx=0, dirs='out') zero_point = in_q.zero_point if in_q.zero_point != 0 else None o_q = QType.from_min_max_sq(stats['range_out'][0]['min'], stats['range_out'][0]['max'], dtype=in_q.dtype, zero_point=zero_point) qrec = QRec.scaled(in_qs=[in_q], out_qs=[o_q]) if isinstance(params, (SigmoidScaledSymmetricMult, TanHActivationParameters)): compute_in_out_scale(qrec, extra_scale=QType.Pow2(bits=32, q=7, signed=True).scale/qrec.in_qs[0].scale) elif isinstance(params, HSwishActivationParameters): compute_in_out_scale(qrec, extra_scale=qrec.in_qs[0].scale * 1/6) else: compute_in_out_scale(qrec) return qrec
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Keep the inputs and pick the output qtype from force or statistics.

    Returns:
        A ``MultQuantizationRecord`` whose output qtype is a copy of the
        forced output qtype when one is given, otherwise a qtype derived
        from ``stats['range_out'][0]``.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    if force_out_q:
        return MultQuantizationRecord(in_qs=in_qs,
                                      out_qs=[deepcopy(force_out_q)])

    out_range = stats['range_out'][0]
    stats_out_q = QType.from_min_max_sq(out_range['min'],
                                        out_range['max'],
                                        dtype=out_dtype)
    return MultQuantizationRecord(in_qs=in_qs, out_qs=[stats_out_q])
def get_weights_qtype_by_tensor(cls, weights_node):
    """Compute a single qtype covering the whole weights tensor.

    The range is the global minimum and maximum of ``weights_node.dqvalue``,
    clamped so that it always contains 0.

    Returns:
        The QType created by ``QType.from_min_max_sq`` with
        ``narrow_range=True`` and ``scale_zero_as_one=True``.
    """
    values = weights_node.dqvalue
    lowest = np.minimum(np.min(values), 0)
    highest = np.maximum(np.max(values), 0)
    # weights_node.value = wqtype.quantize(dqweights)
    # weights_node.qtype = deepcopy(wqtype)
    return QType.from_min_max_sq(lowest,
                                 highest,
                                 narrow_range=True,
                                 scale_zero_as_one=True)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize a two (or three) input multiplication with scaled biases.

    When ``cls.move_constant`` reports a constant second input, that input
    is quantized from its values along dimension 0 and the graph update is
    flagged with ``requires_adjust``; otherwise the second input qtype is
    made symmetric signed. The first input is always made symmetric signed.

    Returns:
        ``None`` when the output is forced to something that is not a
        symmetric ``np.int8`` qtype, otherwise a ``QRec.scaled`` record
        carrying a ``MultMulBiasScaleQType`` whose scale is
        ``in_q1.scale * in_q2.scale / o_q.scale``.
    """
    # copy in_qs because we may modify it
    in_qs = in_qs.copy()
    # Not used below; the lookup is kept so a missing 'opts' entry still
    # raises KeyError here.
    opts = kwargs['opts']
    fusion = kwargs.get('fusion', None)
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    G = kwargs['G']

    # only attempt channel scaling if the second input is constant
    in2_node, in_qs = cls.move_constant(G, fusion if fusion else params,
                                        in_qs)
    if in2_node:
        kwargs['graph_update']['requires_adjust'] = True
        in_q2 = QType.from_array_sq(arr=in2_node.dqvalue,
                                    quantized_dimension=0,
                                    dtype=np.int8,
                                    narrow_range=True,
                                    bits=8)
    else:
        in_q2 = in_qs[1].make_symmetric_signed()
    in_q1 = in_qs[0].make_symmetric_signed()

    min_val, max_val = cls.get_min_max(fusion, stats, kwargs['all_stats'],
                                       params)

    if force_out_q:
        o_q = force_out_q
        # can't be forced to something not np.int8
        if o_q.dtype != np.int8 or o_q.asymmetric:
            return None
        symmetry = "asymmetric" if o_q.asymmetric else "symmetric"
        LOG.warning(
            'node %s output forced to range %s/%s - actual range %s/%s %s',
            params.name, o_q.min, o_q.max, min_val, max_val, symmetry)
    else:
        o_q = QType.from_min_max_sq(min_val=min_val,
                                    max_val=max_val,
                                    dtype=out_dtype)

    out_in_qs = [in_q1, in_q2]
    if len(in_qs) == 3:
        # A third input gets an int32 qtype scaled by both input scales.
        out_in_qs.append(
            QType(dtype=np.int32, scale=in_q1.scale * in_q2.scale))

    mul_biases_q = MultMulBiasScaleQType()
    mul_biases_q.scale = in_q1.scale * in_q2.scale / o_q.scale
    return QRec.scaled(in_qs=out_in_qs,
                       out_qs=[o_q],
                       mul_biases_q=mul_biases_q)
def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs): opts = kwargs['opts'] fusion = kwargs.get('fusion', None) return [ QType.from_min_max_sq( stats['range_in'][idx]['min'], stats['range_in'][idx]['max'], dtype=np.uint8 if cls.can_ne16(params, opts, fusion) else np.int8, asymmetric=in_qs[idx].is_asymmetric and cls.can_handle_asymmetric_input(params, **kwargs)) if dim is not None else None for idx, dim in enumerate(params.in_dims) ]
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Quantize a relu with one qtype shared by input and output (NE16).

    The shared qtype is the forced output qtype when one is given. Otherwise
    it is an asymmetric ``np.uint8`` qtype covering ``[0, upper]``, where
    ``upper`` is ``params.upper_bound`` or, when that is ``None``, the
    maximum of ``stats['range_out'][0]``.

    Returns:
        A ``QRec.scaled`` record (``ne16=True``) whose input and output
        lists both hold the same qtype object.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    shared_q = in_qs[0]
    cls.check_valid_ranges(params, stats, idx=0, dirs='out')

    if force_out_q:
        # if the output has been forced then propagate it
        shared_q = force_out_q
    else:
        if params.upper_bound is not None:
            upper = params.upper_bound
        else:
            upper = stats['range_out'][0]['max']
        shared_q = QType.from_min_max_sq(0,
                                         upper,
                                         dtype=np.uint8,
                                         asymmetric=True)
    return QRec.scaled(in_qs=[shared_q], out_qs=[shared_q], ne16=True)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Keep the inputs and derive an optionally asymmetric output qtype.

    Returns:
        A ``QRec.scaled`` record whose output qtype is a copy of the forced
        output qtype when one is given. Otherwise the output ranges are
        checked with ``cls.check_valid_ranges`` and the qtype is derived from
        ``stats['range_out'][0]`` with ``asymmetric`` set from
        ``opts['allow_asymmetric']``.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    opts = kwargs['opts']

    if force_out_q:
        return QRec.scaled(in_qs=in_qs, out_qs=[deepcopy(force_out_q)])

    cls.check_valid_ranges(params, stats, idx=0, dirs='out')
    out_range = stats['range_out'][0]
    stats_out_q = QType.from_min_max_sq(out_range['min'],
                                        out_range['max'],
                                        dtype=out_dtype,
                                        asymmetric=opts['allow_asymmetric'])
    return QRec.scaled(in_qs=in_qs, out_qs=[stats_out_q])
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Choose the output qtype of a constant node.

    Priority order: a forced output qtype, then the node's own
    ``params.qtype``, then a qtype derived from ``stats['range_out'][0]``.
    The first two are deep-copied.

    Returns:
        A ``MultConstantQuantizationRecord`` with the chosen output qtype.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]

    if force_out_q:
        # if forced set what we are forced to
        chosen_q = deepcopy(force_out_q)
    elif params.qtype:
        # if value is already quantized then keep the same quantization
        chosen_q = deepcopy(params.qtype)
    else:
        # derive quantization from statistics
        out_range = stats['range_out'][0]
        chosen_q = QType.from_min_max_sq(min_val=out_range['min'],
                                         max_val=out_range['max'],
                                         dtype=out_dtype)
    return MultConstantQuantizationRecord(out_qs=[chosen_q])
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, _ = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] if force_out_q: return None out_dtype = params.output_dtype in_dtype = params.input_dtype in_q = QType(scale=1, dtype=in_dtype) out_q = QType.from_min_max_sq(-1, 1, dtype=out_dtype, narrow_range=True) return MultQuantizationRecord(in_qs=[in_q], out_qs=[out_q])
def _quantize_sw(cls, params, in_qs, stats, inout_dtype, asym=False, **kwargs):
    """Quantize with a given in/out dtype, symmetric or asymmetric.

    Args:
        params: Node parameters.
        in_qs: Input qtypes.
        stats: Node statistics; ``stats['range_out'][0]`` is used when the
            output is not forced.
        inout_dtype: dtype required for the output qtype.
        asym: When false the inputs are passed through
            ``cls.force_symmetric_and_dtype`` and ``zero_point`` is forced in
            addition to ``dtype``. When true the output is created
            asymmetric with the ``ne16`` attribute set.

    Returns:
        ``None`` when the inputs are ``None`` or a forced output is
        incompatible (asymmetric while ``asym`` is false, or of a different
        dtype); otherwise a ``QRec.scaled`` record with ``ne16=asym``.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    # NOTE: The autotiler kernel scales and clips after the operation and
    # before the activation so there is no change if this is in a fusion or
    # not
    if isinstance(params, MatrixAddParameters):
        scaled_idx = params.force_quantized_index
    else:
        scaled_idx = None

    if not asym:
        in_qs = cls.force_symmetric_and_dtype(in_qs)
    if in_qs is None:
        return None

    if force_out_q:
        o_q = deepcopy(force_out_q)
        if o_q.asymmetric and not asym:
            return None
        if o_q.dtype != inout_dtype:
            return None
        # important to set ne16 here so the o_q matches the force_out_q since
        # this attribute is not copied by deepcopy
        if force_out_q.attr.ne16:
            o_q.attr.ne16 = True
    else:
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        out_range = stats['range_out'][0]
        o_q = QType.from_min_max_sq(out_range['min'],
                                    out_range['max'],
                                    dtype=inout_dtype,
                                    asymmetric=asym,
                                    dont_copy_attr=['ne16'],
                                    ne16=asym)

    forced_flags = ['dtype'] if asym else ['dtype', 'zero_point']
    # A fresh list is passed on every call so no flags list is shared.
    o_q.set_forced(flags=list(forced_flags))
    in_qs = [in_q.set_forced(flags=list(forced_flags)) for in_q in in_qs]
    return QRec.scaled(in_qs=in_qs,
                       out_qs=[o_q],
                       scaled_idx=scaled_idx,
                       ne16=asym)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Keep the inputs and pick the output qtype for an add-like node.

    ``scaled_idx`` is ``params.force_quantized_index`` for
    ``MatrixAddParameters`` and ``None`` for anything else.

    Returns:
        A ``MultAddQuantizationRecord`` whose output qtype is a copy of the
        forced output qtype when one is given, otherwise a qtype derived
        from ``stats['range_out'][0]``.
    """
    force_out_qs, out_dtype = cls.get_mult_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    # NOTE: The autotiler kernel scales and clips after the operation and
    # before the activation so there is no change if this is in a fusion or
    # not
    if isinstance(params, MatrixAddParameters):
        scaled_idx = params.force_quantized_index
    else:
        scaled_idx = None

    if force_out_q:
        chosen_q = deepcopy(force_out_q)
    else:
        out_range = stats['range_out'][0]
        chosen_q = QType.from_min_max_sq(out_range['min'],
                                         out_range['max'],
                                         dtype=out_dtype)
    return MultAddQuantizationRecord(in_qs=in_qs,
                                     out_qs=[chosen_q],
                                     scaled_idx=scaled_idx)
def _quantize(cls, params, in_qs, stats, **kwargs): force_out_qs, out_dtype = cls.get_mult_opts(**kwargs) force_out_q = force_out_qs and force_out_qs[0] in_qs = cls.force_symmetric_and_dtype(in_qs, dtype=np.int8) if in_qs is None: return None if force_out_q: o_q = deepcopy(force_out_q) if o_q.is_asymmetric: return None else: cls.check_valid_ranges(params, stats, idx=0, dirs='out') o_q = QType.from_min_max_sq(stats['range_out'][0]['min'], stats['range_out'][0]['max'], dtype=out_dtype) return QRec.scaled(in_qs=in_qs, out_qs=[o_q])