def __init__(self,
             min_init=-6,
             max_init=6,
             num_bits=8,
             ema=False,
             ema_decay=0.999,
             per_channel=False,
             out_channels=1,
             quant_delay=0,
             symmetric=False,
             narrow_range=False):
    """Initialize the FakeQuantWithMinMax layer."""
    super(FakeQuantWithMinMax, self).__init__()
    self.min_init = min_init
    self.num_bits = num_bits
    self.max_init = max_init
    self.ema = ema
    self.ema_decay = ema_decay
    self.per_channel = per_channel
    self.out_channels = out_channels
    self.quant_delay = quant_delay
    self.symmetric = symmetric
    self.narrow_range = narrow_range

    if per_channel:
        # One (min, max) pair per output channel.
        min_array = np.array([self.min_init] * self.out_channels).astype(np.float32)
        max_array = np.array([self.max_init] * self.out_channels).astype(np.float32)
        self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False)
        self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False)
        self.fake_quant_train = P.FakeQuantWithMinMaxPerChannel(num_bits=self.num_bits,
                                                                ema=self.ema,
                                                                ema_decay=self.ema_decay,
                                                                quant_delay=self.quant_delay,
                                                                symmetric=self.symmetric,
                                                                narrow_range=self.narrow_range,
                                                                training=True)
        self.fake_quant_infer = P.FakeQuantWithMinMaxPerChannel(num_bits=self.num_bits,
                                                                ema=self.ema,
                                                                ema_decay=self.ema_decay,
                                                                quant_delay=self.quant_delay,
                                                                symmetric=self.symmetric,
                                                                narrow_range=self.narrow_range,
                                                                training=False)
    else:
        min_array = np.array([min_init]).reshape(1).astype(np.float32)
        max_array = np.array([max_init]).reshape(1).astype(np.float32)
        self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False)
        self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False)
        if context.get_context('device_target') == "Ascend":
            # On Ascend, the min/max Parameters are shared with the D-variant cells.
            self.fake_quant_train = FakeQuantWithMinMaxD(num_bits=self.num_bits,
                                                         ema=self.ema,
                                                         ema_decay=self.ema_decay,
                                                         quant_delay=self.quant_delay,
                                                         symmetric=self.symmetric,
                                                         narrow_range=self.narrow_range,
                                                         training=True,
                                                         min_init=self.minq,
                                                         max_init=self.maxq)
            self.fake_quant_infer = FakeQuantWithMinMaxD(num_bits=self.num_bits,
                                                         ema=self.ema,
                                                         ema_decay=self.ema_decay,
                                                         quant_delay=self.quant_delay,
                                                         symmetric=self.symmetric,
                                                         narrow_range=self.narrow_range,
                                                         training=False,
                                                         min_init=self.minq,
                                                         max_init=self.maxq)
        elif context.get_context('device_target') == "GPU":
            self.fake_quant_train = P.FakeQuantWithMinMax(num_bits=self.num_bits,
                                                          ema=self.ema,
                                                          ema_decay=self.ema_decay,
                                                          quant_delay=self.quant_delay,
                                                          symmetric=self.symmetric,
                                                          narrow_range=self.narrow_range,
                                                          training=True)
            self.fake_quant_infer = P.FakeQuantWithMinMax(num_bits=self.num_bits,
                                                          ema=self.ema,
                                                          ema_decay=self.ema_decay,
                                                          quant_delay=self.quant_delay,
                                                          symmetric=self.symmetric,
                                                          narrow_range=self.narrow_range,
                                                          training=False)
        else:
            raise ValueError("Unsupported platform: only Ascend and GPU are supported.")
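# For intuition, the uniform fake-quantization these cells apply can be
# sketched in plain NumPy. This is a hedged illustration of the usual
# min/max affine convention, not the actual kernel semantics of
# P.FakeQuantWithMinMax; `fake_quant_numpy` is a hypothetical helper and is
# not part of this module.
def fake_quant_numpy(x, min_val=-6.0, max_val=6.0, num_bits=8, narrow_range=False):
    """Quantize x onto the [min_val, max_val] integer grid, then dequantize."""
    quant_min = 1 if narrow_range else 0
    quant_max = (1 << num_bits) - 1
    scale = (max_val - min_val) / (quant_max - quant_min)
    zero_point = quant_min - round(min_val / scale)
    q = np.clip(np.round(x / scale) + zero_point, quant_min, quant_max)
    return ((q - zero_point) * scale).astype(np.float32)

# Values outside [min_val, max_val] saturate; values inside snap to the grid:
# fake_quant_numpy(np.array([-7.0, 0.03, 5.97])) -> about [-6.02, 0.047, 5.976]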
def __init__(self,
             min_init=-6,
             max_init=6,
             num_bits=8,
             ema=False,
             ema_decay=0.999,
             per_channel=False,
             out_channels=1,
             quant_delay=0,
             symmetric=False,
             narrow_range=False):
    """Initialize the FakeQuantWithMinMax layer."""
    super(FakeQuantWithMinMax, self).__init__()
    self.min_init = min_init
    self.num_bits = num_bits
    self.max_init = max_init
    self.ema = ema
    self.ema_decay = ema_decay
    self.per_channel = per_channel
    self.out_channels = out_channels
    self.quant_delay = quant_delay
    self.symmetric = symmetric
    self.narrow_range = narrow_range

    if per_channel:
        # One (min, max) pair per output channel.
        min_array = np.array([self.min_init] * self.out_channels).astype(np.float32)
        max_array = np.array([self.max_init] * self.out_channels).astype(np.float32)
        self.fake_quant_train = P.FakeQuantWithMinMaxPerChannel(num_bits=self.num_bits,
                                                                ema=self.ema,
                                                                ema_decay=self.ema_decay,
                                                                quant_delay=self.quant_delay,
                                                                symmetric=self.symmetric,
                                                                narrow_range=self.narrow_range,
                                                                training=True)
        self.fake_quant_infer = P.FakeQuantWithMinMaxPerChannel(num_bits=self.num_bits,
                                                                ema=self.ema,
                                                                ema_decay=self.ema_decay,
                                                                quant_delay=self.quant_delay,
                                                                symmetric=self.symmetric,
                                                                narrow_range=self.narrow_range,
                                                                training=False)
    else:
        min_array = np.array([min_init]).reshape(1).astype(np.float32)
        max_array = np.array([max_init]).reshape(1).astype(np.float32)
        self.fake_quant_train = P.FakeQuantWithMinMax(num_bits=self.num_bits,
                                                      ema=self.ema,
                                                      ema_decay=self.ema_decay,
                                                      quant_delay=self.quant_delay,
                                                      symmetric=self.symmetric,
                                                      narrow_range=self.narrow_range,
                                                      training=True)
        self.fake_quant_infer = P.FakeQuantWithMinMax(num_bits=self.num_bits,
                                                      ema=self.ema,
                                                      ema_decay=self.ema_decay,
                                                      quant_delay=self.quant_delay,
                                                      symmetric=self.symmetric,
                                                      narrow_range=self.narrow_range,
                                                      training=False)
    self.minq = Parameter(Tensor(min_array), name='quant_min', requires_grad=False)
    self.maxq = Parameter(Tensor(max_array), name='quant_max', requires_grad=False)
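# The `symmetric` and `narrow_range` flags shape the representable range. A
# common convention, sketched here as an assumption about these ops (the
# backend kernels may differ in detail): `symmetric` mirrors the float range
# around zero, and `narrow_range` drops the lowest integer code so the grid
# is symmetric in integers. `quant_range` is a hypothetical helper.
def quant_range(min_val, max_val, num_bits=8, symmetric=False, narrow_range=False):
    """Return the (float range, integer grid) under the assumed convention."""
    if symmetric:
        bound = max(abs(min_val), abs(max_val))
        min_val, max_val = -bound, bound
    quant_min = 1 if narrow_range else 0
    quant_max = (1 << num_bits) - 1
    return (min_val, max_val), (quant_min, quant_max)

# Example: even if the running minimum drifts to -2, symmetric mode keeps the
# float range mirrored:
# quant_range(-2.0, 6.0, symmetric=True, narrow_range=True)
# -> ((-6.0, 6.0), (1, 255))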
def __init__(self,
             min_init=-6,
             max_init=6,
             num_bits=8,
             ema=False,
             ema_decay=0.999,
             per_channel=False,
             channel_size=1,
             quant_delay=0,
             symmetric=False,
             narrow_range=False,
             training=True):
    """Initialize the FakeQuantWithMinMax Ascend layer."""
    super(FakeQuantWithMinMaxD, self).__init__()
    self.min_init = min_init
    self.num_bits = num_bits
    self.max_init = max_init
    self.ema = ema
    self.ema_decay = ema_decay
    self.per_channel = per_channel
    self.channel_size = channel_size
    self.quant_delay = quant_delay
    self.symmetric = symmetric
    self.narrow_range = narrow_range
    self.training = training

    if not per_channel:
        self.fake_quant = P.FakeQuantWithMinMax(num_bits=self.num_bits,
                                                ema=self.ema,
                                                ema_decay=self.ema_decay,
                                                quant_delay=self.quant_delay,
                                                symmetric=self.symmetric,
                                                narrow_range=self.narrow_range,
                                                training=training)
        self.ema_update = P.FakeQuantWithMinMaxUpdate(num_bits=self.num_bits,
                                                      ema=self.ema,
                                                      ema_decay=self.ema_decay,
                                                      quant_delay=self.quant_delay,
                                                      symmetric=self.symmetric,
                                                      narrow_range=self.narrow_range,
                                                      training=training)
    else:
        raise RuntimeError("Per-channel mode is not supported.")

    if isinstance(min_init, Parameter):
        # Reuse the min/max Parameters shared by the wrapping layer.
        self.minq = min_init
        self.maxq = max_init
    else:
        self.minq = Parameter(Tensor(np.array([min_init]).astype(np.float32)),
                              name='quant_min',
                              requires_grad=False)
        self.maxq = Parameter(Tensor(np.array([max_init]).astype(np.float32)),
                              name='quant_max',
                              requires_grad=False)
    self.reduce_min = P.ReduceMin()
    self.reduce_max = P.ReduceMax()
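# When `ema=True`, the running range is typically maintained as an
# exponential moving average of the per-batch extrema; this NumPy sketch
# states that update rule as an assumption about what the fused
# P.FakeQuantWithMinMaxUpdate op computes. `ema_update_numpy` is a
# hypothetical helper, not part of this module.
def ema_update_numpy(x, running_min, running_max, decay=0.999):
    """Blend the batch min/max into the running range with decay factor `decay`."""
    batch_min = float(np.min(x))  # what self.reduce_min extracts
    batch_max = float(np.max(x))  # what self.reduce_max extracts
    new_min = running_min * decay + batch_min * (1.0 - decay)
    new_max = running_max * decay + batch_max * (1.0 - decay)
    return new_min, new_max

# Example: a single outlier batch nudges the range only slightly:
# ema_update_numpy(np.array([-8.0, 3.0]), -6.0, 6.0) -> (-6.002, 5.997)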