def forward(self, *inputs, **kwargs):
  # quantize input tensor
  configer = NndctGraphHolder()
  inputs, _ = process_inputs_and_params(self.node, self.quantizer, inputs=list(inputs))
  if (configer.node_quantizable_with_params(self.node) and not self.param_quantized):
    # quantize weights/scale and bias for batch norm
    if not configer.is_conv_like(self.node) or self.node.node_attr(
        self.node.op.AttrName.BIAS_TERM):
      param_names = self.params_name[:2]
      params = [self.weight, self.bias]
    else:
      param_names = [self.params_name[0]]
      params = [self.weight]
    __, __ = process_inputs_and_params(
        self.node,
        self.quantizer,
        inputs=[],
        params=params,
        param_names=param_names)
    self.param_quantized = True
  output = super().forward(*inputs, **kwargs)
  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, input):
  if self.bias is not None:
    params = [self.weight, self.bias]
  else:
    params = [self.weight]
  [input], __ = process_inputs_and_params(
      self.node,
      self.quant_mode,
      self.quantizer,
      inputs=[input],
      valid_inputs=self.valid_inputs,
      params=[],
      param_names=[])
  if (not self.param_quantized):
    __, __ = process_inputs_and_params(
        self.node,
        self.quant_mode,
        self.quantizer,
        inputs=[],
        valid_inputs=[],
        params=params,
        param_names=self.params_name)
    self.param_quantized = True
  output = super().forward(input)
  if (self.need_quant_output):
    [output] = post_quant_process(self.node, self.valid_output, [output],
                                  [output, output])
  return output
def forward(self, input):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quantizer,
      inputs=[input])
  if NndctOption.nndct_quant_off.value or NndctOption.nndct_cv_app.value:
    output = super().forward(input)
  elif self.quant_mode > 0:
    output = torch.empty_like(input)
    input_name = self.node.in_nodes[0]
    fragpos = self.quantizer.get_bnfp(input_name, False)[1]
    if (input.device == torch.device("cpu")):
      Ttable = TANH_TABLE.table.to(torch.device("cpu"))
      output = output.to(torch.device("cpu"))
    else:
      Ttable = TANH_TABLE.table.cuda()
      output = output.cuda()
    NndctTanhTableLookup(input, Ttable, output, fragpos)
  else:
    output = super().forward(input)
  return output
def forward(self, input):
  [input], _ = process_inputs_and_params(self.node, self.quantizer, inputs=[input])
  output = super().forward(input)

  # scale to DPU accuracy
  if self.output_size != [1, 1]:
    print("NNDCT-Warning: For adaptive average pooling, DPU only supports output size 1")

  needScale = False
  scale = 1.0
  if input.shape[2] == 3 and input.shape[3] == 3:
    needScale = True
    scale = 9.0 * 7.0 / 64.0
  elif input.shape[2] == 5 and input.shape[3] == 5:
    needScale = True
    scale = 25.0 * 10.0 / 256.0
  elif input.shape[2] == 6 and input.shape[3] == 6:
    needScale = True
    scale = 36.0 * 7.0 / 256.0
  elif input.shape[2] == 7 and input.shape[3] == 7:
    needScale = True
    scale = 49.0 * 21.0 / 1024.0
  elif input.shape[2] == 14 and input.shape[3] == 14:
    needScale = True
    scale = 196.0 * 21.0 / 4096.0

  if needScale:
    NndctScale(output, scale)

  [output] = post_quant_process(self.node, [output])
  return output
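# Note (illustration, not library code): the hard-coded factors above pair each
# pooling window with the multiply/shift pair the DPU uses in place of an exact
# division by the window area, e.g. 7/64 instead of 1/9 for a 3x3 window, so the
# float result (already divided by the area) is rescaled by
# area * numerator / denominator to match the hardware. This reading is inferred
# from the constants themselves; a quick standalone check:
approximations = {
    (3, 3): (7, 64),       # 7/64    ~= 1/9
    (5, 5): (10, 256),     # 10/256  ~= 1/25
    (6, 6): (7, 256),      # 7/256   ~= 1/36
    (7, 7): (21, 1024),    # 21/1024 ~= 1/49
    (14, 14): (21, 4096),  # 21/4096 ~= 1/196
}
for (kh, kw), (num, den) in approximations.items():
  area = kh * kw
  print(f"{kh}x{kw}: exact 1/{area} = {1 / area:.6f}, "
        f"DPU {num}/{den} = {num / den:.6f}, "
        f"rescale = {area * num / den:.6f}")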
def forward(self, input, other, alpha=1):
  [input, other], _ = process_inputs_and_params(
      self.node, self.quantizer, inputs=[input, other])
  output = torch.sub(input=input, other=other, alpha=alpha)
  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, input):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quant_mode,
      self.quantizer,
      inputs=[input],
      valid_inputs=self.valid_inputs)
  output = super().forward(input)

  # scale to DPU accuracy
  needScale = False
  scale = 1.0
  if self.node.node_attr(self.node.op.AttrName.KERNEL) == [3, 3]:
    needScale = True
    scale = 9.0 * 7.0 / 64.0
  elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [5, 5]:
    needScale = True
    scale = 25.0 * 10.0 / 256.0
  elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [6, 6]:
    needScale = True
    scale = 36.0 * 7.0 / 256.0
  elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [7, 7]:
    needScale = True
    scale = 49.0 * 21.0 / 1024.0
  elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [14, 14]:
    needScale = True
    scale = 196.0 * 21.0 / 4096.0

  if needScale:
    NndctScale(output, scale)

  [output] = post_quant_process(self.node, self.valid_output, [output],
                                [output, output])
  return output
def forward(self, input):
  [input], _ = process_inputs_and_params(self.node, self.quantizer, inputs=[input])
  if NndctOption.nndct_quant_off.value or NndctOption.nndct_cv_app.value:
    output = super().forward(input)
    # quantize output
    [output] = post_quant_process(self.node, [output])
  elif self.quant_mode > 0:
    output = torch.empty_like(input)
    if NndctOption.nndct_tanh_sigmoid_sim.value > 0:
      NndctSigmoidSimulation(input, output)
      [output] = post_quant_process(self.node, [output])
    else:
      input_name = self.node.in_nodes[0]
      fragpos = self.quantizer.get_bnfp(input_name, False)[1]
      quant_device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
      Ttable = SIGMOID_TABLE.table.to(quant_device)
      output = output.to(quant_device)
      NndctSigmoidTableLookup(input, Ttable, output, fragpos)
  else:
    output = super().forward(input)
  return output
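# Sketch (hypothetical, for illustration only): NndctSigmoidTableLookup and the
# SIGMOID_TABLE layout are library internals not shown here. The snippet below
# is a minimal, self-contained version of the table-lookup idea, assuming a
# 2**table_bits entry table over [-8, 8) and a `fragpos` fixed-point fraction
# width; the real kernel's table size, range and indexing may differ.
import torch

def sigmoid_table_lookup_sketch(input, fragpos, table_bits=10):
  num_entries = 2 ** table_bits
  grid = torch.linspace(-8.0, 8.0, num_entries)
  table = torch.sigmoid(grid)  # precomputed sigmoid values
  # Quantize the input to `fragpos` fractional bits, then map to a table index.
  x = torch.round(input * (2 ** fragpos)) / (2 ** fragpos)
  idx = ((x.clamp(-8.0, 8.0 - 1e-6) + 8.0) / 16.0 * (num_entries - 1)).long()
  return table[idx]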
def forward(self, input):
  [input], _ = process_inputs_and_params(self.node, self.quantizer, inputs=[input])
  output = super().forward(input)
  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, *args, **kwargs):
  inputs = []

  def collect_inputs(inputs, value):
    if isinstance(value, torch.Tensor):
      inputs.append(value)
    elif isinstance(value, (tuple, list)):
      for i in value:
        collect_inputs(inputs, i)

  for k, v in kwargs.items():
    collect_inputs(inputs, v)

  inputs, _ = process_inputs_and_params(self.node, self.quantizer, inputs=inputs)
  try:
    output = caller(*args, **kwargs)
  except TypeError as e:
    NndctScreenLogger().warning_once(
        f"{str(e)}. The arguments of the function will be converted to positional arguments.")
    inputs = list(args) + list(kwargs.values())
    output = caller(*inputs)

  [output] = post_quant_process(self.node, [output])
  return output
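# collect_inputs above just flattens every tensor found in (possibly nested)
# keyword-argument values into one list before quantization. A standalone
# demonstration of that flattening with made-up arguments:
import torch

def _demo_collect_inputs(inputs, value):
  if isinstance(value, torch.Tensor):
    inputs.append(value)
  elif isinstance(value, (tuple, list)):
    for i in value:
      _demo_collect_inputs(inputs, i)

demo_kwargs = {
    "x": torch.zeros(2, 3),
    "sizes": [1, 2, 3],                        # non-tensors are skipped
    "pair": (torch.ones(1), [torch.ones(4)]),  # nested containers are walked
}
flat = []
for _, v in demo_kwargs.items():
  _demo_collect_inputs(flat, v)
print(len(flat))  # 3 tensors collected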
def forward(self, input):
  if self.quant_mode is None or NndctOption.nndct_quant_off.value:
    return torch.mul(input, torch.div(F.relu6(torch.add(input, 3.)), 6.))
  else:
    [input], _ = process_inputs_and_params(self.node, self.quantizer, inputs=[input])
    output = F.relu6(torch.add(input, 3.))

    # scale to DPU accuracy
    scale = 2731.0 / 16384.0
    NndctScale(output, scale)

    # mth = 4 if self.quantizer.lstm else 2
    mth = 2
    output = NndctFixNeuron(output, output, maxamp=[128, 128], method=mth)

    output = torch.mul(input, output)
    [output] = post_quant_process(self.node, [output])
    return output
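# The constant 2731/16384 above is the fixed-point stand-in for the 1/6 factor
# in hard-swish (x * relu6(x + 3) / 6): round(16384 / 6) == 2731, so the output
# is multiplied by 2731 and shifted right by 14 bits instead of being divided
# by 6. A quick check of how close that approximation is:
exact = 1.0 / 6.0
approx = 2731.0 / 16384.0
print(f"1/6          = {exact:.8f}")
print(f"2731/16384   = {approx:.8f}")
print(f"relative err = {abs(approx - exact) / exact:.2e}")  # ~1.2e-4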
def forward(self, input):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quantizer,
      inputs=[input],
  )

  # check input shape
  if self.node.out_tensors[0].is_complete_tensor() and self.node.out_tensors[0].ndim == 4:
    # py_utils.blob_to_torch_format(self.node.out_tensors[0])
    if not (self.node.out_tensors[0].shape[1:] == list(input.size())[1:]):
      NndctScreenLogger().warning(
          f"The shape of input ({input.shape[1:]}) should be the same as that of the dummy input ({self.node.out_tensors[0].shape[1:]})")
    # py_utils.blob_to_nndct_format(self.node.out_tensors[0])

  output = input

  if (self.node.in_quant_part and NndctOption.nndct_stat.value > 2):
    print('Channel number of input data: {}'.format(output.shape[1]))
    print('Input data histogram: {}'.format(
        output.histc(bins=10).cpu().detach().numpy()))
    print('Network input channel-wise statistic [Min, Max, Mean, Std]:')
    t = output.transpose(0, 1)
    for c in range(t.shape[0]):
      print('[{}, {}, {}, {}]'.format(t[c].min(), t[c].max(), t[c].mean(), t[c].std()))
      print('histogram: {}'.format(t[c].histc(bins=10).cpu().detach().numpy()))

  if self.node.in_quant_part:
    [output] = post_quant_process(self.node, [output])
  return output
def forward(self, tensors):
  inputs, _ = process_inputs_and_params(
      self.node, self.quantizer, inputs=tensors)
  output = super().forward(inputs)
  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, input, dim, keepdim):
  [input], _ = process_inputs_and_params(
      self.node, self.quantizer, inputs=[input])
  output = torch.mean(input, dim, keepdim)
  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, tensors, dim):
  inputs, _ = process_inputs_and_params(
      self.node, self.quantizer, inputs=tensors)
  output = torch.cat(inputs, dim)
  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, *inputs, **kwargs):
  inputs, _ = process_inputs_and_params(self.node, self.quantizer, inputs=list(inputs))
  output = super().forward(*inputs, **kwargs)
  [output] = post_quant_process(self.node, [output])
  # output = super().forward(*inputs, **kwargs)
  return output
def forward(self, input, size):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quantizer,
      inputs=[input],
  )
  output = input.expand(size).clone()
  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, input, *args, **kwargs):
  [input], _ = process_inputs_and_params(self.node, self.quantizer, inputs=[input])
  output = getattr(input, self.op_type, None)(*args, **kwargs)
  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, input):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quant_mode,
      self.quantizer,
      inputs=[input],
      valid_inputs=self.valid_inputs)
  output = super().forward(input)
  if (self.need_quant_output):
    [output] = post_quant_process(self.node, self.valid_output, [output],
                                  [output, output])
  return output
def forward(self, tensors, dim):
  inputs, _ = process_inputs_and_params(
      self.node,
      self.quant_mode,
      self.quantizer,
      inputs=tensors,
      valid_inputs=self.valid_inputs)
  output = torch.cat(inputs, dim)
  if (self.need_quant_output):
    [output] = post_quant_process(self.node, self.valid_output, [output],
                                  [output, output])
  return output
def forward(self, input):
  [input], _ = process_inputs_and_params(self.node, self.quantizer, inputs=[input])
  output = super().forward(input)

  # scale to DPU accuracy
  needScale = False
  scale = 1.0
  if self.node.node_attr(self.node.op.AttrName.KERNEL) == [3, 3]:
    needScale = True
    scale = 9.0 * 7.0 / 64.0
  elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [5, 5]:
    needScale = True
    scale = 25.0 * 10.0 / 256.0
  elif self.node.node_attr(self.node.op.AttrName.KERNEL) in [[6, 6], [3, 6], [6, 3]]:
    needScale = True
    scale = 36.0 * 7.0 / 256.0
  elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [7, 7]:
    needScale = True
    scale = 49.0 * 21.0 / 1024.0
  elif self.node.node_attr(self.node.op.AttrName.KERNEL) == [14, 14]:
    needScale = True
    scale = 196.0 * 21.0 / 4096.0
  else:
    needScale = True
    rec = self.node.node_attr(self.node.op.AttrName.KERNEL)[0] * self.node.node_attr(
        self.node.op.AttrName.KERNEL)[1]
    max_factor = math.ceil(math.log(rec * 128, 2))
    diff = 1.0
    multi_factor = 0.0
    shift_factor = 0.0
    for shift_factor_ in range(max_factor):
      factor = round((2**shift_factor_) / rec)
      diff_ = abs(factor / (2**shift_factor_) - 1 / rec)
      if diff_ < diff:
        multi_factor = factor
        diff = diff_
        shift_factor = shift_factor_
    scale = rec * multi_factor / (2**shift_factor)

  if needScale:
    NndctScale(output, scale)

  [output] = post_quant_process(self.node, [output])
  return output
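# The else branch above searches for the multiplier/shift pair that best
# approximates 1/(kernel_h * kernel_w). The same search, written as a
# standalone helper (hypothetical name, mirroring the loop above) to see which
# compensation scale it would pick for an arbitrary window:
import math

def dpu_avgpool_scale(kh, kw):
  rec = kh * kw
  max_factor = math.ceil(math.log(rec * 128, 2))
  diff, multi_factor, shift_factor = 1.0, 0.0, 0.0
  for shift_factor_ in range(max_factor):
    factor = round((2 ** shift_factor_) / rec)
    diff_ = abs(factor / (2 ** shift_factor_) - 1 / rec)
    if diff_ < diff:
      multi_factor, diff, shift_factor = factor, diff_, shift_factor_
  # Compensation applied to the float result so it matches the DPU.
  return rec * multi_factor / (2 ** shift_factor)

print(dpu_avgpool_scale(4, 4))  # 1/16 is exactly representable, so scale = 1.0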
def forward(self, *args, **kwargs):
  if len(self._match_inputs) == 0:

    def _check_kwargs(value):
      if isinstance(value, Tensor) and value in self.node.in_tensors:
        return True
      elif isinstance(value, (tuple, list)):
        check_result = [_check_kwargs(i) for i in value]
        return any(check_result)

    for key in kwargs.keys():
      if _check_kwargs(self.node.node_config(key)):
        self._match_inputs.append(key)

  for key in self._match_inputs:
    if isinstance(kwargs[key], (tuple, list)):
      inputs = kwargs[key]
    else:
      inputs = [kwargs[key]]
    if self.quantizer and self.quantizer.configer.is_node_quantizable(
        self.node, lstm=False):
      inputs, _ = process_inputs_and_params(
          self.node,
          self.quant_mode,
          self.quantizer,
          inputs=inputs,
          valid_inputs=self.valid_inputs)
      if isinstance(kwargs[key], (tuple, list)):
        kwargs[key] = inputs
      else:
        kwargs[key] = inputs[0]
      output = caller(*args, **kwargs)
      if (self.need_quant_output):
        [output] = post_quant_process(self.node, self.valid_output, [output],
                                      [output, output])
    else:
      output = caller(*args, **kwargs)
  return output
def forward(self, input, size=None, scale_factor=None, mode='nearest', align_corners=None):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quantizer,
      inputs=[input],
  )
  output = torch.nn.functional.interpolate(input, size, scale_factor, mode, align_corners)
  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, input):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quantizer,
      inputs=[input],
  )
  output = input

  if NndctOption.nndct_stat.value > 2:
    print('Channel number of input data: {}'.format(output.shape[1]))
    print('Input data histogram: {}'.format(
        output.histc(bins=10).cpu().detach().numpy()))
    print('Network input channel-wise statistic [Min, Max, Mean, Std]:')
    t = output.transpose(0, 1)
    for c in range(t.shape[0]):
      print('[{}, {}, {}, {}]'.format(t[c].min(), t[c].max(), t[c].mean(), t[c].std()))
      print('histogram: {}'.format(t[c].histc(bins=10).cpu().detach().numpy()))

  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, input):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quant_mode,
      self.quantizer,
      inputs=[input],
      valid_inputs=self.valid_inputs,
  )

  # check input shape
  if self.node.out_tensors[0].is_complete_tensor() and self.node.out_tensors[0].ndim == 4:
    py_utils.blob_to_torch_format(self.node.out_tensors[0])
    if not (self.node.out_tensors[0].shape[1:] == list(input.size())[1:]):
      raise RuntimeError(
          f"The shape of input ({input.size()}) should be the same as that of the dummy input ({[None] + self.node.out_tensors[0].shape[1:]})")
    py_utils.blob_to_nndct_format(self.node.out_tensors[0])

  output = input
  [output] = post_quant_process(self.node, self.valid_output, [output],
                                [output, output])
  return output
def forward(self, *inputs, **kwargs):
  if self.quantizer and self.quantizer.configer.is_node_quantizable(
      self.node, lstm=False):
    inputs, _ = process_inputs_and_params(
        self.node,
        self.quant_mode,
        self.quantizer,
        inputs=list(inputs),
        valid_inputs=self.valid_inputs)
    output = super().forward(*inputs, **kwargs)
    if (self.need_quant_output):
      [output] = post_quant_process(self.node, self.valid_output, [output],
                                    [output, output])
  else:
    output = super().forward(*inputs, **kwargs)
  return output
def forward(self, input: torch.Tensor, channel_max: Union[torch.Tensor, Sequence[Any], float]):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quantizer,
      inputs=[input],
  )
  if isinstance(channel_max, (list, tuple)):
    channel_max = torch.Tensor(channel_max).to(input.device)
  elif isinstance(channel_max, float):
    channel_max = torch.Tensor([channel_max]).to(input.device)
  if self.node.in_quant_part:
    channel_max = quant_reluk_params(self.node, channel_max)

  output = F.relu(input) - F.relu(input - channel_max)

  if self.node.in_quant_part:
    [output] = post_quant_process(self.node, [output])
  return output
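# F.relu(input) - F.relu(input - channel_max) above is an algebraic form of a
# per-channel clamp to [0, channel_max]: both terms are 0 for x <= 0, the
# second term is 0 for 0 < x < k, and the difference saturates at k for x >= k.
# A quick standalone check of the identity against torch.clamp:
import torch
import torch.nn.functional as F

x = torch.linspace(-2.0, 8.0, steps=11)
k = 6.0
reluk = F.relu(x) - F.relu(x - k)
print(torch.allclose(reluk, torch.clamp(x, min=0.0, max=k)))  # True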
def forward(self, input: torch.Tensor, channel_scale: Union[torch.Tensor, Sequence[Any], float]):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quantizer,
      inputs=[input],
  )
  if isinstance(channel_scale, (list, tuple)):
    channel_scale = torch.Tensor(channel_scale).to(input.device)
  elif isinstance(channel_scale, float):
    channel_scale = torch.Tensor([channel_scale]).to(input.device)
  '''
  if self.node.in_quant_part:
    channel_scale = quant_channel_scale_params(self.node, channel_scale)
  '''
  output = input * channel_scale

  if self.node.in_quant_part:
    [output] = post_quant_process(self.node, [output])
  return output
def forward(self, input):
  [input], _ = process_inputs_and_params(
      self.node,
      self.quant_mode,
      self.quantizer,
      inputs=[input],
      valid_inputs=self.valid_inputs)
  if NndctOption.nndct_quant_off.value:
    output = super().forward(input)
  elif self.quant_mode > 0:
    output = torch.empty_like(input)
    input_name = self.node.in_nodes[0]
    fragpos = self.quantizer.get_bnfp(input_name, False)[1]
    NndctTanhTableLookup(input.cuda(), TANH_TABLE.table.cuda(), output.cuda(), fragpos)
  else:
    output = super().forward(input)
  [output] = post_quant_process(self.node, self.valid_output, [output],
                                [output, output])
  return output
def forward(self, *args, **kwargs):
  inputs = []

  def collect_inputs(inputs, value):
    if isinstance(value, torch.Tensor):
      inputs.append(value)
    elif isinstance(value, (tuple, list)):
      for i in value:
        collect_inputs(inputs, i)

  for k, v in kwargs.items():
    collect_inputs(inputs, v)

  inputs, _ = process_inputs_and_params(self.node, self.quantizer, inputs=inputs)
  output = caller(*args, **kwargs)
  [output] = post_quant_process(self.node, [output])
  return output
def forward(self, *args):
  inputs = []

  def collect_inputs(inputs, value):
    if isinstance(value, torch.Tensor):
      inputs.append(value)
    elif isinstance(value, (tuple, list)):
      for i in value:
        collect_inputs(inputs, i)

  for v in args:
    collect_inputs(inputs, v)

  inputs, _ = process_inputs_and_params(self.node, self.quantizer, inputs=inputs)
  caller_map = GLOBAL_MAP.get_ele(NNDCT_KEYS.NODE_CALLER_MAP)
  output = caller_map[self.node.name](*args)
  [output] = post_quant_process(self.node, [output])
  return output