def forward(self, x, adj=None, feature=None, road_adj=None, road_feature=None):
    x = x.view(-1, self.channels * self.nb_flow, self.height, self.width)
    for i in range(self.gcn_layer):
        # convolutional learning
        x = self.conv[i](x)

        # dynamic OD gating
        # d_out = self.gcn_dynamic[i](feature, adj) if i == 0 else self.gcn_dynamic[i](SiLU()(d_out), adj)
        d_out = self.gcn_dynamic[i](feature, adj) if i == 0 else self.gcn_dynamic[i](d_out, adj)
        d_gate = F.hardsigmoid(d_out)
        # d_gate = self.dynamic_gating[i](d_out)
        d_gate = d_gate.view(-1, self.height, self.width, self.num_init_features).permute(0, 3, 1, 2)
        d_gate = d_gate.contiguous()

        # static OD gating
        s_out = self.gcn_static[i](road_feature, road_adj) if i == 0 else self.gcn_static[i](s_out, road_adj)
        s_gate = F.hardsigmoid(s_out)
        # s_gate = self.static_gating[i](s_out)
        s_gate = s_gate.view(-1, self.height, self.width, self.num_init_features).permute(0, 3, 1, 2)
        s_gate = s_gate.contiguous()

        if self.gate_type == 1:
            x *= d_gate
        elif self.gate_type == 2:
            x *= s_gate
        else:
            x *= (self.alpha * d_gate + self.beta * s_gate)

        # SiLU activation
        x = x * torch.sigmoid(x)

    if self.gate_type == 1:
        flow_input = d_gate
    elif self.gate_type == 2:
        flow_input = s_gate
    else:
        flow_input = self.alpha * d_gate + self.beta * s_gate
    return x, flow_input
def forward(self, input: Tensor) -> Tensor:
    scale = F.adaptive_avg_pool2d(input, 1)
    scale = self.fc1(scale)
    scale = F.relu(scale, inplace=True)
    scale = self.fc2(scale)
    scale = F.hardsigmoid(scale, inplace=True)
    return scale * input
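# A minimal sketch of the module context the forward() above assumes: fc1 and
# fc2 as 1x1 convolutions forming a squeeze-and-excitation block. The class
# name, channel sizes, and the squeeze_factor parameter are illustrative
# assumptions, not taken from the snippet's source.
import torch
from torch import nn, Tensor
import torch.nn.functional as F

class SqueezeExcitationSketch(nn.Module):
    def __init__(self, input_channels: int, squeeze_factor: int = 4):
        super().__init__()
        squeeze_channels = input_channels // squeeze_factor
        self.fc1 = nn.Conv2d(input_channels, squeeze_channels, 1)  # squeeze
        self.fc2 = nn.Conv2d(squeeze_channels, input_channels, 1)  # excite

    def forward(self, input: Tensor) -> Tensor:
        scale = F.adaptive_avg_pool2d(input, 1)
        scale = self.fc1(scale)
        scale = F.relu(scale, inplace=True)
        scale = self.fc2(scale)
        scale = F.hardsigmoid(scale, inplace=True)
        return scale * input

# se = SqueezeExcitationSketch(64)
# se(torch.randn(2, 64, 32, 32)).shape   # torch.Size([2, 64, 32, 32])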
def forward(self, x: torch.Tensor) -> torch.Tensor:
    channel_attention = self.pooling(x)
    channel_attention = self.reduce_conv(channel_attention)
    channel_attention = self.act(channel_attention)
    channel_attention = self.expand_conv(channel_attention)
    channel_attention = F.hardsigmoid(channel_attention, inplace=True)
    return x * channel_attention
def message(self, x_i: Tensor, x_j: Tensor, index: Tensor,
            edge_attr: Optional[Tensor] = None) -> Tensor:
    # construct edge message
    msg = self.msg_fn(x_i, x_j, edge_attr)
    gate = self.gate_nn(msg)
    if gate.dim() == 1:
        gate = gate.view(-1, 1)
    assert gate.dim() == msg.dim() and gate.size(0) == msg.size(0)

    # apply activation
    if self.attention_type == "global":
        gate = softmax(gate, index)
    elif self.attention_type == "local":
        gate = F.hardsigmoid(gate)

    # update message
    if self.local_nn is not None:
        msg = self.local_nn(x_j)
    else:
        msg = x_j
    msg = gate * msg

    if self.weighted_average:
        return torch.cat([msg, gate], dim=1)
    else:
        return msg
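# The two gating modes above differ in how gates are normalized. A small
# sketch assuming PyG's scatter softmax (torch_geometric.utils.softmax):
# "global" attention normalizes gates across all edges sharing a destination
# node, so they sum to 1 per node, while "local" squashes each gate
# independently into [0, 1] via hardsigmoid.
import torch
import torch.nn.functional as F
from torch_geometric.utils import softmax

gate = torch.tensor([[1.0], [2.0], [0.5]])
index = torch.tensor([0, 0, 1])          # destination node of each edge
print(softmax(gate, index))              # gates of edges into node 0 sum to 1
print(F.hardsigmoid(gate))               # elementwise squashing into [0, 1]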
def forward(self, x: Tensor) -> Tensor:
    scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))
    scale = self.fc1(scale)
    scale = F.relu(scale, inplace=True)
    scale = self.fc2(scale)
    scale = F.hardsigmoid(scale, inplace=True)
    return scale * x
def forward(self, x):
    x = self.conv(x)
    x = self.avg_pool1d(x)
    x = self.avg_pool2d(x)
    x = self.avg_pool3d(x)
    x = self.adaptive_avg_pool1d(x)
    x = self.adaptive_avg_pool2d(x)
    x = self.adaptive_avg_pool3d(x)
    x = F.avg_pool1d(x, 3)
    x = F.avg_pool2d(x, 3)
    x = F.avg_pool3d(x, 3)
    x = F.adaptive_avg_pool1d(x, (1))
    x = F.adaptive_avg_pool2d(x, (1, 1))
    x = F.adaptive_avg_pool3d(x, (1, 1, 1))
    x = torch.mean(x)
    x = torch.mean(x, [2, 3], False)
    x = x.mean()
    x = x.mean([2, 3], True)
    x = F.interpolate(x, 4, mode='nearest')
    x = F.interpolate(x, 4, mode='linear')
    x = self.leaky_relu(x)
    x = F.leaky_relu(x)
    x = F.leaky_relu(x, inplace=True)
    x = x.leaky_relu()
    x.leaky_relu_()
    x = self.hardsigmoid(x)
    x = F.hardsigmoid(x)
    x = F.hardsigmoid(x, inplace=True)
    x = x.hardsigmoid()
    x.hardsigmoid_()
    x = self.sigmoid(x)
    x = torch.sigmoid(x)  # F.sigmoid is deprecated
    x = x.sigmoid()
    x.sigmoid_()
    x = self.tanh(x)  # F.tanh is deprecated
    x = torch.tanh(x)
    x = x.tanh()
    x.tanh_()
    x = self.conv(x)
    return x
def forward(self, x: Tensor) -> Tensor:
    # the incoming x is a feature map (x: Tensor); the forward pass also returns a feature map (-> Tensor)
    # every channel must be pooled
    scale = F.adaptive_avg_pool2d(x, output_size=(1, 1))  # adaptive pooling with a 1x1 output: each channel of the feature map is averaged down to a single number
    scale = self.fc1(scale)
    scale = F.relu(scale, inplace=True)  # fc1 followed by its ReLU activation
    scale = self.fc2(scale)
    scale = F.hardsigmoid(scale, inplace=True)  # fc2 followed by its hardsigmoid activation
    return scale * x  # the resulting scale is multiplied back onto each channel
def forward(self, x: torch.Tensor):
    if self.threshold is None:
        threshold = get_kwta_threshold(x, self.sparsity)
    else:
        if self.threshold == "auto":
            # lazily replace the "auto" flag with a learnable per-unit threshold
            in_features = x.numel() // x.shape[0]
            self.threshold = nn.Linear(in_features, out_features=1, bias=False).weight
            if torch.cuda.is_available():
                self.threshold = self.threshold.cuda()
        shape = list(x.shape)[1:]
        threshold = self.threshold.view(shape)
    if self.training:
        # soft (differentiable) gate during training
        x_scaled = self.hardness * (x - threshold)
        if self.hard:
            return F.hardsigmoid(x_scaled, inplace=True)
        return x_scaled.sigmoid()
    # hard thresholding at inference time
    return KWinnersTakeAllThresholdFunction.apply(x, threshold)
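# get_kwta_threshold is not defined in this snippet. A plausible sketch (an
# assumption, not the original implementation): per sample, take the value
# that only the top `sparsity` fraction of activations exceed, so the
# k-winners-take-all gate passes roughly that fraction of units.
import torch

def get_kwta_threshold(x: torch.Tensor, sparsity: float) -> torch.Tensor:
    flat = x.flatten(start_dim=1)
    thr = torch.quantile(flat, 1.0 - sparsity, dim=1, keepdim=True)
    return thr.view(x.shape[0], *([1] * (x.dim() - 1)))  # broadcastable over x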
def forward(self, x):
    out = self.gap(x)
    out = F.relu(self.fc1(out))
    out = F.hardsigmoid(self.fc2(out))
    return x * out.expand_as(x)
def _scale(self, input: Tensor, inplace: bool) -> Tensor:
    scale = F.adaptive_avg_pool2d(input, 1)
    scale = self.fc1(scale)
    scale = self.relu(scale)
    scale = self.fc2(scale)
    return F.hardsigmoid(scale, inplace=inplace)
def forward(self, x):
    x = self.conv(x)
    x = F.hardsigmoid(x, inplace=True)
    x = self.conv2(x)
    return x
def hardswish_forward_0(x):
    return x * F.hardsigmoid(x)
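# Quick check of the identity behind hardswish_forward_0: since
# F.hardsigmoid(x) equals relu6(x + 3) / 6, x * F.hardsigmoid(x) reproduces
# F.hardswish(x) exactly (up to floating-point rounding).
import torch
import torch.nn.functional as F

x = torch.randn(1000)
assert torch.allclose(x * F.hardsigmoid(x), F.hardswish(x), atol=1e-6)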
def forward(self, input):
    x = F.adaptive_avg_pool2d(input, 1)
    x = self.conv1(x)
    x = self.activation(x)
    x = self.conv2(x)
    return input * F.hardsigmoid(x)
def hardsigmoid(input, *args, **kwargs):
    return _wrap_tensor(input, F.hardsigmoid(input.F, *args, **kwargs))
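# _wrap_tensor is not shown here. In MinkowskiEngine-style sparse tensors,
# input.F is the dense feature matrix; a plausible sketch (an assumption, not
# the original helper) rebuilds a sparse tensor that reuses the input's
# coordinates while carrying the activated features.
import MinkowskiEngine as ME

def _wrap_tensor(input, features):
    return ME.SparseTensor(
        features,
        coordinate_map_key=input.coordinate_map_key,
        coordinate_manager=input.coordinate_manager,
    )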
def forward(self, input):
    x = F.adaptive_avg_pool2d(input, 1)
    x = self.conv1(x)
    x = self.relu(x)
    x = self.conv2(x)
    return input * F.hardsigmoid(x, inplace=True)
def forward(x):
    # return x * F.hardsigmoid(x)  # for torchscript and CoreML
    return x * F.hardtanh(x + 3, 0., 6.) / 6.  # for torchscript, CoreML and ONNX
def optimize_layer(self, node, float_layer, layer_inputs, layer_act_group,
                   net_inputs, net_loss, last_quant_mods, device):
    batch_factor = 0.5 if layer_inputs[0].size(0) == 1 else 1

    layer = node.module

    # quantization efficiency = SQNR (dB) per bit of the 8-bit target
    float_data = np.fabs(float_layer.weight.cpu().detach().numpy().flatten())
    quant_data = np.fabs(layer.weight.cpu().detach().numpy().flatten())
    q_noise = np.square(float_data - quant_data).mean()
    sqnr = 10 * np.log10(np.square(float_data).mean() / q_noise)
    quantize_efficiency = sqnr / 8.0

    lr_factor = NndctOption.nndct_finetune_lr_factor.value
    lr_factor = lr_factor * batch_factor
    if quantize_efficiency > 4.5:
        lr_factor = 0.1 * lr_factor * batch_factor

    lr_w = lr_factor * layer.weight.std().item()
    opt_weight = torch.optim.Adam([layer.weight], lr=lr_w)

    opt_bias = None
    lr_b = 0
    if hasattr(layer, "bias") and layer.bias is not None:
        if layer.bias.flatten().shape[0] == 1:
            lr_b = 0.0
        else:
            lr_b = lr_factor * layer.bias.std().item()
        opt_bias = torch.optim.Adam([layer.bias], lr=lr_b)

    iters = 20
    total_loss = AverageMeter("layer_loss")
    best_params = self.get_layer_params(layer)

    # cache the float layer's outputs over the calibration inputs
    handlers = self.hook_cache_output([float_layer])
    for input_args in zip(*net_inputs):
        with torch.no_grad():
            f_model = self._float_model.to(device)
            f_model.eval()
            new_input_args = []
            for ip in input_args:
                if isinstance(ip, torch.Tensor):
                    new_input_args.append(ip.to(device))
            _ = f_model(*new_input_args)
    torch.cuda.empty_cache()
    self.clean_hooks(handlers)

    for i in range(iters):
        for idx, layer_input in enumerate(layer_inputs):
            train_output = self._cached_outputs[float_layer][idx].to(device)
            qout = layer(layer_input.to(device))

            # if an activation follows the layer, compare post-activation outputs
            if node in layer_act_group:
                act_node = layer_act_group[node]
                q_act_layer = act_node.module
                inplace = q_act_layer.inplace
                q_act_layer.inplace = False
                qout = q_act_layer(qout)
                q_act_layer.inplace = inplace
                if act_node.op.type == NNDCT_OP.RELU:
                    train_output = F.relu(train_output)
                elif act_node.op.type == NNDCT_OP.RELU6:
                    train_output = F.relu6(train_output)
                elif act_node.op.type == NNDCT_OP.HSIGMOID:
                    train_output = F.hardsigmoid(train_output)
                elif act_node.op.type == NNDCT_OP.HSWISH:
                    train_output = F.hardswish(train_output)
                else:
                    raise NotImplementedError()

            if NndctOption.nndct_quant_opt.value > 0:
                loss = F.mse_loss(qout, train_output) + F.mse_loss(
                    layer.weight, float_layer.weight.detach().to(device))
            else:
                loss = F.mse_loss(qout, train_output)

            total_loss.update(loss.item())
            opt_weight.zero_grad()
            if opt_bias:
                opt_bias.zero_grad()
            loss.backward()
            opt_weight.step()
            if opt_bias:
                opt_bias.step()

        float_data = np.fabs(layer.weight.cpu().detach().numpy().flatten())
        layer.param_quantized = False

        # evaluate the whole net with the finetuned layer; keep the best params
        handlers = self.hook_cache_output(last_quant_mods, hook_type="single")
        eval_loss = self.eval_loss(net_inputs, last_quant_mods, device)
        self.clean_hooks(handlers)

        quant_data = np.fabs(layer.weight.cpu().detach().numpy().flatten())
        q_noise = np.square(float_data - quant_data).mean()
        sqnr = 10 * np.log10(np.square(float_data).mean() / q_noise)
        quantize_efficiency = sqnr / 8.0

        if eval_loss < net_loss:
            best_params = self.get_layer_params(layer)
            net_loss = eval_loss
        else:
            self.set_layer_params(layer, best_params[0], best_params[1])
            break

    del self.cached_outputs[float_layer]  # release the cached float outputs
    torch.cuda.empty_cache()
    return net_loss
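# The quantize_efficiency gate in optimize_layer is SQNR-based: the
# signal-to-quantization-noise ratio in dB, divided by the 8 bits of the
# target format. A standalone sketch of that metric (the function name is
# illustrative):
import numpy as np

def sqnr_db(float_w: np.ndarray, quant_w: np.ndarray) -> float:
    noise = np.square(float_w - quant_w).mean()
    return 10 * np.log10(np.square(float_w).mean() / noise)

# efficiency = sqnr_db(fw, qw) / 8.0   # > 4.5 triggers a smaller finetune lr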
def forward(self, x, y, z, w):
    x = F.hardsigmoid(x)
    y = F.hardsigmoid(y)
    z = F.hardsigmoid(z)
    w = hardsigmoid_forward_0(w)
    return x, y, z, w