# Imports assumed by the classes in this file; the original listing references
# these names without showing its import block. Helpers such as BaseAttack,
# utils, calc_acc, gumbel_softmax, to_dense, BoundedParameter, Serializable,
# SharedFeedbackMax, LargeAttractorExp, NLMLLoss, OPS, PRIMITIVES, OPS_ZERO,
# Identity and delta_ij come from the respective source repositories.
import collections
import json
import math
import pickle
import time

import numpy as np
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import Parameter, init
from tqdm import tqdm

use_cuda = torch.cuda.is_available()  # module-level flag assumed by the Gumbel generators


class GCN(nn.Module):
    # Layer initialization: input features, output features, weight, bias.
    def __init__(self, in_features, out_features, bias=True):
        super(GCN, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        # torch.FloatTensor allocates an (uninitialized) tensor.
        # A common pattern is self.v = torch.nn.Parameter(torch.FloatTensor(hidden_size)):
        # think of Parameter as a type conversion that turns a non-trainable Tensor
        # into a trainable parameter and registers it on the module, so self.v
        # becomes part of the model and is updated during training. The point of
        # using it is to let these values be adjusted toward the optimum as
        # learning proceeds.
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter("bias", None)
        # Both Parameter attributes and register_parameter add entries to the
        # module's parameters; the latter additionally supports registering
        # under a string name (here, a bias of None).
        # NOTE: with reset_parameters() commented out, weight and bias keep
        # whatever uninitialized memory FloatTensor returned; initialize them
        # before training.
        # self.reset_parameters()

    # # Weight initialization
    # def reset_parameters(self):
    #     stdv = 1.0 / math.sqrt(self.weight.size(1))
    #     # size() counts the elements of a matrix, or along one of its
    #     # dimensions; size(1) is the second dimension
    #     self.weight.data.uniform_(-stdv, stdv)
    #     # uniform_() fills the tensor with samples drawn uniformly from [-stdv, stdv]
    #     if self.bias is not None:
    #         self.bias.data.uniform_(-stdv, stdv)

    """
    Feed-forward pass, i.e. computes Ã X W(0):
    multiply the input X by the weight W, then sparse-multiply the adj matrix
    by their product.
    torch.mm on the input and the weight gives `support`, i.e. XW;
    torch.spmm on adj and support gives `output`, i.e. ÃXW; optionally add the bias.
    """

    def forward(self, input, adj):
        support = torch.mm(input.cpu(), self.weight.cpu())
        # torch.mm(a, b) is matrix multiplication of a and b; torch.mul(a, b)
        # is element-wise multiplication, where a and b must have equal shapes.
        output = torch.spmm(adj.cpu(), support.cpu())
        if self.bias is not None:
            return output + self.bias.cpu()
        else:
            return output
    # Setting a breakpoint shows output rows of the form
    # [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.94]: the values are the
    # probabilities of each label for that x, so the row can be read as
    # [0, 0, 0, 0, 0, 0, 1], matching the seventh class of the one-hot labels.

    def __repr__(self):
        return (self.__class__.__name__ + " (" + str(self.in_features) +
                " -> " + str(self.out_features) + ")")
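# A minimal smoke test for the GCN layer above (hypothetical shapes, not from
# the original source): a 4-node graph with 3 input features projected to 2
# output features. Weights are initialized manually, since reset_parameters()
# is commented out in the class.
def _demo_gcn():
    layer = GCN(in_features=3, out_features=2)
    layer.weight.data.uniform_(-0.1, 0.1)   # manual init (see NOTE above)
    layer.bias.data.zero_()
    x = torch.rand(4, 3)                    # node feature matrix X
    adj = torch.eye(4).to_sparse()          # normalized adjacency Ã as a sparse tensor
    out = layer(x, adj)                     # computes Ã X W + b
    print(out.shape)                        # torch.Size([4, 2])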
class CosineGraphAttentionLayer(nn.Module):
    def __init__(self, requires_grad=True):
        super(CosineGraphAttentionLayer, self).__init__()
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        if requires_grad:
            # uniform initialization
            self.beta = Parameter(torch.Tensor(1).uniform_(0, 1),
                                  requires_grad=requires_grad)
        else:
            self.beta = torch.autograd.Variable(
                torch.zeros(1), requires_grad=requires_grad).to(device)
        self.epoch_count = 0
        self.timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())

    def forward(self, xi, xj, adj=1.0):
        xi_norm2 = torch.norm(xi, 2, 1).view(-1, 1)
        xj_norm2 = torch.norm(xj, 2, 1).view(-1, 1).t()
        # add a minor constant (1e-7) to the denominator to prevent division
        # by zero errors
        cos = self.beta * torch.div(torch.mm(xi, xj.t()),
                                    torch.mm(xi_norm2, xj_norm2) + 1e-7)
        # neighborhood masking (inspired by this repo:
        # https://github.com/danielegrattarola/keras-gat)
        if isinstance(adj, (float, int)):
            # the original hard-coded .cuda() here, which fails on CPU-only runs
            adj = torch.eye(xi.shape[0]).to(xi.device)
        else:
            adj = to_dense(adj)
        mask = (1. - adj) * -1e9
        masked = cos + mask
        # masked = to_sparse(masked)
        # propagation matrix
        # sparsemax = Sparsemax(dim=1)
        # P = sparsemax(masked)
        P = F.softmax(masked, dim=1)
        # attention-guided propagation
        self.epoch_count += 1
        output = torch.mm(P, xj)
        return output

    def save_attention(self, P):
        if self.epoch_count % 25 == 0:
            print('Saving Attention for {} x {}'.format(P.shape[0], P.shape[1]))
            save_file = 'tmp/GCNRx_attention_weight_{}_{}_{}'.format(
                self.timestamp, P.shape[0], P.shape[1]) + '.pkl'
            with open(save_file, 'wb') as f:
                pickle.dump([P.cpu().detach().numpy(),
                             self.beta.cpu().detach().numpy()], f)

    def __repr__(self):
        return self.__class__.__name__ + ' ()'
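# Hedged usage sketch for the cosine attention layer (illustrative shapes
# only). Passing a scalar adj triggers the identity mask, so no external
# `to_dense` helper is needed:
def _demo_cosine_attention():
    layer = CosineGraphAttentionLayer(requires_grad=True)
    x = torch.rand(5, 8)                 # 5 nodes, 8-dim features
    out = layer(x, x, adj=1.0)           # identity mask -> self-attention only
    print(out.shape)                     # torch.Size([5, 8])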
class Gumbel_Generator(nn.Module):
    def __init__(self, sz=10, temp=10, temp_drop_frac=0.9999):
        super(Gumbel_Generator, self).__init__()
        # gen_matrix holds the (unnormalized) probabilities of the adjacency matrix
        self.gen_matrix = Parameter(torch.rand(sz, sz, 2))
        self.temperature = temp
        self.temp_drop_frac = temp_drop_frac

    def drop_temperature(self):
        # annealing: decay the temperature
        self.temperature = self.temperature * self.temp_drop_frac

    def sample(self, hard=False):
        # sampling: draw one adjacency matrix
        self.logp = self.gen_matrix.view(-1, 2)
        out = gumbel_softmax(self.logp, self.temperature, hard)
        if hard:
            hh = torch.zeros(self.gen_matrix.size()[0] ** 2, 2)
            for i in range(out.size()[0]):
                hh[i, out[i]] = 1
            out = hh
        if use_cuda:
            out = out.cuda()
        out_matrix = out[:, 0].view(self.gen_matrix.size()[0],
                                    self.gen_matrix.size()[0])
        return out_matrix

    def get_temperature(self):
        return self.temperature

    def get_cross_entropy(self, obj_matrix):
        # cross-entropy distance to the target matrix
        logps = F.softmax(self.gen_matrix, 2)
        logps = torch.log(logps[:, :, 0] + 1e-10) * obj_matrix + \
            torch.log(logps[:, :, 1] + 1e-10) * (1 - obj_matrix)
        result = -torch.sum(logps)
        result = result.cpu() if use_cuda else result
        return result.data.numpy()

    def get_entropy(self):
        logps = F.softmax(self.gen_matrix, 2)
        result = torch.mean(torch.sum(logps * torch.log(logps + 1e-10), 1))
        result = result.cpu() if use_cuda else result
        return (-result.data.numpy())

    def randomization(self, fraction):
        # re-randomize gen_matrix; `fraction` is the proportion of entries to reset
        sz = self.gen_matrix.size()[0]
        numbers = int(fraction * sz * sz)
        original = self.gen_matrix.cpu().data.numpy()  # (unused)
        for i in range(numbers):
            ii = np.random.choice(range(sz), (2, 1))
            z = torch.rand(2).cuda() if use_cuda else torch.rand(2)
            self.gen_matrix.data[ii[0], ii[1], :] = z
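# Minimal sketch of drawing a soft adjacency sample, assuming the external
# `gumbel_softmax(logits, temperature, hard)` helper the class relies on
# (e.g. torch.nn.functional.gumbel_softmax(logits, tau=temperature, hard=hard))
# and the module-level `use_cuda` flag:
def _demo_gumbel_generator():
    gen = Gumbel_Generator(sz=4, temp=10, temp_drop_frac=0.99)
    adj = gen.sample(hard=False)     # 4 x 4 matrix of soft edge probabilities
    gen.drop_temperature()           # anneal toward discrete samples
    print(adj.shape, gen.get_temperature())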
class SpectralAttack(BaseAttack):
    """Spectral attack for graph data."""

    def __init__(self, model=None, nnodes=None, loss_type='CE',
                 feature_shape=None, attack_structure=True,
                 attack_features=False, regularization_weight=0.0,
                 device='cpu'):
        super(SpectralAttack, self).__init__(model, nnodes, attack_structure,
                                             attack_features, device)
        assert attack_structure or attack_features, \
            'attack_features and attack_structure cannot both be False'
        self.loss_type = loss_type
        self.modified_adj = None
        self.modified_features = None
        self.regularization_weight = regularization_weight

        if attack_features:
            # original code had `assert True, ...`, which never fires; raise instead
            raise NotImplementedError('Current Spectral Attack does not support attacking features')
        if attack_structure:
            assert nnodes is not None, 'Please give nnodes='
            self.adj_changes = Parameter(
                torch.FloatTensor(int(nnodes * (nnodes - 1) / 2)))
            self.adj_changes.data.fill_(0)

        self.complementary = None

    def attack(self, ori_features, ori_adj, labels, idx_target, idx_train,
               idx_test, n_perturbations, att_lr, epochs=200, verbose=False,
               reduction='mean', **kwargs):
        """Generate perturbations on the input graph."""
        victim_model = self.surrogate
        self.sparse_features = sp.issparse(ori_features)
        # ori_adj, ori_features, labels = utils.to_tensor(ori_adj, ori_features, labels, device=self.device)
        ori_adj_norm = utils.normalize_adj_tensor(ori_adj, device=self.device)
        # torch.symeig is deprecated in recent PyTorch; torch.linalg.eigh is
        # the modern replacement
        ori_e, ori_v = torch.symeig(ori_adj_norm, eigenvectors=True)

        victim_model.eval()
        for t in tqdm(range(epochs), desc='Perturb Adj'):
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj, device=self.device)
            # the GCN forward expects a normalized adjacency
            output = victim_model(ori_features, adj_norm)
            self.loss = self._loss(output[idx_target], labels[idx_target])

            # New: add regularization term for spectral distance
            eigen_mse = 0
            reg_loss = 0
            eigen_norm = self.norm = torch.norm(ori_e)
            if self.regularization_weight != 0:
                e, v = torch.symeig(adj_norm, eigenvectors=True)
                # eigen_mse = F.mse_loss(ori_e, e, reduction=reduction)
                # original had torch.norm(ori_e, e), which misuses the `p`
                # argument; the intended distance is the norm of the difference
                eigen_mse = torch.norm(ori_e - e)
                reg_loss = eigen_mse / eigen_norm * self.regularization_weight

            if verbose and t % 20 == 0:
                loss_target, acc_target = calc_acc(output, labels, idx_target)
                print('-- Epoch {}, '.format(t),
                      'class loss = {:.4f} | '.format(self.loss.item()),
                      'reg loss = {:.8f} | '.format(reg_loss),
                      'eigen_mse = {:.8f} | '.format(eigen_mse),
                      'eigen_norm = {:.4f} | '.format(eigen_norm),
                      'acc = {}'.format(acc_target))

            self.loss += reg_loss
            adj_grad = torch.autograd.grad(self.loss, self.adj_changes)[0]

            if self.loss_type == 'CE':
                # lr = 200 / np.sqrt(t+1)
                lr = att_lr / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)
            if self.loss_type == 'CW':
                # lr = 0.1 / np.sqrt(t+1)
                lr = att_lr / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            self.projection(n_perturbations)

        self.random_sample(ori_adj, ori_features, labels, idx_target, n_perturbations)
        self.modified_adj = self.get_modified_adj(ori_adj).detach()
        self.check_adj_tensor(self.modified_adj)  # sanity check

        ori_adj_norm = utils.normalize_adj_tensor(ori_adj, device=self.device)
        ori_e, ori_v = torch.symeig(ori_adj_norm, eigenvectors=True)
        adj_norm = utils.normalize_adj_tensor(self.modified_adj, device=self.device)
        e, v = torch.symeig(adj_norm, eigenvectors=True)

        self.adj = ori_adj.detach()
        self.labels = labels.detach()
        self.ori_e = ori_e
        self.ori_v = ori_v
        self.e = e
        self.v = v

    def random_sample(self, ori_adj, ori_features, labels, idx_target, n_perturbations):
        K = 10
        best_loss = -1000
        victim_model = self.surrogate
        with torch.no_grad():
            s = self.adj_changes.cpu().detach().numpy()
            for i in range(K):
                sampled = np.random.binomial(1, s)
                # randm = np.random.uniform(size=s.shape[0])
                # sampled = np.where(s > randm, 1, 0)
                # if sampled.sum() > n_perturbations:
                #     continue
                while sampled.sum() > n_perturbations:
                    sampled = np.random.binomial(1, s)
                self.adj_changes.data.copy_(torch.tensor(sampled))
                modified_adj = self.get_modified_adj(ori_adj)
                adj_norm = utils.normalize_adj_tensor(modified_adj, device=self.device)
                output = victim_model(ori_features, adj_norm)
                loss = self._loss(output[idx_target], labels[idx_target])
                # loss = F.nll_loss(output[idx_target], labels[idx_target])
                # print(loss)
                if best_loss < loss:
                    best_loss = loss
                    best_s = sampled
            self.adj_changes.data.copy_(torch.tensor(best_s))

    def get_modified_adj(self, ori_adj):
        if self.complementary is None:
            self.complementary = (torch.ones_like(ori_adj)
                                  - torch.eye(self.nnodes).to(self.device)
                                  - ori_adj) - ori_adj

        m = torch.zeros((self.nnodes, self.nnodes)).to(self.device)
        tril_indices = torch.tril_indices(row=self.nnodes, col=self.nnodes, offset=-1)
        m[tril_indices[0], tril_indices[1]] = self.adj_changes
        m = m + m.t()
        modified_adj = self.complementary * m + ori_adj
        return modified_adj

    def projection(self, n_perturbations):
        if torch.clamp(self.adj_changes, 0, 1).sum() > n_perturbations:
            left = (self.adj_changes - 1).min()
            right = self.adj_changes.max()
            miu = self.bisection(left, right, n_perturbations, epsilon=1e-5)
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data - miu, min=0, max=1))
        else:
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data, min=0, max=1))

    def _loss(self, output, labels):
        if self.loss_type == "CE":
            loss = F.nll_loss(output, labels)
        if self.loss_type == "CW":
            onehot = utils.tensor2onehot(labels)
            best_second_class = (output - 1000 * onehot).argmax(1).detach()
            margin = output[np.arange(len(output)), labels] - \
                output[np.arange(len(output)), best_second_class]
            k = 0
            loss = -torch.clamp(margin, min=k).mean()
            # loss = torch.clamp(margin.sum()+50, min=k)
        return loss

    def bisection(self, a, b, n_perturbations, epsilon):
        def func(x):
            return torch.clamp(self.adj_changes - x, 0, 1).sum() - n_perturbations

        miu = a
        while (b - a) >= epsilon:
            miu = (a + b) / 2
            # check whether the midpoint is a root
            if func(miu) == 0.0:
                break
            # decide which half to keep
            if func(miu) * func(a) < 0:
                b = miu
            else:
                a = miu
        # print("The value of root is : ", "%.4f" % miu)
        return miu
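# A standalone sketch of the projection step above (illustrative values only):
# bisection finds the shift `miu` such that clamp(s - miu, 0, 1) sums to the
# perturbation budget, mirroring SpectralAttack.projection/bisection on a
# plain tensor.
def _demo_projection(budget=2.0, epsilon=1e-5):
    s = torch.tensor([0.9, 0.8, 0.7, 0.4, 0.2])   # clamped sum 3.0 > budget

    def func(x):
        return torch.clamp(s - x, 0, 1).sum() - budget

    a, b = (s - 1).min(), s.max()                  # func(a) > 0, func(b) < 0
    while (b - a) >= epsilon:
        miu = (a + b) / 2
        if func(miu) * func(a) < 0:
            b = miu
        else:
            a = miu
    projected = torch.clamp(s - miu, 0, 1)
    print(projected, projected.sum())              # sums to ~2.0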
class RBFI(nn.Module):
    def __init__(self, in_features, out_features, andor="*", modinf=False,
                 regular_deriv=False, min_input=0.0, max_input=1.0,
                 min_slope=0.001, max_slope=10.0):
        """
        Implementation of RBF module with logloss.
        :param in_features: Number of input features.
        :param out_features: Number of output features.
        :param andor: '^' for and, 'v' for or, '*' for mixed.
        :param modinf: Whether to aggregate using max (if True) or sum (if False).
        :param regular_deriv: Whether to use regular derivatives or not.
        :param min_input: minimum value for w (and therefore min value for input)
        :param max_input: max, as above.
        :param min_slope: min value for u, defining the slope.
        :param max_slope: max value for u, defining the slope.
        """
        super(RBFI, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.andor = andor
        self.modinf = modinf
        self.regular_deriv = regular_deriv
        self.w = BoundedParameter(torch.Tensor(out_features, in_features),
                                  lower_bound=min_input, upper_bound=max_input)
        self.u = BoundedParameter(torch.Tensor(out_features, in_features),
                                  lower_bound=min_slope, upper_bound=max_slope)
        if andor == 'v':
            self.andor01 = Parameter(torch.ones((1, out_features)))
        elif andor == '^':
            self.andor01 = Parameter(torch.zeros((1, out_features)))
        else:
            self.andor01 = Parameter(torch.Tensor(1, out_features))
            self.andor01.data.random_(0, 2)
        self.andor01.requires_grad = False
        self.w.data.uniform_(min_input, max_input)
        # Initialization of u.
        self.u.data.uniform_(0.2, 0.7)  # These could be parameters.
        self.u.data.clamp_(min_slope, max_slope)

    def dumps(self):
        """Writes itself to a string."""
        # Creates a dictionary
        d = dict(in_features=self.in_features,
                 out_features=self.out_features,
                 min_input=self.w.lower_bound,
                 max_input=self.w.upper_bound,
                 min_slope=self.u.lower_bound,
                 max_slope=self.u.upper_bound,
                 modinf=self.modinf,
                 regular_deriv=self.regular_deriv,
                 andor=self.andor,
                 andor01=self.andor01.cpu().numpy(),
                 u=self.u.data.cpu().numpy(),
                 w=self.w.data.cpu().numpy())
        return Serializable.dumps(d)

    @staticmethod
    def loads(s, device):
        """Reads itself from string s."""
        d = Serializable.loads(s)
        m = RBFI(d['in_features'], d['out_features'],
                 andor=d['andor'], modinf=d['modinf'],
                 regular_deriv=d['regular_deriv'],
                 min_input=d['min_input'], max_input=d['max_input'],
                 min_slope=d['min_slope'], max_slope=d['max_slope'])
        m.u.data = torch.from_numpy(d['u']).to(device)
        m.w.data = torch.from_numpy(d['w']).to(device)
        m.andor01.data = torch.from_numpy(d['andor01']).to(device)
        return m

    def forward(self, x):
        # Let n be the input size and m the output size.
        # The tensor x has shape (*, n). To make room for the output,
        # view it as shape (*, 1, n).
        s = list(x.shape)
        new_s = s[:-1] + [1, s[-1]]
        xx = x.view(*new_s)
        xuw = self.u * (xx - self.w)
        xuwsq = xuw * xuw
        # Aggregates into a modulus.
        if self.modinf:
            # take the largest square along the input dimension
            if self.regular_deriv:
                z, _ = torch.max(xuwsq, -1)
                y = torch.exp(-z)
            else:
                z = SharedFeedbackMax.apply(xuwsq)
                y = LargeAttractorExp.apply(z)
        else:
            z = torch.sum(xuwsq, -1)
            if self.regular_deriv:
                y = torch.exp(-z)
            else:
                y = LargeAttractorExp.apply(z)
        # Takes and-orness into account.
        if self.andor == '^':
            return y
        elif self.andor == 'v':
            return 1.0 - y
        else:
            return y + self.andor01 * (1.0 - 2.0 * y)

    def overall_sensitivity(self):
        """Returns the sensitivity to adversarial examples of the layer."""
        if self.modinf:
            s = torch.max(torch.max(self.u, -1)[0], -1)[0].item()
        else:
            # original indexed the scalar result with [0], which raises on a
            # 0-dim tensor; .item() is the intended read-out
            s = torch.max(torch.sqrt(torch.sum(self.u * self.u, -1))).item()
        s *= np.sqrt(2. / np.e)
        return s

    def sensitivity(self, previous_layer):
        """Given the sensitivity of the previous layer (a vector of length
        equal to the number of inputs), computes the sensitivity to
        adversarial examples of the current layer, as a vector of length
        equal to the output size of the layer. If the input sensitivity of
        the previous layer is None, unit sensitivity is assumed."""
        if previous_layer is None:
            previous_layer = self.w.new(1, self.in_features)
            previous_layer.fill_(1.)
        else:
            previous_layer = previous_layer.view(1, self.in_features)
        u_prod = previous_layer * self.u
        if self.modinf:
            # s = torch.max(u_prod, -1)[0]
            s = SharedFeedbackMax.apply(u_prod)
        else:
            s = torch.sqrt(torch.sum(u_prod * u_prod, -1))
        s = s * np.sqrt(2. / np.e)
        return s
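# Hedged usage sketch for the RBFI unit (assumes the external BoundedParameter
# helper is importable; the regular_deriv=True path avoids the custom autograd
# functions SharedFeedbackMax / LargeAttractorExp):
def _demo_rbfi():
    layer = RBFI(in_features=4, out_features=3, andor='^',
                 modinf=False, regular_deriv=True)
    x = torch.rand(8, 4)               # batch of 8 inputs in [0, 1]
    y = layer(x)                       # shape (8, 3), values in [0, 1]
    print(y.shape, layer.overall_sensitivity())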
class BiAAttention(nn.Module):
    '''
    Bi-Affine attention layer.
    '''

    def __init__(self, input_size_decoder, input_size_encoder, num_labels,
                 biaffine=True, **kwargs):
        '''
        Args:
            input_size_encoder: int
                the dimension of the encoder input.
            input_size_decoder: int
                the dimension of the decoder input.
            num_labels: int
                the number of labels of the crf layer
            biaffine: bool
                if apply bi-affine parameter.
            **kwargs:
        '''
        super(BiAAttention, self).__init__()
        self.input_size_encoder = input_size_encoder
        self.input_size_decoder = input_size_decoder
        self.num_labels = num_labels
        self.biaffine = biaffine

        self.W_d = Parameter(torch.Tensor(self.num_labels, self.input_size_decoder))
        self.W_e = Parameter(torch.Tensor(self.num_labels, self.input_size_encoder))
        self.b = Parameter(torch.Tensor(self.num_labels, 1, 1))
        if self.biaffine:
            self.U = Parameter(torch.Tensor(self.num_labels,
                                            self.input_size_decoder,
                                            self.input_size_encoder))
        else:
            self.register_parameter('U', None)
        self.reset_parameters()

    def reset_parameters(self):
        # the non-underscore init functions are deprecated; use the in-place variants
        nn.init.xavier_uniform_(self.W_d)
        nn.init.xavier_uniform_(self.W_e)
        nn.init.constant_(self.b, 0.)
        if self.biaffine:
            nn.init.xavier_uniform_(self.U)

    def forward(self, input_d, input_e, mask_d=None, mask_e=None):
        '''
        Args:
            input_d: Tensor
                the decoder input tensor with shape = [batch, length_decoder, input_size]
            input_e: Tensor
                the child input tensor with shape = [batch, length_encoder, input_size]
            mask_d: Tensor or None
                the mask tensor for decoder with shape = [batch, length_decoder]
            mask_e: Tensor or None
                the mask tensor for encoder with shape = [batch, length_encoder]
        Returns: Tensor
            the energy tensor with shape = [batch, num_label, length, length]
        '''
        assert input_d.size(0) == input_e.size(0), \
            'batch sizes of encoder and decoder are required to be equal.'
        batch, length_decoder, _ = input_d.size()
        _, length_encoder, _ = input_e.size()

        # compute decoder part: [num_label, input_size_decoder] * [batch, input_size_decoder, length_decoder]
        # the output shape is [batch, num_label, length_decoder]
        out_d = torch.matmul(self.W_d, input_d.transpose(1, 2)).unsqueeze(3)
        # compute encoder part: [num_label, input_size_encoder] * [batch, input_size_encoder, length_encoder]
        # the output shape is [batch, num_label, length_encoder]
        out_e = torch.matmul(self.W_e, input_e.transpose(1, 2)).unsqueeze(2)

        # output shape [batch, num_label, length_decoder, length_encoder]
        if self.biaffine:
            # compute bi-affine part
            # [batch, 1, length_decoder, input_size_decoder] * [num_labels, input_size_decoder, input_size_encoder]
            # output shape [batch, num_label, length_decoder, input_size_encoder]
            # output = torch.matmul(input_d.unsqueeze(1), self.U)
            output = torch.matmul(input_d.unsqueeze(1).cpu(), self.U.cpu()).cuda()
            # [batch, num_label, length_decoder, input_size_encoder] * [batch, 1, input_size_encoder, length_encoder]
            # output shape [batch, num_label, length_decoder, length_encoder]
            # output = torch.matmul(output, input_e.unsqueeze(1).transpose(2, 3))
            output = torch.matmul(output.cpu(),
                                  input_e.unsqueeze(1).transpose(2, 3).cpu()).cuda()
            # output = output + self.b
            output = output + out_d + out_e + self.b
        else:
            output = out_d + out_e + self.b

        if mask_d is not None:
            output = output * mask_d.unsqueeze(1).unsqueeze(3) \
                * mask_e.unsqueeze(1).unsqueeze(2)
        return output
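# Hedged shape check for the bi-affine layer (hypothetical sizes). Note that
# the biaffine branch above round-trips through .cpu()/.cuda(), so this
# sketch assumes a CUDA device is available:
def _demo_biaattention():
    attn = BiAAttention(input_size_decoder=16, input_size_encoder=16,
                        num_labels=3, biaffine=True).cuda()
    d = torch.rand(2, 5, 16).cuda()    # [batch, length_decoder, input_size]
    e = torch.rand(2, 7, 16).cuda()    # [batch, length_encoder, input_size]
    energy = attn(d, e)
    print(energy.shape)                # torch.Size([2, 3, 5, 7])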
class PGDAttack(BaseAttack):
    """PGD attack for graph data.

    Parameters
    ----------
    model :
        model to attack. Default `None`.
    nnodes : int
        number of nodes in the input graph
    loss_type : str
        attack loss type, chosen from ['CE', 'CW']
    feature_shape : tuple
        shape of the input node features
    attack_structure : bool
        whether to attack graph structure
    attack_features : bool
        whether to attack node features
    device : str
        'cpu' or 'cuda'

    Examples
    --------
    >>> from deeprobust.graph.data import Dataset
    >>> from deeprobust.graph.defense import GCN
    >>> from deeprobust.graph.global_attack import PGDAttack
    >>> from deeprobust.graph.utils import preprocess
    >>> data = Dataset(root='/tmp/', name='cora')
    >>> adj, features, labels = data.adj, data.features, data.labels
    >>> adj, features, labels = preprocess(adj, features, labels, preprocess_adj=False) # convert to tensor
    >>> idx_train, idx_val, idx_test = data.idx_train, data.idx_val, data.idx_test
    >>> # Setup Victim Model
    >>> victim_model = GCN(nfeat=features.shape[1], nclass=labels.max().item()+1,
    ...                    nhid=16, dropout=0.5, weight_decay=5e-4, device='cpu').to('cpu')
    >>> victim_model.fit(features, adj, labels, idx_train)
    >>> # Setup Attack Model
    >>> model = PGDAttack(model=victim_model, nnodes=adj.shape[0], loss_type='CE', device='cpu').to('cpu')
    >>> model.attack(features, adj, labels, idx_train, n_perturbations=10)
    >>> modified_adj = model.modified_adj
    """

    def __init__(self, model=None, nnodes=None, loss_type='CE',
                 feature_shape=None, attack_structure=True,
                 attack_features=False, device='cpu'):
        super(PGDAttack, self).__init__(model, nnodes, attack_structure,
                                        attack_features, device)
        assert attack_features or attack_structure, \
            'attack_features and attack_structure cannot both be False'
        self.loss_type = loss_type
        self.modified_adj = None
        self.modified_features = None

        if attack_structure:
            assert nnodes is not None, 'Please give nnodes='
            self.adj_changes = Parameter(
                torch.FloatTensor(int(nnodes * (nnodes - 1) / 2)))
            self.adj_changes.data.fill_(0)
        if attack_features:
            # original code had `assert True, ...`, which never fires; raise instead
            raise NotImplementedError('Topology Attack does not support attacking features')

        self.complementary = None

    def attack(self, ori_features, ori_adj, labels, idx_train,
               n_perturbations, epochs=200, **kwargs):
        """Generate perturbations on the input graph.

        Parameters
        ----------
        ori_features :
            Original (unperturbed) node feature matrix
        ori_adj :
            Original (unperturbed) adjacency matrix
        labels :
            node labels
        idx_train :
            node training indices
        n_perturbations : int
            Number of perturbations on the input graph. Perturbations could
            be edge removals/additions or feature removals/additions.
        epochs :
            number of training epochs
        """
        victim_model = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        ori_adj, ori_features, labels = utils.to_tensor(
            ori_adj, ori_features, labels, device=self.device)

        victim_model.eval()
        for t in tqdm(range(epochs)):
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj)
            output = victim_model(ori_features, adj_norm)
            # loss = F.nll_loss(output[idx_train], labels[idx_train])
            loss = self._loss(output[idx_train], labels[idx_train])
            adj_grad = torch.autograd.grad(loss, self.adj_changes)[0]

            if self.loss_type == 'CE':
                lr = 200 / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)
            if self.loss_type == 'CW':
                lr = 0.1 / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            self.projection(n_perturbations)

        self.random_sample(ori_adj, ori_features, labels, idx_train, n_perturbations)
        self.modified_adj = self.get_modified_adj(ori_adj).detach()

    def random_sample(self, ori_adj, ori_features, labels, idx_train, n_perturbations):
        K = 20
        best_loss = -1000
        victim_model = self.surrogate
        with torch.no_grad():
            s = self.adj_changes.cpu().detach().numpy()
            for i in range(K):
                sampled = np.random.binomial(1, s)
                print(sampled.sum())
                if sampled.sum() > n_perturbations:
                    continue
                self.adj_changes.data.copy_(torch.tensor(sampled))
                modified_adj = self.get_modified_adj(ori_adj)
                adj_norm = utils.normalize_adj_tensor(modified_adj)
                output = victim_model(ori_features, adj_norm)
                loss = self._loss(output[idx_train], labels[idx_train])
                # loss = F.nll_loss(output[idx_train], labels[idx_train])
                print(loss)
                if best_loss < loss:
                    best_loss = loss
                    best_s = sampled
            self.adj_changes.data.copy_(torch.tensor(best_s))

    def _loss(self, output, labels):
        if self.loss_type == "CE":
            loss = F.nll_loss(output, labels)
        if self.loss_type == "CW":
            onehot = utils.tensor2onehot(labels)
            best_second_class = (output - 1000 * onehot).argmax(1)
            margin = output[np.arange(len(output)), labels] - \
                output[np.arange(len(output)), best_second_class]
            k = 0
            loss = -torch.clamp(margin, min=k).mean()
            # loss = torch.clamp(margin.sum()+50, min=k)
        return loss

    def projection(self, n_perturbations):
        # projected = torch.clamp(self.adj_changes, 0, 1)
        if torch.clamp(self.adj_changes, 0, 1).sum() > n_perturbations:
            left = (self.adj_changes - 1).min()
            right = self.adj_changes.max()
            miu = self.bisection(left, right, n_perturbations, epsilon=1e-5)
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data - miu, min=0, max=1))
        else:
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data, min=0, max=1))

    def get_modified_adj(self, ori_adj):
        if self.complementary is None:
            self.complementary = (torch.ones_like(ori_adj)
                                  - torch.eye(self.nnodes).to(self.device)
                                  - ori_adj) - ori_adj

        m = torch.zeros((self.nnodes, self.nnodes)).to(self.device)
        # adj_changes holds nnodes*(nnodes-1)/2 entries, one per strictly
        # lower-triangular position; the original indexing
        # (row=nnodes-1, col=nnodes-1, offset=0) placed some entries on the
        # diagonal of a submatrix
        tril_indices = torch.tril_indices(row=self.nnodes, col=self.nnodes, offset=-1)
        m[tril_indices[0], tril_indices[1]] = self.adj_changes
        # m += m.t()
        m = m + m.t()
        modified_adj = self.complementary * m + ori_adj
        return modified_adj

    def bisection(self, a, b, n_perturbations, epsilon):
        def func(x):
            return torch.clamp(self.adj_changes - x, 0, 1).sum() - n_perturbations

        miu = a
        while (b - a) >= epsilon:
            miu = (a + b) / 2
            # check whether the midpoint is a root
            if func(miu) == 0.0:
                break
            # decide which half to keep
            if func(miu) * func(a) < 0:
                b = miu
            else:
                a = miu
        # print("The value of root is : ", "%.4f" % miu)
        return miu
class PGDAttack(BaseAttack):
    """Spectral attack for graph data."""

    def __init__(self, model=None, nnodes=None, loss_type='CE',
                 feature_shape=None, attack_structure=True,
                 attack_features=False, loss_weight=1.0,
                 regularization_weight=0.0, device='cpu'):
        super(PGDAttack, self).__init__(model, nnodes, attack_structure,
                                        attack_features, device)
        assert attack_structure or attack_features, \
            'attack_features and attack_structure cannot both be False'
        self.loss_type = loss_type
        self.modified_adj = None
        self.modified_features = None
        self.loss_weight = loss_weight
        self.regularization_weight = regularization_weight

        if attack_features:
            # original code had `assert True, ...`, which never fires; raise instead
            raise NotImplementedError('Current Spectral Attack does not support attacking features')
        if attack_structure:
            assert nnodes is not None, 'Please give nnodes='
            self.adj_changes = Parameter(
                torch.FloatTensor(int(nnodes * (nnodes - 1) / 2)))
            torch.nn.init.uniform_(self.adj_changes, 0.0, 0.001)
            # self.adj_changes.data.fill_(0)

        self.complementary = None

    def set_model(self, model):
        self.surrogate = model

    def attack(self, ori_features, ori_adj, labels, idx_target,
               n_perturbations, att_lr, epochs=200, distance_type='l2',
               sample_type='sample', opt_type='max', verbose=True, **kwargs):
        """Generate perturbations on the input graph."""
        victim_model = self.surrogate
        self.sparse_features = sp.issparse(ori_features)
        # ori_adj, ori_features, labels = utils.to_tensor(ori_adj, ori_features, labels, device=self.device)
        ori_adj_norm = utils.normalize_adj_tensor(ori_adj, device=self.device)
        ori_e, ori_v = torch.symeig(ori_adj_norm, eigenvectors=True)

        l, r, m = 0, 0, 0
        victim_model.eval()
        # for t in tqdm(range(epochs), desc='Perturb Adj'):
        for t in tqdm(range(epochs)):
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj, device=self.device)
            # the GCN forward expects a normalized adjacency
            output = victim_model(ori_features, adj_norm)
            task_loss = self._loss(output[idx_target], labels[idx_target])

            # spectral distance terms
            eigen_mse = torch.tensor(0)
            eigen_self = torch.tensor(0)
            eigen_gf = torch.tensor(0)
            eigen_norm = self.norm = torch.norm(ori_e)
            if self.regularization_weight != 0:
                # add noise to make the graph asymmetric
                modified_adj_noise = modified_adj
                # modified_adj_noise = self.add_random_noise(modified_adj)
                adj_norm_noise = utils.normalize_adj_tensor(
                    modified_adj_noise, device=self.device)
                e, v = torch.symeig(adj_norm_noise, eigenvectors=True)
                eigen_mse = torch.norm(ori_e - e)
                eigen_self = torch.norm(e)

                # low-rank loss in GF-attack
                idx = torch.argsort(e)[:128]
                mask = torch.zeros_like(e).bool()
                mask[idx] = True
                eigen_gf = torch.pow(torch.norm(e * mask, p=2), 2) * torch.pow(
                    torch.norm(torch.matmul(v.detach() * mask, ori_features), p=2), 2)

            reg_loss = 0
            if distance_type == 'l2':
                reg_loss = eigen_mse / eigen_norm
            elif distance_type == 'normDiv':
                reg_loss = eigen_self / eigen_norm
            elif distance_type == 'gf':
                reg_loss = eigen_gf
            else:
                exit(f'unknown distance metric: {distance_type}')

            if verbose and t % 20 == 0:
                loss_target, acc_target = calc_acc(output, labels, idx_target)
                print('-- Epoch {}, '.format(t),
                      'ptb budget/true = {:.1f}/{:.1f}'.format(
                          n_perturbations, torch.clamp(self.adj_changes, 0, 1).sum()),
                      'l/r/m = {:.4f}/{:.4f}/{:.4f}'.format(l, r, m),
                      'class loss = {:.4f} | '.format(task_loss.item()),
                      'reg loss = {:.4f} | '.format(reg_loss.item()),
                      'mse_norm = {:.4f} | '.format(eigen_norm),
                      'eigen_mse = {:.4f} | '.format(eigen_mse),
                      'eigen_self = {:.4f} | '.format(eigen_self),
                      'acc/mis = {:.4f}/{:.4f}'.format(acc_target, 1 - acc_target))

            self.loss = self.loss_weight * task_loss + \
                self.regularization_weight * reg_loss
            adj_grad = torch.autograd.grad(self.loss, self.adj_changes)[0]

            if self.loss_type == 'CE':
                lr = att_lr / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)
            if self.loss_type == 'CW':
                lr = att_lr / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            # return self.adj_changes.cpu().detach().numpy()

            if verbose and t % 20 == 0:
                print('budget/true={:.1f}/{:.1f}'.format(
                    n_perturbations, torch.clamp(self.adj_changes, 0, 1).sum()))

            if sample_type == 'sample':
                l, r, m = self.projection(n_perturbations)
            elif sample_type == 'greedy':
                self.greedy(n_perturbations)
            elif sample_type == 'greedy2':
                self.greedy2(n_perturbations)
            elif sample_type == 'greedy3':
                self.greedy3(n_perturbations)
            else:
                exit(f'unknown sample type {sample_type}')

            if verbose and t % 20 == 0:
                print('budget/true={:.1f}/{:.1f}'.format(
                    n_perturbations, torch.clamp(self.adj_changes, 0, 1).sum()))

        if sample_type == 'sample':
            self.random_sample(ori_adj, ori_features, labels, idx_target, n_perturbations)
        elif sample_type == 'greedy':
            self.greedy(n_perturbations)
        elif sample_type == 'greedy2':
            self.greedy2(n_perturbations)
        elif sample_type == 'greedy3':
            self.greedy3(n_perturbations)
        else:
            exit(f'unknown sample type {sample_type}')

        print('final ptb budget/true= {:.1f}/{:.1f}'.format(
            n_perturbations, self.adj_changes.sum()))
        self.modified_adj = self.get_modified_adj(ori_adj).detach()
        self.check_adj_tensor(self.modified_adj)  # sanity check

        ori_adj_norm = utils.normalize_adj_tensor(ori_adj, device=self.device)
        ori_e, ori_v = torch.symeig(ori_adj_norm, eigenvectors=True)
        adj_norm = utils.normalize_adj_tensor(self.modified_adj, device=self.device)
        e, v = torch.symeig(adj_norm, eigenvectors=True)

        self.adj = ori_adj.detach()
        self.labels = labels.detach()
        self.ori_e = ori_e
        self.ori_v = ori_v
        self.e = e
        self.v = v

    def greedy(self, n_perturbations):
        s = self.adj_changes.cpu().detach().numpy()
        # l = min(s)
        # r = max(s)
        # noise = np.random.normal((l+r)/2, 0.1*(r-l), s.shape)
        # s += noise
        s_vec = np.squeeze(np.reshape(s, (1, -1)))
        # max_index = (-np.absolute(s_vec)).argsort()[:n_perturbations]
        max_index = (-s_vec).argsort()[:n_perturbations]

        mask = np.zeros_like(s_vec)
        mask[max_index] = 1.0
        best_s = np.reshape(mask, s.shape)
        self.adj_changes.data.copy_(
            torch.clamp(torch.tensor(best_s), min=0, max=1))

    def greedy3(self, n_perturbations):
        s = self.adj_changes.cpu().detach().numpy()
        s_vec = np.squeeze(np.reshape(s, (1, -1)))
        # max_index = (-np.absolute(s_vec)).argsort()[:n_perturbations]
        max_index = (s_vec).argsort()[:n_perturbations]

        mask = np.zeros_like(s_vec)
        mask[max_index] = 1.0
        best_s = np.reshape(mask, s.shape)
        self.adj_changes.data.copy_(
            torch.clamp(torch.tensor(best_s), min=0, max=1))

    def greedy2(self, n_perturbations):
        s = self.adj_changes.cpu().detach().numpy()
        l = min(s)
        r = max(s)
        noise = np.random.normal((l + r) / 2, 0.4 * (r - l), s.shape)
        s += noise
        s_vec = np.squeeze(np.reshape(s, (1, -1)))
        max_index = (-np.absolute(s_vec)).argsort()[:n_perturbations]

        mask = np.zeros_like(s_vec)
        mask[max_index] = 1.0
        best_s = np.reshape(mask, s.shape)
        self.adj_changes.data.copy_(
            torch.clamp(torch.tensor(best_s), min=0, max=1))

    def random_sample(self, ori_adj, ori_features, labels, idx_target, n_perturbations):
        K = 10
        best_loss = -1000
        victim_model = self.surrogate
        with torch.no_grad():
            s = self.adj_changes.cpu().detach().numpy()
            for i in range(K):
                sampled = np.random.binomial(1, s)
                # randm = np.random.uniform(size=s.shape[0])
                # sampled = np.where(s > randm, 1, 0)
                # if sampled.sum() > n_perturbations:
                #     continue
                while sampled.sum() > n_perturbations:
                    sampled = np.random.binomial(1, s)
                # if sampled.sum() > n_perturbations:
                #     indices = np.transpose(np.nonzero(sampled))
                #     candidate_idx = [m for m in range(indices.shape[0])]
                #     chosen_idx = np.random.choice(candidate_idx, n_perturbations, replace=False)
                #     chosen_indices = indices[chosen_idx, :]
                #     sampled = np.zeros_like(sampled)
                #     for idx in chosen_indices:
                #         sampled[idx] = 1
                self.adj_changes.data.copy_(torch.tensor(sampled))
                modified_adj = self.get_modified_adj(ori_adj)
                adj_norm = utils.normalize_adj_tensor(modified_adj, device=self.device)
                output = victim_model(ori_features, adj_norm)
                loss = self._loss(output[idx_target], labels[idx_target])
                # loss = F.nll_loss(output[idx_target], labels[idx_target])
                # print(loss)
                if best_loss < loss:
                    best_loss = loss
                    best_s = sampled
            self.adj_changes.data.copy_(torch.tensor(best_s))

    def get_modified_adj(self, ori_adj):
        if self.complementary is None:
            self.complementary = (torch.ones_like(ori_adj)
                                  - torch.eye(self.nnodes).to(self.device)
                                  - ori_adj) - ori_adj

        m = torch.zeros((self.nnodes, self.nnodes)).to(self.device)
        tril_indices = torch.tril_indices(row=self.nnodes, col=self.nnodes, offset=-1)
        m[tril_indices[0], tril_indices[1]] = self.adj_changes
        m = m + m.t()
        modified_adj = self.complementary * m + ori_adj
        return modified_adj

    def add_random_noise(self, ori_adj):
        noise = 1e-4 * torch.rand(self.nnodes, self.nnodes).to(self.device)
        return (noise + torch.transpose(noise, 0, 1)) / 2.0 + ori_adj

    def projection2(self, n_perturbations):
        s = self.adj_changes.cpu().detach().numpy()
        n = np.squeeze(np.reshape(s, (1, -1))).shape[0]
        self.adj_changes.data.copy_(
            torch.clamp(self.adj_changes.data, min=0, max=n_perturbations / n))
        return 0, 0, 0

    def projection(self, n_perturbations):
        l, r, m = 0, 0, 0
        if torch.clamp(self.adj_changes, 0, 1).sum() > n_perturbations:
            left = (self.adj_changes).min()
            right = self.adj_changes.max()
            miu = self.bisection(left, right, n_perturbations, epsilon=1e-5)
            l = left.cpu().detach()
            r = right.cpu().detach()
            m = miu.cpu().detach()
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data - miu, min=0, max=1))
        else:
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data, min=0, max=1))
        return l, r, m

    def _loss(self, output, labels):
        if self.loss_type == "CE":
            loss = F.nll_loss(output, labels)
        if self.loss_type == "CW":
            onehot = utils.tensor2onehot(labels)
            best_second_class = (output - 1000 * onehot).argmax(1).detach()
            margin = output[np.arange(len(output)), labels] - \
                output[np.arange(len(output)), best_second_class]
            k = 0
            loss = -torch.clamp(margin, min=k).mean()
            # loss = torch.clamp(margin.sum()+50, min=k)
        return loss

    def bisection(self, a, b, n_perturbations, epsilon):
        def func(x):
            return torch.clamp(self.adj_changes - x, 0, 1).sum() - n_perturbations

        miu = a
        while (b - a) >= epsilon:
            miu = (a + b) / 2
            # check whether the midpoint is a root
            if func(miu) == 0.0:
                b = miu
                break
            # decide which half to keep
            if func(miu) * func(a) < 0:
                b = miu
            else:
                a = miu
        # print("The value of root is : ", "%.4f" % miu)
        return miu
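# Hedged usage sketch for the spectral PGD attack above (assumes a trained
# deeprobust-style GCN surrogate and that features/adj/labels are already
# tensors on the chosen device):
def _demo_spectral_pgd(victim_model, features, adj, labels, idx_target):
    attack = PGDAttack(model=victim_model, nnodes=adj.shape[0],
                       loss_type='CE', regularization_weight=1.0, device='cpu')
    attack.attack(features, adj, labels, idx_target,
                  n_perturbations=10, att_lr=200, epochs=200,
                  distance_type='l2', sample_type='sample')
    return attack.modified_adj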
class GPRegressor(nn.Module):
    def __init__(self, kernel, sn=0.1, lr=1e-1, scheduler=False, prior=True):
        super(GPRegressor, self).__init__()
        self.sn = Parameter(torch.Tensor([sn]))
        self.kernel = kernel
        self.loss_func = NLMLLoss()
        opt = [p for p in self.parameters() if p.requires_grad]
        self.optimizer = optim.Adam(opt, lr=lr)
        if prior:
            self.prior = torch.distributions.Beta(2, 2).log_prob
        else:
            self.prior = None
        if scheduler:
            self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                self.optimizer, patience=2, verbose=True, mode='max')
        else:
            self.scheduler = None

    def loss(self, X, y, jitter, val=None):
        K = self.kernel(X, X)
        inds = list(range(len(K)))
        K[[inds], [inds]] += self.sn + jitter
        # torch.potrf/torch.trtrs are the legacy (pre-1.0) spellings of
        # torch.cholesky/torch.triangular_solve (now torch.linalg.cholesky /
        # torch.linalg.solve_triangular)
        L = torch.potrf(K, upper=False)
        alpha = torch.trtrs(y, L, upper=False)[0]
        alpha = torch.trtrs(alpha, L.t(), upper=True)[0]
        loss = self.loss_func(L, alpha, y)
        if self.prior is not None:
            loss -= self.prior(self.sn)
        if val is not None:
            X_val, y_val = val
            k_star = self.kernel(X, X_val)
            mu = k_star.t() @ alpha
            mse = nn.MSELoss()(mu, y_val)
            return loss, mse
        else:
            return loss

    def forward(self, X):
        """
        Gaussian process regression predictions.

        Parameters:
            X: m x d points to predict
        Returns:
            mu: m x 1 predicted means
            var: m x m predicted covariance

        Follows Algorithm 2.1 from GPML.
        """
        ### Implement prior ###
        ### Scaling
        k_star = self.kernel(self.X, X)
        mu = k_star.t() @ self.alpha
        v = torch.trtrs(k_star, self.L, upper=False)[0]
        k_ss = self.kernel(X, X)
        var = k_ss - v.t() @ v
        return mu, var

    def fit(self, X, y, its=100, jitter=1e-6, verbose=True, val=None, chkpt=None):
        self.X = X
        self.y = y
        self._fit(X, y, its, jitter, verbose, val, chkpt)
        self._set_pars(jitter)
        return self.history

    def _fit(self, X, y, its, jitter, verbose, val, chkpt):
        self.history = []
        if val is not None and chkpt is not None:
            best_mse = 1e14
        for it in range(its):
            if val is not None:
                loss, mse = self.loss(X, y, jitter, val=val)
                mse = mse.item()
                if chkpt is not None and mse < best_mse:
                    # original never updated best_mse, so it checkpointed every
                    # iteration; track the best value here
                    best_mse = mse
                    torch.save(self.state_dict(), chkpt)
            else:
                loss = self.loss(X, y, jitter)

            # backward
            self.optimizer.zero_grad()
            loss.backward(retain_graph=False)

            # update parameters
            self.optimizer.step()
            self.sn.data.clamp_(min=1e-6)
            # if self.scheduler is not None:
            #     self.scheduler.step(loss)

            if verbose:
                update = '\rIteration %d of %d\tNLML: %.4f\tsn: %.6f\t' \
                    % (it + 1, its, loss, self.sn.cpu().detach().numpy()[0])
                print(update, end='')
                if val is not None:
                    print('val mse: %.4f' % mse, end='')

            if val is None:
                h = (loss.item(), self.sn.item())
            else:
                h = (loss.item(), self.sn.item(), mse)
                del mse
            self.history.append(h)
            del loss

    def _set_pars(self, jitter):
        Ky = self.kernel(self.X, self.X)
        inds = list(range(len(Ky)))
        Ky[[inds], [inds]] += self.sn + jitter
        self.L = torch.potrf(Ky, upper=False)
        self.alpha = torch.trtrs(self.y, self.L, upper=False)[0]
        self.alpha = torch.trtrs(self.alpha, self.L.t(), upper=True)[0]
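# Hedged usage sketch. The `kernel` argument and NLMLLoss are external to the
# class, so this demo supplies a minimal (hypothetical) RBF kernel module; it
# also assumes the legacy torch.potrf/torch.trtrs API the class calls, i.e.
# an old PyTorch version.
class _DemoRBFKernel(nn.Module):
    def __init__(self, lengthscale=1.0):
        super(_DemoRBFKernel, self).__init__()
        self.lengthscale = Parameter(torch.tensor([lengthscale]))

    def forward(self, X1, X2):
        d2 = torch.cdist(X1, X2) ** 2          # pairwise squared distances
        return torch.exp(-0.5 * d2 / self.lengthscale ** 2)

def _demo_gp():
    X = torch.linspace(0, 1, 20).view(-1, 1)
    y = torch.sin(6 * X) + 0.1 * torch.randn_like(X)
    gp = GPRegressor(_DemoRBFKernel(), sn=0.1, lr=1e-1, prior=False)
    gp.fit(X, y, its=50, verbose=False)
    mu, var = gp(torch.linspace(0, 1, 5).view(-1, 1))
    print(mu.shape, var.shape)                 # (5, 1), (5, 5)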
class PGDAttack(BaseAttack):
    def __init__(self, model=None, nnodes=None, loss_type='CE',
                 feature_shape=None, attack_structure=True,
                 attack_features=False, device='cpu'):
        super(PGDAttack, self).__init__(model, nnodes, attack_structure,
                                        attack_features, device)
        assert attack_features or attack_structure, \
            'attack_features and attack_structure cannot both be False'
        self.loss_type = loss_type
        self.modified_adj = None
        self.modified_features = None

        if attack_structure:
            assert nnodes is not None, 'Please give nnodes='
            self.adj_changes = Parameter(
                torch.FloatTensor(int(nnodes * (nnodes - 1) / 2)))
            self.adj_changes.data.fill_(0)
        if attack_features:
            # original code had `assert True, ...`, which never fires; raise instead
            raise NotImplementedError('Topology Attack does not support attacking features')

        self.complementary = None

    def attack(self, ori_features, ori_adj, labels, idx_train,
               n_perturbations, epochs=200, **kwargs):
        victim_model = self.surrogate

        self.sparse_features = sp.issparse(ori_features)
        ori_adj, ori_features, labels = utils.to_tensor(
            ori_adj, ori_features, labels, device=self.device)

        victim_model.eval()
        for t in tqdm(range(epochs)):
            modified_adj = self.get_modified_adj(ori_adj)
            adj_norm = utils.normalize_adj_tensor(modified_adj)
            output = victim_model(ori_features, adj_norm)
            # loss = F.nll_loss(output[idx_train], labels[idx_train])
            loss = self._loss(output[idx_train], labels[idx_train])
            adj_grad = torch.autograd.grad(loss, self.adj_changes)[0]

            if self.loss_type == 'CE':
                lr = 200 / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)
            if self.loss_type == 'CW':
                lr = 0.1 / np.sqrt(t + 1)
                self.adj_changes.data.add_(lr * adj_grad)

            self.projection(n_perturbations)

        self.random_sample(ori_adj, ori_features, labels, idx_train, n_perturbations)
        self.modified_adj = self.get_modified_adj(ori_adj).detach()

    def random_sample(self, ori_adj, ori_features, labels, idx_train, n_perturbations):
        K = 20
        best_loss = -1000
        victim_model = self.surrogate
        with torch.no_grad():
            s = self.adj_changes.cpu().detach().numpy()
            for i in range(K):
                sampled = np.random.binomial(1, s)
                print(sampled.sum())
                if sampled.sum() > n_perturbations:
                    continue
                self.adj_changes.data.copy_(torch.tensor(sampled))
                modified_adj = self.get_modified_adj(ori_adj)
                adj_norm = utils.normalize_adj_tensor(modified_adj)
                output = victim_model(ori_features, adj_norm)
                loss = self._loss(output[idx_train], labels[idx_train])
                # loss = F.nll_loss(output[idx_train], labels[idx_train])
                print(loss)
                if best_loss < loss:
                    best_loss = loss
                    best_s = sampled
            self.adj_changes.data.copy_(torch.tensor(best_s))

    def _loss(self, output, labels):
        if self.loss_type == "CE":
            loss = F.nll_loss(output, labels)
        if self.loss_type == "CW":
            onehot = utils.tensor2onehot(labels)
            best_second_class = (output - 1000 * onehot).argmax(1)
            margin = output[np.arange(len(output)), labels] - \
                output[np.arange(len(output)), best_second_class]
            k = 0
            loss = -torch.clamp(margin, min=k).mean()
            # loss = torch.clamp(margin.sum()+50, min=k)
        return loss

    def projection(self, n_perturbations):
        # projected = torch.clamp(self.adj_changes, 0, 1)
        if torch.clamp(self.adj_changes, 0, 1).sum() > n_perturbations:
            left = (self.adj_changes - 1).min()
            right = self.adj_changes.max()
            miu = self.bisection(left, right, n_perturbations, epsilon=1e-5)
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data - miu, min=0, max=1))
        else:
            self.adj_changes.data.copy_(
                torch.clamp(self.adj_changes.data, min=0, max=1))

    def get_modified_adj(self, ori_adj):
        if self.complementary is None:
            self.complementary = (torch.ones_like(ori_adj)
                                  - torch.eye(self.nnodes).to(self.device)
                                  - ori_adj) - ori_adj

        m = torch.zeros((self.nnodes, self.nnodes)).to(self.device)
        # see the note in the earlier PGDAttack: adj_changes indexes the
        # strictly lower triangle of the full n x n matrix
        tril_indices = torch.tril_indices(row=self.nnodes, col=self.nnodes, offset=-1)
        m[tril_indices[0], tril_indices[1]] = self.adj_changes
        # m += m.t()
        m = m + m.t()
        modified_adj = self.complementary * m + ori_adj
        return modified_adj

    def bisection(self, a, b, n_perturbations, epsilon):
        def func(x):
            return torch.clamp(self.adj_changes - x, 0, 1).sum() - n_perturbations

        miu = a
        while (b - a) >= epsilon:
            miu = (a + b) / 2
            # check whether the midpoint is a root
            if func(miu) == 0.0:
                break
            # decide which half to keep
            if func(miu) * func(a) < 0:
                b = miu
            else:
                a = miu
        # print("The value of root is : ", "%.4f" % miu)
        return miu
class MixedOp(nn.Module):
    """mixed operation"""
    MODE = None  # full, two, None, full_v2

    def __init__(self, C_in, C_out, stride):
        super(MixedOp, self).__init__()
        self._ops = nn.ModuleList()
        self.shortcut = None
        self.candidate = PRIMITIVES
        self.active_index = [0]
        self.inactive_index = None
        self.current_prob_over_ops = None
        if stride == 1 and C_in == C_out:
            OPS.update(OPS_ZERO)
            self.candidate = PRIMITIVES + ['zero']
            self.shortcut = Identity(C_in, C_out, stride)
        for primitive in self.candidate:
            # if primitive == 'identity' and C_in != C_out:
            #     continue
            op = OPS[primitive](C_in, C_out, stride, False)
            self._ops.append(op)
        self.n_choices = len(self._ops)
        self.path_gate = Parameter(torch.Tensor(self.n_choices))  # binary gates
        self.alpha = Parameter(torch.Tensor(self.n_choices))  # architecture parameters
        # self.alpha = Variable(
        #     1e-3*torch.randn(1, len(self._ops)).cuda(), requires_grad=True)

    def binarize(self):
        # reset binary gates
        self.path_gate.data.zero_()
        # sample two ops according to `probs`
        probs = F.softmax(self.alpha, dim=0)
        sample_op = torch.multinomial(probs.data, 2, replacement=False)
        probs_slice = F.softmax(torch.stack(
            [self.alpha[idx] for idx in sample_op]), dim=0)
        self.current_prob_over_ops = torch.zeros_like(probs)
        for i, idx in enumerate(sample_op):
            self.current_prob_over_ops[idx] = probs_slice[i]
        # choose one op to be active and the other to be inactive according to probs_slice
        c = torch.multinomial(probs_slice.data, 1)[0]  # 0 or 1
        active_op = sample_op[c].item()
        inactive_op = sample_op[1 - c].item()
        self.active_index = [active_op]
        self.inactive_index = [inactive_op]
        # set binary gate
        self.path_gate.data[active_op] = 1.0
        # avoid over-regularization
        for _i in range(len(probs)):
            for name, param in self._ops[_i].named_parameters():
                param.grad = None

    @property
    def chosen_index(self):
        probs = self.alpha.cpu().numpy()
        index = int(np.argmax(probs))
        return index, probs[index]

    def set_chosen_op_active(self):
        chosen_idx, _ = self.chosen_index
        self.active_index = [chosen_idx]
        self.inactive_index = [_i for _i in range(0, chosen_idx)] + \
            [_i for _i in range(chosen_idx + 1, self.n_choices)]

    def is_zero_layer(self):
        return self.active_op.is_zero_layer()

    @property
    def active_op(self):
        """assume only one path is active"""
        return self._ops[self.active_index[0]]

    @property
    def active_op_name(self):
        """assume only one path is active"""
        return self.candidate[self.active_index[0]]

    def set_arch_param_grad(self):
        binary_grads = self.path_gate.grad.data
        if self.active_op.is_zero_layer():
            self.alpha.grad = None
            return
        if self.alpha.grad is None:
            self.alpha.grad = torch.zeros_like(self.alpha.data)
        involved_idx = self.active_index + self.inactive_index
        probs_slice = F.softmax(torch.stack(
            [self.alpha[idx] for idx in involved_idx]), dim=0).data
        for i in range(2):
            for j in range(2):
                origin_i = involved_idx[i]
                origin_j = involved_idx[j]
                self.alpha.grad.data[origin_i] += \
                    binary_grads[origin_j] * probs_slice[j] * \
                    (delta_ij(i, j) - probs_slice[i])
        for _i, idx in enumerate(self.active_index):
            self.active_index[_i] = (idx, self.alpha.data[idx].item())
        for _i, idx in enumerate(self.inactive_index):
            self.inactive_index[_i] = (idx, self.alpha.data[idx].item())
        return

    def rescale_updated_arch_param(self):
        if not isinstance(self.active_index[0], tuple):
            assert self.active_op.is_zero_layer()
            return
        involved_idx = [idx for idx, _ in (self.active_index + self.inactive_index)]
        old_alphas = [alpha for _, alpha in (self.active_index + self.inactive_index)]
        new_alphas = [self.alpha.data[idx] for idx in involved_idx]
        offset = math.log(
            sum([math.exp(alpha) for alpha in new_alphas]) /
            sum([math.exp(alpha) for alpha in old_alphas]))
        for idx in involved_idx:
            self.alpha.data[idx] -= offset

    @property
    def module_str(self):
        chosen_index, probs = self.chosen_index
        return 'MixedOp(%s, %.3f)' % (self.candidate[chosen_index], probs)

    def forward(self, x):
        output = 0
        # only two of the N op weights are involved; only the active op's
        # output carries gradients, the inactive op's output is detached
        for _i in self.active_index:
            oi = self._ops[_i](x)
            output = output + self.path_gate[_i] * oi
        for _i in self.inactive_index:
            oi = self._ops[_i](x)
            output = output + self.path_gate[_i] * oi.detach()
        return output
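# Hedged sketch of one ProxylessNAS-style step with the mixed op above
# (assumes the external OPS/PRIMITIVES registries and the Identity op are
# importable from the source repo):
def _demo_mixed_op(x):
    op = MixedOp(C_in=16, C_out=16, stride=1)
    op.alpha.data.normal_(0, 1e-3)   # initialize architecture parameters
    op.binarize()                    # sample active/inactive paths, set binary gates
    out = op(x)                      # forward through the two sampled paths
    # after backward() on a loss over `out`:
    # op.set_arch_param_grad()       # turn binary-gate grads into alpha grads
    return out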
class ONN(nn.Module):
    def __init__(self, features_size, max_num_hidden_layers,
                 qtd_neuron_per_hidden_layer, n_classes, loss_fun,
                 batch_size=1, b=0.99, n=0.001, s=0.2, use_cuda=False):
        super(ONN, self).__init__()
        if torch.cuda.is_available() and use_cuda:
            print("Using CUDA :]")
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() and use_cuda else "cpu")
        self.features_size = features_size
        self.max_num_hidden_layers = max_num_hidden_layers
        self.qtd_neuron_per_hidden_layer = qtd_neuron_per_hidden_layer
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.b = Parameter(torch.tensor(b), requires_grad=False).to(self.device)
        self.n = Parameter(torch.tensor(n), requires_grad=False).to(self.device)
        self.s = Parameter(torch.tensor(s), requires_grad=False).to(self.device)
        self.loss_fun = loss_fun
        self.t = 0
        self.hidden_layers = []
        self.output_layers = []

        self.hidden_layers.append(
            nn.Linear(features_size, qtd_neuron_per_hidden_layer))
        for i in range(max_num_hidden_layers - 1):
            self.hidden_layers.append(
                nn.Linear(qtd_neuron_per_hidden_layer, qtd_neuron_per_hidden_layer))
        for i in range(max_num_hidden_layers):
            self.output_layers.append(
                nn.Linear(qtd_neuron_per_hidden_layer, n_classes))

        self.hidden_layers = nn.ModuleList(self.hidden_layers).to(self.device)
        self.output_layers = nn.ModuleList(self.output_layers).to(self.device)

        self.alpha = Parameter(
            torch.Tensor(self.max_num_hidden_layers).fill_(
                1 / (self.max_num_hidden_layers + 1)),
            requires_grad=False).to(self.device)

        self.loss_array = []

    def monitor_updates(self, W, alpha, dW):
        # NOTE: for CPU tensors, .numpy() shares storage with the tensor, so
        # the `W += update` below also modifies the weights in place.
        W = W.cpu().numpy()
        dW = dW.cpu().numpy()
        alpha = alpha.cpu().numpy()
        n = self.n.cpu().numpy()
        param_scale = np.linalg.norm(W.ravel())
        update = n * alpha * dW  # simple SGD update
        update_scale = np.linalg.norm(update.ravel())
        W += update
        self.t += 1
        update_ratio = update_scale / param_scale
        if update_ratio > 1e-3:
            print('%d frame : update_ratio : %.5f ' % (self.t, update_ratio))

    def zero_grad(self):
        for i in range(self.max_num_hidden_layers):
            self.output_layers[i].weight.grad.data.fill_(0)
            self.output_layers[i].bias.grad.data.fill_(0)
            self.hidden_layers[i].weight.grad.data.fill_(0)
            self.hidden_layers[i].bias.grad.data.fill_(0)

    def update_weights(self, X, Y, show_loss):
        if self.loss_fun == 'mse':
            Y = torch.from_numpy(Y).to(self.device)

        predictions_per_layer = self.forward(X)

        losses_per_layer = []
        for out in predictions_per_layer:
            # print('out = ', out)
            if self.loss_fun == 'cel':
                criterion = nn.CrossEntropyLoss().to(self.device)
                loss = criterion(out.view(self.batch_size, self.n_classes),
                                 Y.view(self.batch_size).long())
            if self.loss_fun == 'mse':
                criterion = nn.MSELoss().to(self.device)
                loss = criterion(out.view(self.batch_size, self.n_classes),
                                 Y.view(self.batch_size, self.n_classes).float())
            losses_per_layer.append(loss)

        w = [None] * len(losses_per_layer)
        b = [None] * len(losses_per_layer)

        with torch.no_grad():
            for i in range(len(losses_per_layer)):
                losses_per_layer[i].backward(retain_graph=True)

                self.output_layers[i].weight.data -= self.n * \
                    self.alpha[i] * self.output_layers[i].weight.grad.data
                self.output_layers[i].bias.data -= self.n * \
                    self.alpha[i] * self.output_layers[i].bias.grad.data
                # self.monitor_updates(self.output_layers[i].weight.data, self.alpha[i], self.output_layers[i].weight.grad.data)

                for j in range(i + 1):
                    if w[j] is None:
                        w[j] = self.alpha[i] * self.hidden_layers[j].weight.grad.data
                        b[j] = self.alpha[i] * self.hidden_layers[j].bias.grad.data
                    else:
                        w[j] += self.alpha[i] * self.hidden_layers[j].weight.grad.data
                        b[j] += self.alpha[i] * self.hidden_layers[j].bias.grad.data

                self.zero_grad()

            for i in range(len(losses_per_layer)):
                self.hidden_layers[i].weight.data -= self.n * w[i]
                self.hidden_layers[i].bias.data -= self.n * b[i]

            for i in range(len(losses_per_layer)):
                self.alpha[i] *= torch.pow(self.b, losses_per_layer[i])
                self.alpha[i] = torch.max(self.alpha[i],
                                          self.s / self.max_num_hidden_layers)

        z_t = torch.sum(self.alpha)
        self.alpha = Parameter(self.alpha / z_t,
                               requires_grad=False).to(self.device)

        if show_loss:
            real_output = torch.sum(
                torch.mul(
                    self.alpha.view(self.max_num_hidden_layers, 1).repeat(
                        1, self.batch_size).view(self.max_num_hidden_layers,
                                                 self.batch_size, 1),
                    predictions_per_layer), 0)
            # NOTE: `out` here is the last layer's prediction from the loop
            # above; the commented lines below show the (likely intended)
            # loss on the weighted ensemble output `real_output`.
            if self.loss_fun == 'cel':
                criterion = nn.CrossEntropyLoss().to(self.device)
                loss = criterion(out.view(self.batch_size, self.n_classes),
                                 Y.view(self.batch_size).long())
            if self.loss_fun == 'mse':
                criterion = nn.MSELoss().to(self.device)
                loss = criterion(out.view(self.batch_size, self.n_classes),
                                 Y.view(self.batch_size, self.n_classes).float())
            # criterion = nn.CrossEntropyLoss().to(self.device)
            # loss = criterion(real_output.view(self.batch_size, self.n_classes), Y.view(self.batch_size).long())
            self.loss_array.append(loss)
            if (len(self.loss_array) % 1000) == 0:
                print("WARNING: Set 'show_loss' to 'False' when not debugging. "
                      "It will deteriorate the fitting performance.")
                loss = torch.Tensor(self.loss_array).mean().cpu().numpy()
                print("Alpha:" + str(self.alpha.data.cpu().numpy()))
                print("Training Loss: " + str(loss))
                self.loss_array.clear()

    def forward(self, X):
        hidden_connections = []
        X = torch.from_numpy(X).float().to(self.device)
        x = F.relu(self.hidden_layers[0](X))
        hidden_connections.append(x)
        for i in range(1, self.max_num_hidden_layers):
            hidden_connections.append(
                F.relu(self.hidden_layers[i](hidden_connections[i - 1])))

        output_class = []
        for i in range(self.max_num_hidden_layers):
            output_class.append(self.output_layers[i](hidden_connections[i]))
        pred_per_layer = torch.stack(output_class)
        # print('pred_per_layer : ', pred_per_layer)
        return pred_per_layer

    def validate_input_X(self, data):
        if len(data.shape) != 2:
            raise Exception(
                "Wrong dimension for this X data. It should have exactly two dimensions.")

    def validate_input_Y(self, data):
        # the original message said "only one dimensions"; the check requires two
        if len(data.shape) != 2:
            raise Exception(
                "Wrong dimension for this Y data. It should have exactly two dimensions.")

    def partial_fit_(self, X_data, Y_data, show_loss=True):
        self.validate_input_X(X_data)
        self.validate_input_Y(Y_data)
        self.update_weights(X_data, Y_data, show_loss)

    def partial_fit(self, X_data, Y_data, show_loss=False):
        self.partial_fit_(X_data, Y_data, show_loss)

    def predict_(self, X_data):
        self.validate_input_X(X_data)
        if self.loss_fun == 'cel':
            return torch.argmax(torch.sum(
                torch.mul(
                    self.alpha.view(self.max_num_hidden_layers, 1).repeat(
                        1, len(X_data)).view(self.max_num_hidden_layers,
                                             len(X_data), 1),
                    self.forward(X_data)), 0), dim=1).cpu().numpy()
        if self.loss_fun == 'mse':
            return torch.sum(
                torch.mul(
                    self.alpha.view(self.max_num_hidden_layers, 1).repeat(
                        1, len(X_data)).view(self.max_num_hidden_layers,
                                             len(X_data), 1),
                    self.forward(X_data)), 0).cpu().detach().numpy()

    def predict(self, X_data):
        pred = self.predict_(X_data)
        return pred

    def export_params_to_json(self):
        state_dict = self.state_dict()
        params_gp = {}
        for key, tensor in state_dict.items():
            params_gp[key] = tensor.cpu().numpy().tolist()
        return json.dumps(params_gp)

    def load_params_from_json(self, json_data):
        params = json.loads(json_data)
        o_dict = collections.OrderedDict()
        for key, tensor in params.items():
            o_dict[key] = torch.tensor(tensor).to(self.device)
        self.load_state_dict(o_dict)
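# Hedged online-learning sketch (hypothetical shapes). X must be a 2-D numpy
# array since forward() calls torch.from_numpy; for the 'cel' path the label
# is passed as a 2-D torch tensor, because update_weights only converts Y
# from numpy in the 'mse' branch:
def _demo_onn():
    onn = ONN(features_size=10, max_num_hidden_layers=3,
              qtd_neuron_per_hidden_layer=16, n_classes=2, loss_fun='cel')
    for _ in range(5):
        X = np.random.rand(1, 10)
        Y = torch.randint(0, 2, (1, 1))
        onn.partial_fit(X, Y)
    print(onn.predict(np.random.rand(1, 10)))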
class Gumbel_Generator_Old(nn.Module):
    def __init__(self, sz=10, temp=10, temp_drop_frac=0.9999):
        # initialize the nn.Module base class
        super(Gumbel_Generator_Old, self).__init__()
        self.sz = sz
        # Parameter turns a non-trainable Tensor into a trainable parameter
        # registered on the module, so self.gen_matrix becomes part of the
        # model and is updated during training.
        # torch.rand() returns a tensor of samples drawn uniformly from (0, 1).
        self.gen_matrix = Parameter(torch.rand(sz, sz, 2))
        self.new_matrix = Parameter(torch.zeros(5, 5, 2))
        # gen_matrix holds the adjacency-matrix probabilities
        self.temperature = temp
        self.temp_drop_frac = temp_drop_frac

    def symmetry(self):
        matrix = self.gen_matrix.permute(2, 0, 1)
        temp_matrix = torch.triu(matrix, 1) + torch.triu(matrix, 1).permute(0, 2, 1)
        self.gen_matrix.data = temp_matrix.permute(1, 2, 0)

    def drop_temp(self):
        # annealing: decay the temperature
        self.temperature = self.temperature * self.temp_drop_frac

    def sample_all(self, hard=False, epoch=1):
        # sampling: draw one adjacency matrix
        # self.symmetry()
        # view first flattens the tensor, then reshapes it to the given dimensions
        self.logp = self.gen_matrix.view(-1, 2)
        out = gumbel_softmax(self.logp, self.temperature, hard)
        if hard:
            hh = torch.zeros(self.gen_matrix.size()[0] ** 2, 2)
            for i in range(out.size()[0]):
                hh[i, out[i]] = 1
            out = hh
        if use_cuda:
            out = out.cuda()
        out_matrix = out[:, 0].view(self.gen_matrix.size()[0],
                                    self.gen_matrix.size()[0])
        return out_matrix

    def sample_small(self, list, hard=False):
        indices = np.ix_(list, list)
        # view first flattens the tensor, then reshapes it to the given dimensions
        self.logp = self.gen_matrix[indices].view(-1, 2)
        # `hard` selects straight-through (one-hot) sampling
        out = gumbel_softmax(self.logp, self.temperature, hard)
        if hard:
            hh = torch.zeros(self.gen_matrix[indices].size()[0] ** 2, 2)
            for i in range(out.size()[0]):
                hh[i, out[i]] = 1
            out = hh
        if use_cuda:
            out = out.cuda()
        out_matrix = out[:, 0].view(len(list), len(list))
        return out_matrix

    def sample_adj_ij(self, list, j, hard=False, sample_time=1):
        # self.logp = self.gen_matrix[:, i]
        self.logp = self.gen_matrix[list, j]
        out = gumbel_softmax(self.logp, self.temperature, hard=hard)
        if use_cuda:
            out = out.cuda()
        # print(out)
        if hard:
            out_matrix = out.float()
        else:
            out_matrix = out[:, 0]
        return out_matrix

    def sample_adj_i(self, i, hard=False, sample_time=1):
        # self.symmetry()
        self.logp = self.gen_matrix[:, i]
        out = gumbel_softmax(self.logp, self.temperature, hard=hard)
        if use_cuda:
            out = out.cuda()
        # print(out)
        if hard:
            out_matrix = out.float()
        else:
            out_matrix = out[:, 0]
        return out_matrix

    def get_temperature(self):
        return self.temperature

    def get_cross_entropy(self, obj_matrix):
        # cross-entropy distance to the target matrix
        logps = F.softmax(self.gen_matrix, 2)
        logps = torch.log(logps[:, :, 0] + 1e-10) * obj_matrix + \
            torch.log(logps[:, :, 1] + 1e-10) * (1 - obj_matrix)
        result = -torch.sum(logps)
        result = result.cpu() if use_cuda else result
        return result.data.numpy()

    def get_entropy(self):
        logps = F.softmax(self.gen_matrix, 2)
        result = torch.mean(torch.sum(logps * torch.log(logps + 1e-10), 1))
        result = result.cpu() if use_cuda else result
        return (-result.data.numpy())

    def randomization(self, fraction):
        # re-randomize gen_matrix; `fraction` is the proportion of entries to reset
        sz = self.gen_matrix.size()[0]
        numbers = int(fraction * sz * sz)
        original = self.gen_matrix.cpu().data.numpy()  # (unused)
        for i in range(numbers):
            ii = np.random.choice(range(sz), (2, 1))
            z = torch.rand(2).cuda() if use_cuda else torch.rand(2)
            self.gen_matrix.data[ii[0], ii[1], :] = z

    def init(self, mean, var):
        init.normal_(self.gen_matrix, mean=mean, std=var)
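# Hedged sketch of sub-matrix sampling with Gumbel_Generator_Old (assumes the
# same module-level gumbel_softmax helper and use_cuda flag as above):
def _demo_gumbel_generator_old():
    gen = Gumbel_Generator_Old(sz=6, temp=10, temp_drop_frac=0.99)
    full = gen.sample_all(hard=False)          # 6 x 6 soft adjacency
    sub = gen.sample_small([0, 2, 4])          # 3 x 3 block for selected nodes
    col = gen.sample_adj_i(1, hard=False)      # soft column for node 1
    print(full.shape, sub.shape, col.shape)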