def forward(self, g, h, h_en):
    """Forward computation."""
    with g.local_scope():
        h_src, h_dst = expand_as_pair(h)
        h_src_en, h_dst_en = expand_as_pair(h_en)
        g.srcdata['x'] = h_src
        g.dstdata['x'] = h_dst
        g.srcdata['en'] = h_src_en
        g.dstdata['en'] = h_dst_en
        if not self.batch_norm:
            # g.update_all(self.message, fn.mean('e', 'x'))
            g.apply_edges(self.message)
            g.update_all(fn.copy_e('e', 'e'), fn.max('e', 'x'))
            g.update_all(fn.copy_e('e_en', 'e_en'), fn.mean('e_en', 'en'))
        else:
            g.apply_edges(self.message)
            g.edata['e'] = self.bn(g.edata['e'])
            g.update_all(fn.copy_e('e', 'e'), fn.max('e', 'x'))
            g.update_all(fn.copy_e('e_en', 'e_en'), fn.mean('e_en', 'en'))
        return g.dstdata['x'], g.dstdata['en']  # + h_en
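# --- Usage sketch (not part of the layer above) ---------------------------------
# `expand_as_pair` is the helper every snippet in this file relies on: given a single
# tensor it returns it twice (homogeneous graph), given a (src, dst) tuple it passes
# the pair through, and given a block it slices out the destination rows. The toy
# graph, feature sizes, and variable names below are invented for illustration only.
import dgl
import torch
from dgl.utils import expand_as_pair

g = dgl.graph(([0, 1, 2], [1, 2, 0]))              # tiny homogeneous graph
h = torch.randn(g.num_nodes(), 4)
h_src, h_dst = expand_as_pair(h, g)                # same tensor twice
assert h_src is h and h_dst is h

block = dgl.to_block(g, dst_nodes=torch.tensor([0, 1]))   # message-flow block
hb = torch.randn(block.num_src_nodes(), 4)
hb_src, hb_dst = expand_as_pair(hb, block)         # dst rows = leading num_dst rows
assert hb_dst.shape[0] == block.num_dst_nodes()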
def forward(self, g, feat):
    """
    :param g: DGLGraph, a bipartite graph containing a single relation
    :param feat: tensor(N_src, d_in) or (tensor(N_src, d_in), tensor(N_dst, d_in)), input features
    :return: tensor(N_dst, K*d_out), representation of the destination nodes for this relation
    """
    with g.local_scope():
        feat_src, feat_dst = expand_as_pair(feat, g)
        feat_src = self.fc_src(self.feat_drop(feat_src)).view(-1, self.num_heads, self.out_dim)
        feat_dst = self.fc_dst(self.feat_drop(feat_dst)).view(-1, self.num_heads, self.out_dim)

        # a^T (z_u || z_v) = (a_l^T || a_r^T) (z_u || z_v) = a_l^T z_u + a_r^T z_v = el + er
        el = (feat_src * self.attn_src[:, :self.out_dim]).sum(dim=-1, keepdim=True)  # (N_src, K, 1)
        er = (feat_dst * self.attn_src[:, self.out_dim:]).sum(dim=-1, keepdim=True)  # (N_dst, K, 1)
        g.srcdata.update({'ft': feat_src, 'el': el})
        g.dstdata['er'] = er
        g.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = self.leaky_relu(g.edata.pop('e'))
        g.edata['a'] = edge_softmax(g, e)  # (E, K, 1)

        # Message passing
        g.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        ret = g.dstdata['ft'].view(-1, self.num_heads * self.out_dim)
        if self.activation:
            ret = self.activation(ret)
        return ret
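# --- Numeric check (illustration only, not the layer's code) --------------------
# The comment in the forward above splits the GAT score a^T [z_u || z_v] into
# a_l^T z_u + a_r^T z_v (el + er). The sizes and tensors here are invented just to
# verify that identity for a single head.
import torch

d = 8
a = torch.randn(2 * d)                  # full attention vector a = [a_l || a_r]
z_u, z_v = torch.randn(d), torch.randn(d)
lhs = a @ torch.cat([z_u, z_v])         # a^T (z_u || z_v)
rhs = a[:d] @ z_u + a[d:] @ z_v         # a_l^T z_u + a_r^T z_v = el + er
assert torch.allclose(lhs, rhs, atol=1e-6)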
def __init__(self, in_feats, out_feats, aggregator_type,
             feat_drop=0., bias=True, norm=None, activation=None):
    super(SAGEConvMLP, self).__init__()
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._aggre_type = aggregator_type
    self.norm = norm
    self.feat_drop = nn.Dropout(feat_drop)
    self.activation = activation
    # aggregator type: mean/pool/lstm/gcn
    if aggregator_type == 'pool':
        self.fc_pool = nn.Linear(self._in_src_feats, self._in_src_feats)
    if aggregator_type == 'lstm':
        self.lstm = nn.LSTM(self._in_src_feats, self._in_src_feats, batch_first=True)
    if aggregator_type != 'gcn':
        self.fc_self = nn.Linear(self._in_dst_feats, out_feats, bias=bias)
    self.fc_neigh = nn.Linear(self._in_src_feats, out_feats, bias=bias)
    self.reset_parameters()
def __init__(self,
             in_feats: int,
             attn_feats: int,
             out_feats: int,
             num_heads: int,
             feat_drop: float = 0.,
             attn_drop: float = 0.,
             negative_slope: float = 0.2,
             allow_zero_in_degree: bool = False,
             is_increasing: bool = True,
             reparam_method: str = 'ReLU'):
    super(GATConv, self).__init__()
    self._num_heads = num_heads
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._allow_zero_in_degree = allow_zero_in_degree
    self.fc = ReparameterizedLinear(in_features=self._in_src_feats,
                                    out_features=out_feats * num_heads,
                                    bias=False,
                                    is_increasing=is_increasing,
                                    reparam_method=reparam_method)
    self.attn_l = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats)))
    self.attn_r = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats)))
    self.fc_attn = nn.Linear(in_features=attn_feats,
                             out_features=out_feats * num_heads,
                             bias=False)
    self.feat_drop = nn.Dropout(feat_drop)
    self.attn_drop = nn.Dropout(attn_drop)
    self.leaky_relu = nn.LeakyReLU(negative_slope)
    self.reset_parameters()
def forward(self, graph, feat):
    """Compute Graph Isomorphism Network layer.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : torch.Tensor or pair of torch.Tensor
        If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where
        :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
        If a pair of torch.Tensor is given, the pair must contain two tensors of shape
        :math:`(N_{in}, D_{in})` and :math:`(N_{out}, D_{in})`.
        If ``apply_func`` is not None, :math:`D_{in}` should
        fit the input dimensionality requirement of ``apply_func``.

    Returns
    -------
    torch.Tensor
        The output feature of shape :math:`(N, D_{out})` where
        :math:`D_{out}` is the output dimensionality of ``apply_func``.
        If ``apply_func`` is None, :math:`D_{out}` should be the same
        as input dimensionality.
    """
    graph = graph.local_var()
    feat_src, feat_dst = expand_as_pair(feat)
    graph.srcdata['h'] = feat_src
    graph.send_and_recv(graph.edges(), fn.copy_u('h', 'm'), self._reducer('m', 'neigh'))
    rst = (1 + self.eps) * feat_dst + graph.dstdata['neigh']
    if self.apply_func is not None:
        rst = self.apply_func(rst)
    return rst
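# --- Toy reproduction of the GIN update (assumed setup, not GINConv itself) -----
# The forward above computes (1 + eps) * h_v plus an aggregation of neighbor
# features; update_all over all edges is equivalent to the send_and_recv call it
# uses. Graph, feature size, and eps below are arbitrary.
import dgl
import dgl.function as fn
import torch

g = dgl.graph(([0, 1, 2], [1, 2, 0]))
h = torch.randn(g.num_nodes(), 4)
eps = 0.1
g.ndata['h'] = h
g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'neigh'))
rst = (1 + eps) * h + g.ndata['neigh']   # matches the GIN formula with a sum reducer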
def forward(self, g, feat):
    """
    :param g: DGLGraph, a bipartite graph containing a single relation
    :param feat: tensor(N_src, d_in) or (tensor(N_src, d_in), tensor(N_dst, d_in)), input features
    :return: tensor(N_dst, d_out), representation of the destination nodes for this relation
    """
    with g.local_scope():
        feat_src, feat_dst = expand_as_pair(feat, g)
        # (N_src, d_in) -> (N_src, d_out) -> (N_src, K, d_out/K)
        k = self.k_linear(feat_src).view(-1, self.num_heads, self.d_k)
        v = self.v_linear(feat_src).view(-1, self.num_heads, self.d_k)
        q = self.q_linear(feat_dst).view(-1, self.num_heads, self.d_k)

        # k[:, h] @= w_att[h] => k[n, h, j] = ∑(i) k[n, h, i] * w_att[h, i, j]
        k = torch.einsum('nhi,hij->nhj', k, self.w_att)
        v = torch.einsum('nhi,hij->nhj', v, self.w_msg)

        g.srcdata.update({'k': k, 'v': v})
        g.dstdata['q'] = q
        g.apply_edges(fn.v_dot_u('q', 'k', 't'))  # g.edata['t']: (E, K, 1)
        attn = g.edata.pop('t').squeeze(dim=-1) * self.mu / math.sqrt(self.d_k)
        attn = edge_softmax(g, attn)  # (E, K)
        self.attn = attn.detach()
        g.edata['t'] = attn.unsqueeze(dim=-1)  # (E, K, 1)
        g.update_all(fn.u_mul_e('v', 't', 'm'), fn.sum('m', 'h'))
        out = g.dstdata['h'].view(-1, self.out_dim)  # (N_dst, d_out)
        return out
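# --- Check of the per-head einsum above (shapes invented) -----------------------
# 'nhi,hij->nhj' applies a separate (d_k x d_k) matrix per head, i.e.
# k[:, h] = k[:, h] @ w_att[h] for every head h.
import torch

n, heads, d_k = 5, 4, 8
k = torch.randn(n, heads, d_k)
w_att = torch.randn(heads, d_k, d_k)
out = torch.einsum('nhi,hij->nhj', k, w_att)
ref = torch.stack([k[:, h] @ w_att[h] for h in range(heads)], dim=1)
assert torch.allclose(out, ref, atol=1e-5)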
def forward(self, graph, feat, edge_weight=None):
    with graph.local_scope():
        aggregate_fn = fn.copy_src('h', 'm')
        if edge_weight is not None:
            # assert edge_weight.shape[0] == graph.number_of_edges()
            graph.edata['_edge_weight'] = edge_weight
            aggregate_fn = fn.u_sub_e('h', '_edge_weight', 'm')
        feat_src, feat_dst = expand_as_pair(feat, graph)

        # aggregate first then mult W
        graph.srcdata['h'] = feat_src
        # NOTE: this edge update reads '_edge_weight' unconditionally, so the
        # function effectively assumes edge_weight is always passed in.
        graph.apply_edges(lambda edges: {'h': edges.data['_edge_weight'] * 2})
        graph.update_all(aggregate_fn, fn.sum(msg='m', out='h'))
        rst = graph.dstdata['h']
        rst = th.matmul(feat_src, self.weight1) + th.matmul(rst, self.weight2)
        if self.activation is not None:
            rst = self.activation(rst)
        return rst
def forward(self, graph, feat, weight=None, alpha=None, gene_num=None):
    self.alpha = alpha
    self.gene_num = gene_num
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
        feat_src, feat_dst = expand_as_pair(feat, graph)
        # print(f"feat_src : {feat_src.shape}, feat_dst {feat_dst.shape}")
        if self._norm == 'both':
            degs = graph.out_degrees().float().clamp(min=1)
            norm = torch.pow(degs, -0.5)
            shp = norm.shape + (1,) * (feat_src.dim() - 1)
            norm = torch.reshape(norm, shp)
            feat_src = feat_src * norm

        if weight is not None:
            if self.weight is not None:
                raise DGLError('External weight is provided while at the same time the'
                               ' module has defined its own weight parameter. Please'
                               ' create the module with flag weight=False.')
        else:
            weight = self.weight

        if weight is not None:
            feat_src = torch.matmul(feat_src, weight)
        graph.srcdata['h'] = feat_src
        graph.update_all(self.edge_selection_simple, fn.sum(msg='m', out='h'))
        rst = graph.dstdata['h']

        if self._norm != 'none':
            degs = graph.in_degrees().float().clamp(min=1)
            if self._norm == 'both':
                norm = torch.pow(degs, -0.5)
            else:
                norm = 1.0 / degs
            shp = norm.shape + (1,) * (feat_dst.dim() - 1)
            norm = torch.reshape(norm, shp)
            rst = rst * norm

        if self.bias is not None:
            rst = rst + self.bias

        if self._activation is not None:
            rst = self._activation(rst)

        return rst
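# --- Sketch of the 'both' normalization (toy graph, names invented) -------------
# The norm handling above scales source features by out-degree^(-1/2) before
# aggregation and the aggregated result by in-degree^(-1/2) afterwards, i.e. the
# symmetric D^{-1/2} A D^{-1/2} scaling used in GCN (shown here with a plain
# copy/sum aggregation instead of the module's own edge_selection_simple message).
import dgl
import dgl.function as fn
import torch

g = dgl.graph(([0, 1, 2, 2], [1, 2, 0, 1]))
x = torch.randn(g.num_nodes(), 4)
src_norm = torch.pow(g.out_degrees().float().clamp(min=1), -0.5).unsqueeze(-1)
dst_norm = torch.pow(g.in_degrees().float().clamp(min=1), -0.5).unsqueeze(-1)
g.ndata['h'] = x * src_norm
g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
out = g.ndata['h'] * dst_norm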
def __init__(
    self,
    edge_feats,
    num_etypes,
    in_feats,
    out_feats,
    num_heads,
    feat_drop=0.0,
    attn_drop=0.0,
    negative_slope=0.2,
    residual=False,
    activation=None,
    allow_zero_in_degree=False,
    bias=False,
    alpha=0.0,
):
    super(myGATConv, self).__init__()
    self._edge_feats = edge_feats
    self._num_heads = num_heads
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._allow_zero_in_degree = allow_zero_in_degree
    self.edge_emb = nn.Embedding(num_etypes, edge_feats)
    if isinstance(in_feats, tuple):
        self.fc_src = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
        self.fc_dst = nn.Linear(self._in_dst_feats, out_feats * num_heads, bias=False)
    else:
        self.fc = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
    self.fc_e = nn.Linear(edge_feats, edge_feats * num_heads, bias=False)
    self.attn_l = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.attn_r = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.attn_e = nn.Parameter(th.FloatTensor(size=(1, num_heads, edge_feats)))
    self.feat_drop = nn.Dropout(feat_drop)
    self.attn_drop = nn.Dropout(attn_drop)
    self.leaky_relu = nn.LeakyReLU(negative_slope)
    if residual:
        if self._in_dst_feats != out_feats:
            self.res_fc = nn.Linear(self._in_dst_feats, num_heads * out_feats, bias=False)
        else:
            self.res_fc = Identity()
    else:
        self.register_buffer("res_fc", None)
    self.reset_parameters()
    self.activation = activation
    self.bias = bias
    if bias:
        self.bias_param = nn.Parameter(th.zeros((1, num_heads, out_feats)))
    self.alpha = alpha
def __init__(self, in_feats, out_feats):
    super(SAGEConv, self).__init__()
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self.fc_self = nn.Linear(self._in_dst_feats, out_feats, bias=False)
    self.fc_neigh = nn.Linear(self._in_src_feats, out_feats)
    self.reset_parameters()
def forward(self, graph, feat, weight=None, edge_weight=None):
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError('There are 0-in-degree nodes in the graph, '
                               'output for those nodes will be invalid. '
                               'This is harmful for some applications, '
                               'causing silent performance regression. '
                               'Adding self-loop on the input graph by '
                               'calling `g = dgl.add_self_loop(g)` will resolve '
                               'the issue. Setting ``allow_zero_in_degree`` '
                               'to be `True` when constructing this module will '
                               'suppress the check and let the code run.')

        # aggregate_fn = fn.copy_src('h', 'm')
        if edge_weight is not None:
            assert edge_weight.shape[0] == graph.number_of_edges()
            graph.edata['_edge_weight'] = edge_weight
            aggregate_fn = fn.u_mul_e('h', '_edge_weight', 'm')

        # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
        feat_src, feat_dst = expand_as_pair(feat, graph)
        if self._norm == 'both':
            degs = graph.out_degrees().float().clamp(min=1)
            norm = th.pow(degs, -0.5)
            shp = norm.shape + (1,) * (feat_src.dim() - 1)
            norm = th.reshape(norm, shp)
            feat_src = feat_src * norm

        feat_sumsrc = th.matmul(feat_src, self.w1)
        feat_prodsrc = th.matmul(
            th.cat((feat_src, th.ones([feat_src.shape[0], 1]).to('cuda:0')), 1),
            self.w2)
        graph.srcdata['h_sum'] = feat_sumsrc
        graph.srcdata['h_prod'] = feat_prodsrc
        graph.update_all(fn.copy_src('h_sum', 'm_sum'), self._elementwise_sum)
        graph.update_all(fn.copy_src('h_prod', 'm_prod'), self._elementwise_product)
        rst = graph.dstdata['h_sum'] + th.matmul(graph.dstdata['h_prod'], self.v)

        if self._norm != 'none':
            degs = graph.in_degrees().float().clamp(min=1)
            if self._norm == 'both':
                norm = th.pow(degs, -0.5)
            else:
                norm = 1.0 / degs
            shp = norm.shape + (1,) * (feat_dst.dim() - 1)
            norm = th.reshape(norm, shp)
            rst = rst * norm

        if self._activation is not None:
            rst = self._activation(rst)
        return rst
def __init__(self,
             in_feats,
             out_feats,
             num_heads,
             num_nodes,
             layerid,
             feat_drop=0.,
             attn_drop=0.,
             negative_slope=0.2,
             residual=False,
             activation=None,
             allow_zero_in_degree=False,
             fix=False):
    super(GATConvs, self).__init__()
    self._num_heads = num_heads
    self.num_nodes = num_nodes
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._allow_zero_in_degree = allow_zero_in_degree
    self.layerid = layerid
    if isinstance(in_feats, tuple):
        self.fc_src = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
        self.fc_dst = nn.Linear(self._in_dst_feats, out_feats * num_heads, bias=False)
    else:
        self.fc1 = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
        self.fc2 = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
        self.fc3 = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
    self.attn_l = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.attn_r = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.feat_drop = nn.Dropout(feat_drop)
    self.attn_drop = nn.Dropout(attn_drop)
    self.leaky_relu = nn.LeakyReLU(negative_slope)
    if residual:
        if self._in_dst_feats != out_feats:
            self.res_fc = nn.Linear(self._in_dst_feats, num_heads * out_feats, bias=False)
        else:
            self.res_fc = Identity()
    else:
        self.register_buffer('res_fc', None)
    self.reset_parameters()
    self.activation = activation
    self.fix = fix
def __init__(self,
             in_feats,
             out_feats,
             K=3,
             num_heads=1,
             feat_drop=0.0,
             edge_drop=0.0,
             attn_drop=0.0,
             negative_slope=0.2,
             use_attn_dst=True,
             residual=False,
             activation=None,
             allow_zero_in_degree=False,
             norm='sym'):
    super(GATHAConv, self).__init__()
    self._num_heads = num_heads
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._allow_zero_in_degree = allow_zero_in_degree
    self._K = K
    self._norm = norm
    self.attn_l = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats)))
    if use_attn_dst:
        self.attn_r = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats)))
    else:
        self.register_buffer("attn_r", None)
    self.fc = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
    self.position_emb = nn.Parameter(torch.FloatTensor(size=(K + 1, num_heads, out_feats)))
    self.hop_attn_l = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats)))
    self.hop_attn_r = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats)))
    self.hop_attn_bias_l = nn.Parameter(torch.FloatTensor(size=(1, num_heads, 1)))
    self.hop_attn_bias_r = nn.Parameter(torch.FloatTensor(size=(1, num_heads, 1)))
    self.feat_drop = nn.Dropout(feat_drop)
    self.attn_drop = nn.Dropout(attn_drop)
    self.edge_drop = edge_drop
    self.leaky_relu = nn.LeakyReLU(negative_slope)
    if residual:
        self.res_fc = nn.Linear(self._in_dst_feats, num_heads * out_feats, bias=False)
    else:
        self.register_buffer("res_fc", None)
    self.reset_parameters()
    self._activation = activation
def __init__(
        self,
        in_feats,
        # out_feats,
        aggregator_type):
    super(SAGEConvAggLoc, self).__init__()
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._aggre_type = aggregator_type
def __init__(self,
             in_feats,
             out_feats,
             num_heads,
             basis,
             attn_basis,
             basis_coef,
             feat_drop=0.,
             attn_drop=0.,
             negative_slope=0.2,
             residual=False,
             activation=None,
             allow_zero_in_degree=False,
             bias=True,
             use_checkpoint=False):
    super(GATConv, self).__init__()
    self._basis = basis
    self._basis_coef = basis_coef
    self._attn_basis = attn_basis
    self._num_heads = num_heads
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._allow_zero_in_degree = allow_zero_in_degree
    # if isinstance(in_feats, tuple):
    #     self.fc_src = nn.Linear(
    #         self._in_src_feats, out_feats * num_heads, bias=False)
    #     self.fc_dst = nn.Linear(
    #         self._in_dst_feats, out_feats * num_heads, bias=False)
    # else:
    #     self.fc = nn.Linear(
    #         self._in_src_feats, out_feats * num_heads, bias=False)
    # self.attn_l = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    # self.attn_r = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.feat_drop = nn.Dropout(feat_drop)
    self.attn_drop = nn.Dropout(attn_drop)
    self.leaky_relu = nn.LeakyReLU(negative_slope)
    if bias:
        self.bias = nn.Parameter(th.FloatTensor(size=(num_heads * out_feats,)))
    else:
        self.register_buffer('bias', None)
    if residual:
        if self._in_dst_feats != out_feats:
            self.res_fc = nn.Linear(self._in_dst_feats, num_heads * out_feats, bias=False)
        else:
            self.res_fc = Identity()
    else:
        self.register_buffer('res_fc', None)
    self.reset_parameters()
    self.activation = activation
    self.dummy_tensor = th.ones(1, dtype=th.float32, requires_grad=True)
    self.use_checkpoint = use_checkpoint
def __init__(
    self,
    in_feats,
    out_feats,
    num_heads=1,
    feat_drop=0.0,
    attn_drop=0.0,
    edge_drop=0.0,
    negative_slope=0.2,
    use_attn_dst=True,
    residual=False,
    activation=None,
    allow_zero_in_degree=False,
    use_symmetric_norm=False,
):
    super(GATConv, self).__init__()
    self._num_heads = num_heads
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._allow_zero_in_degree = allow_zero_in_degree
    self._use_symmetric_norm = use_symmetric_norm
    if isinstance(in_feats, tuple):
        self.fc_src = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
        self.fc_dst = nn.Linear(self._in_dst_feats, out_feats * num_heads, bias=False)
    else:
        self.fc = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
    self.attn_l = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats)))
    if use_attn_dst:
        self.attn_r = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats)))
    else:
        self.register_buffer("attn_r", None)
    self.feat_drop = nn.Dropout(feat_drop)
    assert feat_drop == 0.0  # not implemented
    self.attn_drop = nn.Dropout(attn_drop)
    assert attn_drop == 0.0  # not implemented
    self.edge_drop = edge_drop
    self.leaky_relu = nn.LeakyReLU(negative_slope)
    if residual:
        self.res_fc = nn.Linear(self._in_dst_feats, num_heads * out_feats, bias=False)
    else:
        self.register_buffer("res_fc", None)
    self.reset_parameters()
    self._activation = activation
def forward(self, graph, feat, weight=None, edge_weight=None):
    with graph.local_scope():
        aggregate_fn = fn.copy_src('h', 'm')
        if edge_weight is not None:
            assert edge_weight.shape[0] == graph.number_of_edges()
            graph.edata['_edge_weight'] = edge_weight
            aggregate_fn = fn.u_mul_e('h', '_edge_weight', 'm')

        # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite.
        feat_src, feat_dst = expand_as_pair(feat, graph)
        if self._norm == 'both':
            degs = graph.out_degrees().float().clamp(min=1)
            norm = torch.pow(degs, -0.5)
            shp = norm.shape + (1,) * (feat_src.dim() - 1)
            norm = torch.reshape(norm, shp)
            feat_src = feat_src * norm

        weight = self.weight
        if self._in_feats > self._out_feats:
            # mult W first to reduce the feature size for aggregation.
            if weight is not None:
                feat_src = torch.tanh(torch.matmul(feat_src, weight))
            graph.srcdata['h'] = feat_src
            graph.update_all(aggregate_fn, self._elementwise_product)
            rst = graph.dstdata['h']
        else:
            # aggregate first then mult W
            graph.srcdata['h'] = feat_src
            graph.update_all(aggregate_fn, self._elementwise_product)
            rst = graph.dstdata['h']
            if weight is not None:
                rst = torch.matmul(rst, weight)

        if self._norm != 'none':
            degs = graph.in_degrees().float().clamp(min=1)
            if self._norm == 'both':
                norm = torch.pow(degs, -0.5)
            else:
                norm = 1.0 / degs
            shp = norm.shape + (1,) * (feat_dst.dim() - 1)
            norm = torch.reshape(norm, shp)
            rst = rst * norm

        if not self._out:
            rst = torch.matmul(rst, self.weight2)

        if self._activation is not None:
            rst = self._activation(rst)

        return rst
def __init__(
    self,
    node_feats,
    edge_feats,
    out_feats,
    n_heads=1,
    attn_drop=0.0,
    edge_drop=0.0,
    negative_slope=0.2,
    residual=True,
    activation=None,
    use_attn_dst=True,
    allow_zero_in_degree=True,
    use_symmetric_norm=False,
):
    super(GATConv, self).__init__()
    self._n_heads = n_heads
    self._in_src_feats, self._in_dst_feats = expand_as_pair(node_feats)
    self._out_feats = out_feats
    self._allow_zero_in_degree = allow_zero_in_degree
    self._use_symmetric_norm = use_symmetric_norm

    # feat fc
    self.src_fc = nn.Linear(self._in_src_feats, out_feats * n_heads, bias=False)
    if residual:
        self.dst_fc = nn.Linear(self._in_src_feats, out_feats * n_heads)
        self.bias = None
    else:
        self.dst_fc = None
        # NOTE: the original passed a plain int to nn.Parameter, which is not a valid
        # argument; a zero-initialized tensor of that size is assumed here.
        self.bias = nn.Parameter(torch.zeros(out_feats * n_heads))

    # attn fc
    self.attn_src_fc = nn.Linear(self._in_src_feats, n_heads, bias=False)
    if use_attn_dst:
        self.attn_dst_fc = nn.Linear(self._in_src_feats, n_heads, bias=False)
    else:
        self.attn_dst_fc = None
    if edge_feats > 0:
        self.attn_edge_fc = nn.Linear(edge_feats, n_heads, bias=False)
    else:
        self.attn_edge_fc = None

    self.attn_drop = nn.Dropout(attn_drop)
    self.edge_drop = edge_drop
    self.leaky_relu = nn.LeakyReLU(negative_slope, inplace=True)
    self.activation = activation
    self.reset_parameters()
def __init__(self,
             in_feats,
             out_feats,
             num_heads,
             n_nodes,
             local_n_nodes,
             apply_gather=False,
             no_remote=True,
             feat_drop=0.,
             attn_drop=0.,
             negative_slope=0.2,
             residual=False,
             activation=None):
    super(GATConv, self).__init__()
    self._num_heads = num_heads
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._n_nodes = n_nodes
    self._local_n_nodes = local_n_nodes
    self._no_remote = no_remote
    self._apply_gather = apply_gather
    if isinstance(in_feats, tuple):
        assert False  # FIXME
        self.fc_src = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
        self.fc_dst = nn.Linear(self._in_dst_feats, out_feats * num_heads, bias=False)
    else:
        self.fc = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
    self.attn_l = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.attn_r = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.feat_drop = nn.Dropout(feat_drop)
    self.attn_drop = nn.Dropout(attn_drop)
    self.leaky_relu = nn.LeakyReLU(negative_slope)
    if residual:
        if self._in_dst_feats != out_feats:
            self.res_fc = nn.Linear(self._in_dst_feats, num_heads * out_feats, bias=False)
        else:
            self.res_fc = Identity()
    else:
        self.register_buffer('res_fc', None)
    self.reset_parameters()
    self.activation = activation
def __init__(
    self,
    in_feats,
    out_feats,
    num_heads=1,
    feat_drop=0.0,
    attn_drop=0.0,
    negative_slope=0.2,
    residual=False,
    activation=None,
    allow_zero_in_degree=False,
    norm="none",
):
    super(GATConv, self).__init__()
    if norm not in ("none", "both"):
        raise DGLError('Invalid norm value. Must be either "none", "both".'
                       ' But got "{}".'.format(norm))
    self._num_heads = num_heads
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._allow_zero_in_degree = allow_zero_in_degree
    self._norm = norm
    if isinstance(in_feats, tuple):
        self.fc_src = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
        self.fc_dst = nn.Linear(self._in_dst_feats, out_feats * num_heads, bias=False)
    else:
        self.fc = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
    self.attn_l = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats)))
    self.attn_r = nn.Parameter(torch.FloatTensor(size=(1, num_heads, out_feats)))
    self.feat_drop = nn.Dropout(feat_drop)
    self.attn_drop = nn.Dropout(attn_drop)
    self.leaky_relu = nn.LeakyReLU(negative_slope)
    if residual:
        if self._in_dst_feats != out_feats:
            self.res_fc = nn.Linear(self._in_dst_feats, num_heads * out_feats, bias=False)
        else:
            self.res_fc = Identity()
    else:
        self.register_buffer("res_fc", None)
    self.reset_parameters()
    self._activation = activation
def forward(self, graph, feat):
    r"""Compute Graph Isomorphism Network layer.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : torch.Tensor or pair of torch.Tensor
        If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where
        :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
        If a pair of torch.Tensor is given, the pair must contain two tensors of shape
        :math:`(N_{in}, D_{in})` and :math:`(N_{out}, D_{in})`.
        If ``apply_func`` is not None, :math:`D_{in}` should
        fit the input dimensionality requirement of ``apply_func``.

    Returns
    -------
    torch.Tensor
        The output feature of shape :math:`(N, D_{out})` where
        :math:`D_{out}` is the output dimensionality of ``apply_func``.
        If ``apply_func`` is None, :math:`D_{out}` should be the same
        as input dimensionality.
    """
    graph = graph.local_var()
    dgl_context = dgl.utils.to_dgl_context(feat.device)
    g = graph._graph.get_immutable_gidx(dgl_context)
    feat_src, feat_dst = expand_as_pair(feat)
    with self.cm.zoomIn(namespace=[self, torch],
                        graph=g,
                        node_feats={'fsrc': feat_src, 'fdst': feat_dst}) as v:
        if self.aggregator_type == 'sum':
            rst = sum([nb.fsrc for nb in v.innbs])
        elif self.aggregator_type == 'mean':
            rst = self.cm.mean([nb.fsrc for nb in v.innbs])
        elif self.aggregator_type == 'max':
            rst = self.cm.max([nb.fsrc for nb in v.innbs])
        else:
            raise NotImplementedError('Cannot find aggregator type',
                                      self.aggregator_type)
        # Temp workaround for rst = (1 + self.eps) * v.fdst + rst
        rst = v.fdst + self.eps * v.fdst + rst
        self.cm.collect_output(rst)
    rst = self.cm.zoomOut()
    if self.apply_func is not None:
        rst = self.apply_func(rst)
    return rst
def __init__(self,
             in_feats,
             out_feats,
             num_heads,
             feat_drop=0.,
             attn_drop=0.,
             negative_slope=0.2,
             residual=True,
             allow_zero_in_degree=False):
    super(GATConvFF, self).__init__()
    self._num_heads = num_heads
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._allow_zero_in_degree = allow_zero_in_degree
    if isinstance(in_feats, tuple):
        self.fc_src = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
        self.fc_dst = nn.Linear(self._in_dst_feats, out_feats * num_heads, bias=False)
    else:
        self.fc = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
    self.attn_l = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.attn_r = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.feat_drop = nn.Dropout(feat_drop)
    self.attn_drop = nn.Dropout(attn_drop)
    self.leaky_relu = nn.LeakyReLU(negative_slope)
    if residual:
        if self._in_dst_feats != out_feats:
            self.res_fc = nn.Linear(self._in_dst_feats, num_heads * out_feats, bias=False)
        else:
            self.res_fc = Identity()
    else:
        self.register_buffer('res_fc', None)
    self.reset_parameters()
    self.layer_norm = th.nn.LayerNorm(out_feats * num_heads)
    self.ff_layer_norm = th.nn.LayerNorm(out_feats * num_heads)
    self.activation = PositionwiseFeedForward(model_dim=out_feats * num_heads,
                                              d_hidden=4 * out_feats * num_heads)
def __init__(self, in_feats, out_feats, aggr, feat_drop=0., activation=None):
    super(SAGEConv, self).__init__()
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._aggr = aggr
    self.feat_drop = nn.Dropout(feat_drop)
    self.activation = activation
    self.fc_self = nn.Linear(self._in_dst_feats, out_feats, bias=False)
    self.fc_neigh = nn.Linear(self._in_src_feats, out_feats)
    self.reset_parameters()
def __init__(self, in_feats, out_feats, edata_channels):
    super(EGATLayer, self).__init__()
    self._edata_channels = edata_channels
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self.fc = nn.Linear(self._in_src_feats, out_feats * edata_channels, bias=False)
    self.edge_fc = nn.Linear(self._edata_channels, self._edata_channels, bias=False)
    self.nfeat_with_e_fc = nn.Linear(out_feats + 1, out_feats, bias=False)
    self.attn_l = nn.Parameter(th.FloatTensor(size=(1, edata_channels, out_feats)))
    self.attn_r = nn.Parameter(th.FloatTensor(size=(1, edata_channels, out_feats)))
    self.leaky_relu = nn.LeakyReLU(0.2)
    self.reset_parameters()
def forward(self, graph, feat, edge_weight):
    with graph.local_scope():
        feat_src, feat_dst = expand_as_pair(feat, graph)
        graph.srcdata['h'] = feat_src
        graph.srcdata['norm_h'] = F.normalize(feat_src, p=2, dim=-1)
        if isinstance(feat, tuple) or graph.is_block:
            graph.dstdata['norm_h'] = F.normalize(feat_dst, p=2, dim=-1)
        e = self.beta * edge_weight
        graph.edata['p'] = edge_softmax(graph, e, norm_by='src')
        graph.update_all(fn.u_mul_e('norm_h', 'p', 'm'), fn.sum('m', 'h'))
        rst = graph.dstdata.pop('h')
        rst = (1 + self.eps) * feat + rst
        return rst
def forward(self, G: DGLBlock, feat):
    feat_src, feat_dst = expand_as_pair(input_=feat, g=G)
    # print(G)
    with G.local_scope():
        funcs = {}
        for srctype, etype, dsttype in G.canonical_etypes:
            k_linear = self.k_linears[self.node_dict[srctype]]
            v_linear = self.v_linears[self.node_dict[srctype]]
            q_linear = self.q_linears[self.node_dict[dsttype]]

            G.srcnodes[srctype].data['k'] = k_linear(feat_src[srctype]).view(-1, self.n_heads, self.d_k)
            G.srcnodes[srctype].data['v'] = v_linear(feat_src[srctype]).view(-1, self.n_heads, self.d_k)
            G.dstnodes[dsttype].data['q'] = q_linear(feat_dst[dsttype]).view(-1, self.n_heads, self.d_k)

            G.apply_edges(func=self.edge_attention, etype=etype)
            if G.batch_num_edges(etype=etype).item() > 0:
                funcs[etype] = (self.message_func, self.reduce_func)

        # print("funcs", funcs.keys())
        G.multi_update_all(funcs, cross_reducer='mean')

        new_h = {}
        for ntype in G.ntypes:
            '''
            Step 3: Target-specific Aggregation
            x = norm( W[node_type] * gelu( Agg(x) ) + x )
            '''
            nty_id = self.node_dict[ntype]
            alpha = torch.sigmoid(self.skip[nty_id])
            # print(ntype, G.srcnodes[ntype].data.keys(), G.dstnodes[ntype].data.keys())
            if "t" in G.dstnodes[ntype].data:
                trans_out = self.dropout(self.a_linears[nty_id].forward(G.dstnodes[ntype].data['t']))
            else:
                trans_out = self.dropout(feat_dst[ntype])
            trans_out = trans_out * alpha + feat_dst[ntype] * (1 - alpha)

            if self.use_norm:
                new_h[ntype] = self.norms[nty_id](trans_out)
            else:
                new_h[ntype] = trans_out
        return new_h
def __init__(self, in_feats, out_feats, gnn_mlps, bias=True):
    super(SAGEConv, self).__init__()
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    # din attention
    self.atten_src = nn.utils.weight_norm(nn.Linear(self._in_src_feats, out_feats), name='weight')
    self.atten_dst = nn.utils.weight_norm(nn.Linear(self._in_src_feats, out_feats), name='weight')
    self.atten_sub = nn.utils.weight_norm(nn.Linear(self._in_src_feats, out_feats), name='weight')
    self.atten_mul = nn.utils.weight_norm(nn.Linear(self._in_src_feats, out_feats), name='weight')
    self.atten_out = nn.utils.weight_norm(nn.Linear(out_feats, 1), name='weight')
    self.leaky_relu = nn.LeakyReLU(0.2)
    # other
    self.fc_pool = nn.utils.weight_norm(nn.Linear(self._in_src_feats, self._in_src_feats), name='weight')
    self.fc_pool2 = nn.utils.weight_norm(nn.Linear(self._in_src_feats, self._in_src_feats), name='weight')
    self.fc_self = nn.utils.weight_norm(nn.Linear(self._in_dst_feats, out_feats, bias=bias), name='weight')
    self.fc_neigh = nn.utils.weight_norm(nn.Linear(self._in_src_feats, out_feats, bias=bias), name='weight')
    self.fc_neigh2 = nn.utils.weight_norm(nn.Linear(self._in_src_feats, out_feats, bias=bias), name='weight')
    # mlps
    self.out_mlp = nn.ModuleList()
    for i in range(gnn_mlps):
        self.out_mlp.append(nn.utils.weight_norm(nn.Linear(out_feats, out_feats), name='weight'))
    self.reset_parameters()
def __init__(self, in_feats, out_feats, aggregator_type,
             feat_drop=0., bias=True, norm=None, activation=None):
    super(SAGEConv, self).__init__()
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self._aggre_type = aggregator_type
    self.norm = norm
    self.feat_drop = nn.Dropout(feat_drop)
    self.activation = activation
    # aggregator type: mean/pool/lstm/gcn
    if self._aggre_type == 'cheb':
        # default using activation TODO
        self._cheb_k = 2
        # it should be consistent with the sampling since every block is a bipartite graph;
        # in the spectral method there is no fc_neigh or fc_self
        self._cheb_linear = nn.Linear(self._cheb_k * self._in_src_feats, out_feats)
    if aggregator_type == 'ginmean':
        self._gin_reducer = fn.mean
        # default apply_func is nn.Linear
        self.fc_gin = nn.Linear(self._in_src_feats, out_feats)
    if aggregator_type == 'pool':
        self.fc_pool = nn.Linear(self._in_src_feats, self._in_src_feats)
    if aggregator_type == 'lstm':
        self.lstm = nn.LSTM(self._in_src_feats, self._in_src_feats, batch_first=True)
    if aggregator_type != 'gcn' and aggregator_type != 'ginmean' and aggregator_type != 'cheb':
        self.fc_self = nn.Linear(self._in_dst_feats, out_feats, bias=bias)
    if aggregator_type != 'ginmean' and aggregator_type != 'cheb':
        self.fc_neigh = nn.Linear(self._in_src_feats, out_feats, bias=bias)
    self.reset_parameters()
def __init__(self,
             in_feats,
             out_feats,
             num_heads,
             feat_drop=0.,
             attn_drop=0.,
             negative_slope=0.2,
             residual=False,
             activation=None):
    super(Conv, self).__init__()
    self._num_heads = num_heads
    self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
    self._out_feats = out_feats
    self.cache_atte = None
    if isinstance(in_feats, tuple):
        self.fc_src = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
        self.fc_dst = nn.Linear(self._in_dst_feats, out_feats * num_heads, bias=False)
    else:
        self.fc = nn.Linear(self._in_src_feats, out_feats * num_heads, bias=False)
    self.attn_l = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.attn_r = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
    self.feat_drop = nn.Dropout(feat_drop)
    self.attn_drop = nn.Dropout(attn_drop)
    self.leaky_relu = nn.LeakyReLU(negative_slope)
    if residual:
        if self._in_dst_feats != out_feats:
            self.res_fc = nn.Linear(self._in_dst_feats, num_heads * out_feats, bias=False)
        else:
            self.res_fc = Identity()
    else:
        self.register_buffer('res_fc', None)
    self.reset_parameters()
    self.activation = activation
def forward(self, graph, feat, weight):
    with graph.local_scope():
        feat_src, feat_dst = expand_as_pair(feat, graph)
        graph.srcdata['h'] = feat_src
        graph.dstdata['h'] = feat_dst
        if self._norm == 'both':
            degs = graph.out_degrees().float().clamp(min=1)
            norm = torch.pow(degs, -0.5)
            shp = norm.shape + (1,) * (feat_src.dim() - 1)
            norm = torch.reshape(norm, shp)
            feat_src = feat_src * norm

        rel_weight_feature = weight[graph.edata['etype']]  # get rel weight
        graph.edata['r'] = self.rel_dropout(rel_weight_feature)

        # gcn
        graph.srcdata['h'] = feat_src
        graph.apply_edges(fn.u_mul_e('h', 'r', 'm'))
        graph.update_all(fn.copy_e('m', 'm'), fn.mean(msg='m', out='mix'))
        rst = graph.dstdata['mix']

        # self_loop
        self_loop_h = torch.matmul(graph.dstdata['h'], self.loop_weight)
        rst = self_loop_h + rst

        # need to check if norm is needed
        if self._norm != 'none':
            degs = graph.in_degrees().float().clamp(min=1)
            if self._norm == 'both':
                norm = torch.pow(degs, -0.5)
            else:
                norm = 1.0 / degs
            shp = norm.shape + (1,) * (feat_dst.dim() - 1)
            norm = torch.reshape(norm, shp)
            rst = rst * norm

        # NOTE: the original called self.ent_dropout(rst) without keeping the result;
        # assigning it back is assumed to be the intent.
        rst = self.ent_dropout(rst)
        if self.activation is not None:
            rst = self.activation(rst)
        return rst
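# --- Sketch of the relation-weight gather above (invented sizes) ----------------
# `weight[graph.edata['etype']]` picks one weight row per edge by its relation id;
# each message is then the source feature scaled edge-wise by that row, mirroring
# the u_mul_e / copy_e / mean pattern in the forward above.
import dgl
import dgl.function as fn
import torch

g = dgl.graph(([0, 1, 2], [1, 2, 0]))
g.edata['etype'] = torch.tensor([0, 1, 0])       # relation id per edge
num_rels, d = 2, 4
weight = torch.randn(num_rels, d)                # one weight vector per relation
g.ndata['h'] = torch.randn(g.num_nodes(), d)
g.edata['r'] = weight[g.edata['etype']]          # (E, d), gathered per edge
g.apply_edges(fn.u_mul_e('h', 'r', 'm'))
g.update_all(fn.copy_e('m', 'm'), fn.mean('m', 'mix'))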