Example #1
def load_node_pair(adj, file_path, n_hop=10):
    from os.path import join as pjoin
    if file_path and os.path.exists(pjoin(file_path, 'pos_node_edge.pt')):
        pos_neighbor = torch.load(pjoin(file_path, 'pos_node_edge.pt'))
        neg_neighbor = torch.load(pjoin(file_path, 'neg_node_edge.pt'))
    else:
        adj = adj.to('cuda:8')
        adj = adj > 0
        neighbor = torch.mm(adj.float(), adj.float().t()) > 0
        size = neighbor.size(1)

        pos_neighbor = torch.zeros_like(neighbor)
        sort_weight, indices = torch.sum(neighbor.float(), dim=0).sort()
        min_count, max_count = int(size * 0.1), int(size * 0.9)
        min_pos, max_pos = sort_weight[min_count], sort_weight[max_count]
        min_count = (sort_weight < max(min_pos, 2)).sum()
        max_count = (sort_weight <= max_pos).sum()
        pos_select = indices[min_count:max_count]
        pos_neighbor[:, pos_select] = neighbor[:, pos_select]

        neg_neighbor = neighbor
        for i in range(1, n_hop):
            neg_neighbor = torch.mm(neg_neighbor.float(), neighbor.float()) > 0
        neg_neighbor = ~neg_neighbor

        pos_neighbor, _ = dense_to_sparse(pos_neighbor)
        pos_neighbor = remove_self_loops(pos_neighbor)[0]
        neg_neighbor, _ = dense_to_sparse(neg_neighbor)
        pos_neighbor = pos_neighbor.cpu()
        neg_neighbor = neg_neighbor.cpu()
        if file_path:
            torch.save(pos_neighbor, pjoin(file_path, 'pos_node_edge.pt'))
            torch.save(neg_neighbor, pjoin(file_path, 'neg_node_edge.pt'))
    return pos_neighbor, neg_neighbor
Example #2
def load_pattern_pair(adj, file_path, n_hop=10):
    if file_path and os.path.exists(file_path + '/neg_edge.pt'):
        pos_mask = torch.load(file_path + '/pos_edge.pt')
        neg_mask = torch.load(file_path + '/neg_edge.pt')
    else:
        adj = adj.to('cuda:8')
        adj = adj > 0
        size = adj.size(1)

        pos_mask = torch.zeros_like(adj)
        sort_weight, indices = torch.sum(adj.float(), dim=0).sort()
        min_count, max_count = int(size * 0.1), int(size * 0.9)
        min_pos, max_pos = sort_weight[min_count], sort_weight[max_count]
        min_count = (sort_weight < max(min_pos, 2)).sum()
        max_count = (sort_weight <= max_pos).sum()
        pos_select = indices[min_count:max_count]
        pos_mask[:, pos_select] = adj[:, pos_select]

        neg_mask = adj
        adj = adj.float()
        for i in range(1, n_hop):
            neg_mask = torch.mm(neg_mask.float(), adj.t()) > 0
            neg_mask = torch.mm(neg_mask.float(), adj) > 0
        neg_mask = ~neg_mask

        pos_mask, _ = dense_to_sparse(pos_mask)
        neg_mask, _ = dense_to_sparse(neg_mask)
        pos_mask = pos_mask.cpu()
        neg_mask = neg_mask.cpu()
        if file_path:
            torch.save(pos_mask, file_path + '/pos_edge.pt')
            torch.save(neg_mask, file_path + '/neg_edge.pt')
    return pos_mask, neg_mask
Example #3
    def forward(self, data, mask):
        #  x0, edge_index0, edge_weight0 = data.x, data.edge_index, data.edge_attr


        edge_index0, _ = dropout_adj(
            data.edge_index, p=self.initial_dropout_adj, force_undirected=True,
            num_nodes=data.num_nodes, training=self.training)
        x0 = F.dropout(data.x, p=self.initial_dropout_nodes, training=self.training)

        # level 0 conv  
        x0_ = self.gcn0_in(x0, edge_index0)

        # pooled 1 
        s1 = F.relu(self.conv_pool1(x0_, edge_index0))
        x1, adj1, l1, e1 = dense_diff_pool(x0_, data.adj, s1, mask)
        x1 = torch.squeeze(x1)
        
        # get edge index level 1
        adj1_sparse_tuple = dense_to_sparse(torch.squeeze(adj1))
        edge_index1 = adj1_sparse_tuple[0]
        edge_weight1 = adj1_sparse_tuple[1]
                
        # level 1 conv
        x1_ = self.gcn1_in(x1, edge_index1, edge_weight1)
        
        # pooled 2 
        s2 = self.conv_pool2(x1_, edge_index1, edge_weight1)
        s2 = F.relu(s2)
        x2, adj2, l2, e2 = dense_diff_pool(x1_, adj1, s2)
        x2 = torch.squeeze(x2)
        
        # get edge index level 2
        adj2_sparse_tuple = dense_to_sparse(torch.squeeze(adj2))
        edge_index2 = adj2_sparse_tuple[0]
        edge_weight2 = adj2_sparse_tuple[1]
        
        # level 2 conv
        x2_out = self.gcn2_in(x2, edge_index2, edge_weight2)
        x2_out_up = torch.matmul(s2, x2_out) # unpool level 2
        
        # output level 1
        x1_out = self.gcn1_out(torch.cat((x1_, x2_out_up), 1), edge_index1, edge_weight1)
        x1_out_up = torch.matmul(s1, x1_out) # unpool level 1
        
        # output level 0 
        x0_out = self.gcn0_out(torch.cat((x0_, x1_out_up), 1), edge_index0)
    
        edge_loss = l1 + e1 + l2 + e2

        edges = {'e1': {'e': edge_index1, 'w': edge_weight1},
                 'e2': {'e': edge_index2, 'w': edge_weight2}}

        output_dict = {'prediction': F.log_softmax(x0_out, dim=1), 's01': s1,
                       'edge_loss': edge_loss, 'adj1': adj1, 'edges': edges}

        return output_dict
Example #4
def get_k_hop_adjacency(adj, k, file_path, bi_graph=False):
    '''Compute the neighbor adjacency within k hops (k included).
    '''
    file_name = os.path.join(file_path, '%d_hop_neighbor.pt' % k) if file_path else None
    if file_name and os.path.exists(file_name):
        output, depth = torch.load(file_name)
    else:
        if k < 2:
            output, depth = dense_to_sparse(adj.long().cpu())
            return output, depth
        adj = adj.bool()
        neighbor = adj.float()
        output = adj.long()
        k_neighbor = neighbor
        for i in range(2, k + 1):
            # find the long-tail nodes
            '''
            degrees, indices = k_neighbor.sum(dim=1).sort()
            long_tail_degree = min(2, degrees[int(0.9 * degrees.size(0))])
            long_tail_indices = indices[degrees <= long_tail_degree]
            '''
            if bi_graph:
                k_neighbor = torch.mm(k_neighbor, neighbor.t()).bool()
                k_neighbor = torch.mm(k_neighbor.float(), neighbor)
            else:
                k_neighbor = torch.mm(k_neighbor, neighbor.t())

            # only retain k-hop neighborhood for long-tail nodes
            '''
            long_tail = torch.zeros_like(k_neighbor)
            long_tail[long_tail_indices, :] = k_neighbor[long_tail_indices, :]
            '''

            # control the augmented links are less than existing links
            '''
            new_mask = long_tail.bool() & ~output.bool()
            counts = long_tail[new_mask]
            counts = counts.sort()[0]
            existing_count = output.bool().sum().long()
            add_count = min(counts.size(0), int(0.5 * existing_count.item()))
            min_count = max(1, counts[-add_count])
            '''

            # add augmented links with their depth
            k_adj = (k_neighbor > 1) & ~output.bool()
            output.masked_fill_(k_adj, i)
            k_neighbor = k_neighbor.bool().float()
        output = output.cpu()
        output, depth = dense_to_sparse(output)
        if file_name:
            torch.save((output, depth), file_name)
    return output, depth
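A minimal call sketch for the function above; the 4-node path graph and the 'cache' directory are illustrative assumptions, not part of the original project:

import os
import torch

adj = torch.tensor([[0, 1, 0, 0],
                    [1, 0, 1, 0],
                    [0, 1, 0, 1],
                    [0, 0, 1, 0]])
os.makedirs('cache', exist_ok=True)
# edge_index: COO indices of the retained links; depth: the hop level
# at which each link was added (1 = original edge)
edge_index, depth = get_k_hop_adjacency(adj, k=2, file_path='cache')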
Example #5
File: pmodels.py Project: wonlee2019/dgm
    def forward(self, x, edge_index):
        total_loss1 = 0
        total_loss2 = 0
        edge_attr = None

        for i in range(len(self.graph_convs)):
            if i < len(self.graph_convs) - 1:
                if self.mode == 'mincut':
                    s = self.assignment_ws[2 * i + 1](F.relu(
                        self.assignment_ws[2 * i](x)))
                else:
                    s = self.pool_convs[i](x, edge_index, edge_attr)

            x = F.relu((self.graph_convs[i](x, edge_index, edge_attr) +
                        self.graph_skips[i](x)))

            if i < len(self.graph_convs) - 1:
                x, adj, loss1, loss2 = self.pooling_fn(
                    x, to_dense_adj(edge_index, edge_attr=edge_attr,
                                    max_num_nodes=x.size(0)), s)
                edge_index, edge_attr = dense_to_sparse(adj.squeeze(0))
                x = x.squeeze(0)
                total_loss1 += loss1
                total_loss2 += loss2

        x_avg = torch.mean(x, dim=0).unsqueeze(0)
        out = self.classifier(x_avg)

        return out, total_loss1, total_loss2
Example #6
    def process(self):
        with open(os.path.join(self.raw_dir, 'abide_raw.pkl'), 'rb') as f:
            dataset = pickle.load(f)

        dataset_list = []

        sub_list = np.loadtxt(self.sub_list, dtype=str, delimiter='\n')

        for subj in sub_list:
            data = dataset[subj]
            if self.target_name is not None:
                data.y = data.y[self.target_name]
            if self.feature_mask is not None:
                data.features = [data.features[i] for i in self.feature_mask]

            edge_index, _ = dense_to_sparse(
                torch.ones(data.features[0].shape, dtype=torch.float32))
            edge_attr = []
            for feature in data.features:
                edge_attr.append(feature[edge_index[0], edge_index[1]])
            data.edge_index = edge_index
            data.edge_attr = torch.stack(edge_attr, dim=-1)
            data.features = torch.stack(data.features, dim=-1)
            data.features = torch.unsqueeze(data.features, dim=0)
            dataset_list.append(data)

        self.data, self.slices = self.collate(dataset_list)
        torch.save((self.data, self.slices), self.processed_paths[0])
        print('Processed dataset saved as', self.processed_paths[0])
Example #7
def get_torch_data(df, threshold=3):
    atoms = df['atom'].values

    energy = np.array([-1 * df['Energy(Ry)'].values[0]])
    atoms = np.expand_dims(atoms, axis=1)

    one_hot_encoding = OneHotEncoder(sparse=False).fit_transform(atoms)
    coords = df[['x(angstrom)', 'y(angstrom)', 'z(angstrom)']].values

    edge_index = None
    edge_attr = None

    while True:
        dist = distance.cdist(coords, coords)
        dist[dist > threshold] = 0
        dist = torch.from_numpy(dist)
        edge_index, edge_attr = data_utils.dense_to_sparse(dist)
        edge_attr = edge_attr.unsqueeze(dim=1).type(torch.FloatTensor)
        edge_index = torch.LongTensor(edge_index)
        # num_nodes=13 hard-codes the expected atom count for this dataset
        if data_utils.contains_isolated_nodes(edge_index, num_nodes=13):
            threshold += 0.5
        else:
            break

    x = torch.from_numpy(one_hot_encoding).type(torch.FloatTensor)
    y = torch.from_numpy(energy).type(torch.FloatTensor)
    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

    return data
Example #8
    def do_trans(data):
        node_num, _ = data.x.size()
        if add_self_loop:
            sl = torch.tensor([[n, n] for n in range(node_num)]).t()
            edge_index = torch.cat((data.edge_index, sl), dim=1)
        else:
            edge_index = data.edge_index.detach().clone()
        
        orig_adj = to_dense_adj(edge_index)[0]
        orig_adj = torch.where(orig_adj>1, torch.ones_like(orig_adj), orig_adj)
        d = torch.diag(torch.sum(orig_adj, 1))

        if mode == 'ppr':
            dinv = torch.inverse(torch.sqrt(d))
            at = torch.matmul(torch.matmul(dinv, orig_adj), dinv)
            diff_adj = alpha * torch.inverse((torch.eye(orig_adj.shape[0]) - (1 - alpha) * at))

        elif mode == 'heat':
            diff_adj = torch.exp(t * (torch.matmul(orig_adj, torch.inverse(d)) - 1))

        else:
            raise Exception("Must choose one diffusion instantiation mode from 'ppr' and 'heat'!")
            
        edge_ind, edge_attr = dense_to_sparse(diff_adj)

        return Data(x=data.x, edge_index=edge_ind, edge_attr=edge_attr)
Example #9
def load_vgrnn(dataset):
    datasets = ['fb', 'dblp', 'enron10']
    assert dataset in datasets, \
        "Dataset %s not in allowed list: %s" % (dataset, str(datasets))

    adj = os.path.join('/mnt/raid0_24TB/isaiah/code/TGCN/src/data', dataset,
                       'adj_orig_dense_list.pickle')
    with open(adj, 'rb') as f:
        fbytes = f.read()

    dense_adj_list = pickle.loads(fbytes, encoding='bytes')
    num_nodes = dense_adj_list[0].size(0)

    eis = []
    splits = []

    for adj in dense_adj_list:
        # Remove self loops
        for i in range(adj.size(0)):
            adj[i, i] = 0

        ei = dense_to_sparse(adj)[0]
        ei = to_undirected(ei)
        eis.append(ei)
        splits.append(edge_tvt_split(ei))

    data = TData(x=torch.eye(num_nodes),
                 eis=eis,
                 masks=splits,
                 num_nodes=num_nodes,
                 dynamic_feats=False,
                 T=len(eis))

    return data
Example #10
    def process(self):
        with open(os.path.join(self.raw_dir, 'pnc_features_raw.pkl'),
                  'rb') as f:
            labels, path_data, filelist, _, min_ts_length = pickle.load(f)

        if self.feature_mask is not None:
            if np.isscalar(self.feature_mask):
                self.feature_mask = [
                    i for i in range(len(filelist))
                    if self.feature_mask == int(filelist[i].split('_')[1])
                ]
            filelist = [filelist[i] for i in self.feature_mask]
        ts_index = [
            i for i in range(len(filelist)) if 'timeseries' in filelist[i]
        ]
        sc_index = [
            i for i in range(len(filelist)) if 'connmat' in filelist[i]
        ]

        dataset_list = []
        sub_list = np.loadtxt(self.sub_list, dtype=str, delimiter='\n')
        epsilon = 1e-5
        for subj in sub_list:
            print('processing', subj, '...')
            features = []
            for filename in filelist:
                filepath = os.path.join(path_data, subj, filename)
                if not os.path.exists(filepath):
                    raise ValueError('invalid path ' + filepath)
                matrix = np.loadtxt(filepath)
                features.append(matrix)

            data = Data(x=None, y=None)
            data.y = {'ScanAgeYears': labels[0][subj], 'Sex': labels[1][subj]}
            data.subj = int(subj.split('_')[0])
            if self.target_name is not None:
                data.y = data.y[self.target_name]
            ts = []
            for i in ts_index:
                ts.append(features[i][:min_ts_length, :])
            data.fconn = torch.tensor(
                ConnectivityMeasure(kind='correlation').fit_transform(ts),
                dtype=torch.float32)
            sc = []
            for i in sc_index:
                sc_matrix = features[i] + epsilon
                sc.append(sc_matrix / np.sum(sc_matrix, axis=0))
            data.sconn = torch.tensor(sc, dtype=torch.float32)
            data.x = data.fconn[0]
            data.edge_index, _ = dense_to_sparse(
                torch.ones(data.sconn[0].shape, dtype=torch.float32))
            data.edge_attr = data.sconn[0].clone().detach()[data.edge_index[0],
                                                            data.edge_index[1]]
            dataset_list.append(data)

        self.data, self.slices = self.collate(dataset_list)
        torch.save((self.data, self.slices), self.processed_paths[0])
        print('Processed dataset saved as', self.processed_paths[0])
Example #11
    def get_sample_adj(self, extend_adj, sub_node_num, sample_id):
        sample1 = extend_adj[sample_id, :]
        #print(sub_node_num)
        sample_adj_mid = torch.cat((extend_adj[:sub_node_num, :], sample1), 0)
        sample2 = sample_adj_mid[:, sample_id]
        sample_adj = torch.cat((sample_adj_mid[:, :sub_node_num], sample2), 1)
        sample_adj, sample_adj_weight = dense_to_sparse(sample_adj)

        return sample_adj, sample_adj_weight
Example #12
    def decode(self, z, edge_index=None):
        if edge_index is None:
            # inner product decoder
            adj = torch.relu(torch.matmul(z, z.t()))
            # take nonzero elements
            edge_index, _ = dense_to_sparse(adj)

        x, edge_index = self.decoder(z, edge_index)
        return x, edge_index
Example #13
    def read_Genetic(self, root):
        BioGrid = pd.read_csv(
            root +
            '/BIOGRID-ORGANISM-Escherichia_coli_K12_W3110-3.5.180.tab2.txt',
            delimiter='\t')
        BioGrid['Official Symbol Interactor A'] = BioGrid[
            'Official Symbol Interactor A'].str.lower()
        BioGrid['Official Symbol Interactor B'] = BioGrid[
            'Official Symbol Interactor B'].str.lower()
        BioGrid = BioGrid.rename(
            columns={
                "Official Symbol Interactor A": "Gene_A",
                "Official Symbol Interactor B": "Gene_B"
            })

        Ecoli = pd.read_table(root +
                              '/avg_E_coli_v4_Build_6_exps466probes4297.tab')

        Ecoli['E_coli_v4_Build_6:genes'] = Ecoli[
            'E_coli_v4_Build_6:genes'].str.split('_').str[0]
        Ecoli = Ecoli.apply(lambda x: x.astype(str).str.lower())
        Ecoli = Ecoli.rename(columns={"E_coli_v4_Build_6:genes": "Genes"})

        Filt_BioGrid_indeces = BioGrid.Gene_A.isin(
            Ecoli.Genes
        ) & BioGrid.Gene_B.isin(
            Ecoli.Genes
        )  # & BioGrid['Experimental System Name'] != 'Biochemical Activity'
        Filt_BioGrid = BioGrid[Filt_BioGrid_indeces]
        Filt_BioGrid_Genetic = Filt_BioGrid[
            Filt_BioGrid['Experimental System Type'] == 'genetic']
        Filt_BioGrid_Genetic_Genes = np.union1d(
            Filt_BioGrid_Genetic.Gene_A.unique(),
            Filt_BioGrid_Genetic.Gene_B.unique())
        Ecoli_Filt_Genetic = Ecoli[Ecoli.Genes.isin(
            Filt_BioGrid_Genetic_Genes)]

        Adj = np.zeros(
            [len(Filt_BioGrid_Genetic_Genes),
             len(Filt_BioGrid_Genetic_Genes)])
        features = np.zeros(
            [len(Filt_BioGrid_Genetic_Genes), Ecoli_Filt_Genetic.shape[1] - 1])
        for i in range(len(Filt_BioGrid_Genetic)):
            row = np.where(Filt_BioGrid_Genetic_Genes ==
                           Filt_BioGrid_Genetic.iloc[i][7])[0][0]
            col = np.where(Filt_BioGrid_Genetic_Genes ==
                           Filt_BioGrid_Genetic.iloc[i][8])[0][0]
            Adj[row][col] = 1
            Adj[col][row] = 1

        for i in range(len(Filt_BioGrid_Genetic_Genes)):
            features[i] = Ecoli[Ecoli.Genes ==
                                Filt_BioGrid_Genetic_Genes[i]].iloc[:, 1:]

        return dense_to_sparse(torch.tensor(Adj))[0], torch.tensor(
            features, dtype=torch.float32)
Example #14
    def forward(self, x, powers_adj):
        # powers_adj holds stacked powers of the adjacency matrix
        # output is a list of (edge_index, edge_weight) tuples
        edge_index_powers = [dense_to_sparse(adj)[0] for adj in powers_adj]
        edge_weight_powers = [
            self._learn_adjacencies(x, edge_index, i)
            for i, edge_index in enumerate(edge_index_powers)
        ]

        return [(i, w) for i, w in zip(edge_index_powers, edge_weight_powers)]
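A sketch of how the powers_adj input might be prepared; the stacked-binary-powers format is an assumption inferred from the loop above, not the project's verified code:

import torch

def adjacency_powers(adj, k):
    # [A^1, A^2, ..., A^k], each binarized, stacked along dim 0
    powers, cur = [], adj.float()
    for _ in range(k):
        powers.append((cur > 0).float())
        cur = cur @ adj.float()
    return torch.stack(powers)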
Example #15
def test_dense_to_sparse():
    adj = torch.Tensor([
        [3, 1],
        [2, 0],
    ])
    edge_index, edge_attr = dense_to_sparse(adj)
    assert edge_index.tolist() == [[0, 0, 1], [0, 1, 0]]
    assert edge_attr.tolist() == [3, 1, 2]

    adj = torch.Tensor([[
        [3, 1],
        [2, 0],
    ], [
        [0, 1],
        [0, 2],
    ]])
    edge_index, edge_attr = dense_to_sparse(adj)
    assert edge_index.tolist() == [[0, 0, 1, 2, 3], [0, 1, 0, 3, 3]]
    assert edge_attr.tolist() == [3, 1, 2, 1, 2]
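For reference, to_dense_adj from torch_geometric.utils inverts this mapping; a small round-trip sketch mirroring the first test case above:

import torch
from torch_geometric.utils import dense_to_sparse, to_dense_adj

adj = torch.tensor([[3., 1.],
                    [2., 0.]])
edge_index, edge_attr = dense_to_sparse(adj)
# to_dense_adj returns shape [batch, N, N]; take batch 0
assert torch.equal(to_dense_adj(edge_index, edge_attr=edge_attr)[0], adj)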
Example #16
def read_ba2motif_data(folder: str, prefix):
    with open(os.path.join(folder, f"{prefix}.pkl"), 'rb') as f:
        dense_edges, node_features, graph_labels = pickle.load(f)

    data_list = []
    for graph_idx in range(dense_edges.shape[0]):
        data_list.append(Data(x=torch.from_numpy(node_features[graph_idx]).float(),
                              edge_index=dense_to_sparse(torch.from_numpy(dense_edges[graph_idx]))[0],
                              y=torch.from_numpy(np.where(graph_labels[graph_idx])[0])))
    return data_list
Example #17
    def forward(self, x, A_q, A_h, A):
        """
        :param X: Input data of shape (batch_size, num_timesteps, num_nodes)
        :A_q: The forward random walk matrix (num_nodes, num_nodes)
        :A_h: The backward random walk matrix (num_nodes, num_nodes)
        :return: Reconstructed X of shape (batch_size, num_timesteps, num_nodes)
        """

        # X = (4, 24, 62)

        # TODO, add x_dim with weather

        x = x.permute(0, 2, 1)

        batch_size = x.size(0)
        num_features = x.size(2)
        num_nodes = x.size(1)

        edge_index, edge_weight = dense_to_sparse(A.to(torch.device("cuda:0")))
        edge_index = edge_index.view(2, 1, -1).repeat(
            1, batch_size, 1) + torch.arange(batch_size).view(1, -1, 1).to(
                torch.device("cuda:0")) * num_nodes
        edge_index = edge_index.view(2, -1)

        # flatten node-major to match the per-graph offsets in edge_index
        x = x.contiguous().view(batch_size * num_nodes, num_features)

        ### For t
        ####################################
        x = ...  # use any PyG operator now

        ####################################
        x = x.view(batch_size, num_nodes, num_features)

        ### For t

        # TODO: batch_size = 1
        X_S = X_S[0]

        h, c = None, None
        for i in range(self.time_dimension):
            y_hat, h, c = self.recurrent(X_S[:, i, None], edge_index,
                                         edge_weight, h, c)

        self.recurrent(X_S.view(X_S.shape[0], 1, X_S.shape[1]), edge_index,
                       edge_weight)

        h = self.tgnn(x, edge_index)

        # X_s1 = self.GNN1(X_S, A_q, A_h)
        # X_s2 = self.GNN2(X_s1, A_q, A_h) + X_s1 #num_nodes, rank
        # X_s3 = self.GNN3(X_s2, A_q, A_h)

        X_res = X_s3.permute(0, 2, 1)
        return X_res
Example #18
    def _get_buffer(self, x, graph, bsz, len_):
        if not hasattr(self, 'buffer_edge_index') or True:
            adj_mat = graph.new_zeros(x.size(0), x.size(0))
            for i in range(bsz):
                adj_mat[i * len_:(i + 1) * len_,
                        i * len_:(i + 1) * len_] = graph
            edge_index, edge_attr = dense_to_sparse(adj_mat)
            assert edge_index.size(1) % bsz == 0
            setattr(self, 'num_edges_per_graph', edge_index.size(1) // bsz)
            setattr(self, 'buffer_edge_index', edge_index)
        total_edges = getattr(self, 'num_edges_per_graph') * bsz
        return getattr(self, 'buffer_edge_index')[:, :total_edges]
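The same block-diagonal edge_index can be built without materializing the dense bsz*len_ square matrix, by offsetting one graph's indices per batch element; a sketch under the assumption that every batch element shares the same graph:

import torch
from torch_geometric.utils import dense_to_sparse

def batched_edge_index(graph, bsz, len_):
    edge_index, edge_attr = dense_to_sparse(graph)   # single copy
    offsets = torch.arange(bsz, device=graph.device) * len_
    # shift node ids into each block of the batched graph
    shifted = edge_index.unsqueeze(1) + offsets.view(1, -1, 1)
    return shifted.reshape(2, -1), edge_attr.repeat(bsz)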
Example #19
def mock_batch(batch_size):
    """construct pyG batch"""
    graphs = []
    while len(graphs) < batch_size:
        G = nx.erdos_renyi_graph(np.random.choice([300, 500]), 0.5)
        if G.number_of_edges() > 1:
            graphs.append(G)

    adjs = [torch.from_numpy(nx.to_numpy_array(G)) for G in graphs]
    graph_data = [dense_to_sparse(A) for A in adjs]
    # dense_to_sparse returns (edge_index, edge_weight); the all-ones edge
    # weights double here as placeholder node features for the mock batch
    data_list = [Data(x=x, edge_index=e) for (e, x) in graph_data]
    return Batch.from_data_list(data_list)
Example #20
File: sample.py Project: ycremar/DIG-SSL
    def do_trans(data):

        node_num, _ = data.x.size()
        _, edge_num = data.edge_index.size()

        drop_num = int(node_num * ratio)
        idx_drop = np.random.choice(node_num, drop_num, replace=False)
        idx_nondrop = [n for n in range(node_num) if n not in idx_drop]
        adj = to_dense_adj(data.edge_index)[0]
        adj = adj[idx_nondrop, :][:, idx_nondrop]

        return Data(x=data.x[idx_nondrop], edge_index=dense_to_sparse(adj)[0])
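Roughly the same node-dropping augmentation can be written with torch_geometric.utils.subgraph, which relabels the kept nodes without going through a dense adjacency; a sketch, not the project's code:

import numpy as np
import torch
from torch_geometric.data import Data
from torch_geometric.utils import subgraph

def drop_nodes(data, ratio):
    node_num = data.x.size(0)
    idx_drop = np.random.choice(node_num, int(node_num * ratio), replace=False)
    keep = torch.tensor(sorted(set(range(node_num)) - set(idx_drop.tolist())))
    # relabel_nodes=True re-indexes edges to match data.x[keep]
    edge_index, _ = subgraph(keep, data.edge_index,
                             relabel_nodes=True, num_nodes=node_num)
    return Data(x=data.x[keep], edge_index=edge_index)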
Example #21
def read_syn_data(folder: str, prefix):
    with open(os.path.join(folder, f"{prefix}.pkl"), 'rb') as f:
        adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, edge_label_matrix = pickle.load(f)

    x = torch.from_numpy(features).float()
    y = train_mask.reshape(-1, 1) * y_train + val_mask.reshape(-1, 1) * y_val + test_mask.reshape(-1, 1) * y_test
    y = torch.from_numpy(np.where(y)[1])
    edge_index = dense_to_sparse(torch.from_numpy(adj))[0]
    data = Data(x=x, y=y, edge_index=edge_index)
    data.train_mask = torch.from_numpy(train_mask)
    data.val_mask = torch.from_numpy(val_mask)
    data.test_mask = torch.from_numpy(test_mask)
    return data
Example #22
def main():
    x_dim = 512
    x_len = 10000

    x = sparse.rand(x_len,
                    x_dim,
                    density=10 / x_dim,
                    format='csr',
                    dtype=np.float64)
    adj = sparse.rand(x_len,
                      x_len,
                      density=10 / x_len,
                      format='csr',
                      dtype=np.float64)
    w = sparse.rand(x_dim,
                    x_dim,
                    density=10 / x_dim,
                    format='csr',
                    dtype=np.float64)

    start = time.time()
    adj.dot(x.dot(w))
    print(time.time() - start)

    x1 = x.todense().astype(np.float64)
    adj1 = adj.todense().astype(np.float64)
    w1 = w.todense().astype(np.float64)

    start = time.time()
    adj1.dot(x1.dot(w1))
    print(time.time() - start)

    x2 = torch.tensor(x1, dtype=torch.float)
    adj2 = torch.tensor(adj1, dtype=torch.float)
    w2 = torch.tensor(w1, dtype=torch.float)

    start = time.time()
    adj2.matmul(x2.matmul(w2))
    print(time.time() - start)

    adj2alt = torch.rand((x_len, x_len), dtype=torch.float)
    start = time.time()
    adj2alt.matmul(x2.matmul(w2))
    print(time.time() - start)

    conv = GCNConv(x_dim, x_dim)
    edge_index, _ = dense_to_sparse(adj2)

    start = time.time()
    x3 = conv(x2, edge_index)
    print(time.time() - start)
Example #23
def mol_to_pyg_graph(mol, idm=False, ratio=2.):
    nodes = []
    for atom in mol.GetAtoms():
        nodes.append(atom_to_node(atom))
    idx = [n[0] for n in nodes]
    assert is_sorted(idx)
    nodes = np.array(nodes, dtype=float)[:, 1:]
    edges = []
    for bond in mol.GetBonds():
        edges.append(bond_to_edge(bond))

    g_adj = construct_graph(nodes, edges)

    if idm:
        # inverse distance weighting matrix
        try:
            if AllChem.EmbedMolecule(
                    mol, randomSeed=0xf00d
            ) == -1:  # optional random seed for reproducibility
                AllChem.Compute2DCoords(mol)

            with np.errstate(divide='ignore'):
                W = 1. / Chem.rdmolops.Get3DDistanceMatrix(mol)
            W[np.isinf(W)] = 0
        except Exception as e:
            try:
                mol = Chem.AddHs(mol)
                if AllChem.EmbedMolecule(
                        mol, randomSeed=0xf00d
                ) == -1:  # optional random seed for reproducibility
                    AllChem.Compute2DCoords(mol)
                mol = Chem.RemoveHs(mol)

                with np.errstate(divide='ignore'):
                    W = 1. / Chem.rdmolops.Get3DDistanceMatrix(mol)
                W[np.isinf(W)] = 0
            except Exception:
                num_atoms = mol.GetNumAtoms()
                W = np.zeros((num_atoms, num_atoms))
        # preserve top ratio*n entries
        threshold = np.sort(
            W, axis=None)[::-1][min(int(ratio * len(W)) + 1,
                                    len(W)**2) - 1]
        W[W < threshold] = 0
        # convert to sparse representation
        W_spr = dense_to_sparse(torch.FloatTensor(W))
        g_idm = Data(x=g_adj.x, edge_index=W_spr[0], edge_attr=W_spr[1])

        return [g_adj, g_idm]
    return [g_adj, None]
Example #24
def load_node_neighbor(adj, file_path):
    from os.path import join as pjoin
    if file_path and os.path.exists(pjoin(file_path, 'node_neighbor.pt')):
        neighbor = torch.load(pjoin(file_path, 'node_neighbor.pt'))
    else:
        adj = adj.to('cuda:8')
        adj = adj > 0
        neighbor = torch.mm(adj.float(), adj.float().t()) > 0
        neighbor, _ = dense_to_sparse(neighbor)
        neighbor = remove_self_loops(neighbor)[0]
        neighbor = neighbor.cpu()
        if file_path:
            torch.save(neighbor, pjoin(file_path, 'node_neighbor.pt'))
    return neighbor
Example #25
    def read_syn_data(self):
        with open(self.raw_paths[0], 'rb') as f:
            adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, edge_label_matrix = pickle.load(
                f)

        x = torch.from_numpy(features).float()
        y = train_mask.reshape(-1, 1) * y_train + val_mask.reshape(
            -1, 1) * y_val + test_mask.reshape(-1, 1) * y_test
        y = torch.from_numpy(np.where(y)[1])
        edge_index = dense_to_sparse(torch.from_numpy(adj))[0]
        data = Data(x=x, y=y, edge_index=edge_index)
        data.train_mask = torch.from_numpy(train_mask)
        data.val_mask = torch.from_numpy(val_mask)
        data.test_mask = torch.from_numpy(test_mask)
        return data
Example #26
    def __call__(self, data: Data):
        N = data.num_nodes
        adj = to_dense_adj(data.edge_index).squeeze(0)
        adj_order = get_higher_order_adj_matrix(adj, self.order)  # (N, N)

        type_mat = to_dense_adj(data.edge_index,
                                edge_attr=data.edge_type).squeeze(0)  # (N, N)
        type_highorder = torch.where(adj_order > 1,
                                     self.num_types + adj_order - 1,
                                     torch.zeros_like(adj_order))
        assert (type_mat * type_highorder == 0).all()
        type_new = type_mat + type_highorder

        new_edge_index, new_edge_type = dense_to_sparse(type_new)
        _, edge_order = dense_to_sparse(adj_order)

        data.bond_edge_index = data.edge_index  # Save original edges
        data.edge_index, data.edge_type = coalesce(new_edge_index,
                                                   new_edge_type.long(), N, N)
        edge_index_1, data.edge_order = coalesce(new_edge_index,
                                                 edge_order.long(), N, N)
        assert (data.edge_index == edge_index_1).all()

        return data
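get_higher_order_adj_matrix is referenced above but not shown; a plausible implementation is a hop-distance matrix with entries capped at `order` (a sketch consistent with how adj_order is used above, not the project's verified code):

import torch

def get_higher_order_adj_matrix(adj, order):
    # reach[k][i, j] == 1 iff j is reachable from i in <= k hops
    eye = torch.eye(adj.size(0), dtype=torch.long)
    reach = [eye, ((adj.long() + eye) > 0).long()]
    for _ in range(2, order + 1):
        reach.append(((reach[-1] @ reach[1]) > 0).long())
    # entry (i, j) = smallest hop count (0 on the diagonal, 0 if > order)
    order_mat = torch.zeros_like(reach[0])
    for k in range(1, order + 1):
        order_mat += (reach[k] - reach[k - 1]) * k
    return order_mat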
Example #27
    def reward(self):

        #Get black-box labels for all nodes
        edge_indices, _ = dense_to_sparse(self.adj)
        features = torch.ones((self.num_current_nodes, self.num_features))

        logits = self.blackbox_model(features, edge_indices)
        probs = F.softmax(logits, dim=1)

        #Reward is probability of node 0 being predicted as class c
        #reward = probs[0, self.c].detach().item()
        reward = probs[:, self.c].detach().sum().item()  # *10  # /self.num_current_nodes

        return reward
Example #28
File: pmodels.py Project: wonlee2019/dgm
    def forward(self, x, edge_index, pool=True):
        for i in range(len(self.hidden_dims)):
            x = self.embed_nets[i](x, edge_index)

            if pool:
                x, adj = mpr_pool(x,
                                  to_dense_adj(edge_index,
                                               edge_attr=None,
                                               max_num_nodes=x.size(0))[0],
                                  clusters=self.cluster_dims[i],
                                  overlap=self.overlap)
                edge_index, edge_attr = dense_to_sparse(adj.squeeze(0))

        y_pred = self.g_classifier(x, edge_index, None)
        return y_pred
Example #29
    def get_ecc_conv_parameters(self, data, layer_no):
        v_plus_list, laplacians = data.v_plus, data.laplacians

        # print([v_plus[layer_no] for v_plus in v_plus_list])
        v_plus_batch = torch.cat([v_plus[layer_no] for v_plus in v_plus_list], dim=0)

        laplacian_layer_list = [laplacians[i][layer_no] for i in range(len(laplacians))]
        laplacian_block_diagonal = self.make_block_diag(laplacian_layer_list)
        
        if self.config["dataset_name"] == 'DD':
            laplacian_block_diagonal[laplacian_block_diagonal < 1e-4] = 0

        # First layer
        lap_edge_idx, lap_edge_weights = dense_to_sparse(laplacian_block_diagonal)

        # Convert v_plus_batch to boolean
        return lap_edge_idx, lap_edge_weights, (v_plus_batch == 1)
Example #30
    def process(self):
        r"""Processes the dataset to the :obj:`self.processed_dir` folder."""
        with open(os.path.join(self.raw_dir, 'MUTAG_node_labels.txt'),
                  'r') as f:
            nodes_all_temp = f.read().splitlines()
            nodes_all = [int(i) for i in nodes_all_temp]

        adj_all = np.zeros((len(nodes_all), len(nodes_all)))
        with open(os.path.join(self.raw_dir, 'MUTAG_A.txt'), 'r') as f:
            adj_list = f.read().splitlines()
        for item in adj_list:
            lr = item.split(', ')
            l = int(lr[0])
            r = int(lr[1])
            adj_all[l - 1, r - 1] = 1

        with open(os.path.join(self.raw_dir, 'MUTAG_graph_indicator.txt'),
                  'r') as f:
            graph_indicator_temp = f.read().splitlines()
            graph_indicator = [int(i) for i in graph_indicator_temp]
            graph_indicator = np.array(graph_indicator)

        with open(os.path.join(self.raw_dir, 'MUTAG_graph_labels.txt'),
                  'r') as f:
            graph_labels_temp = f.read().splitlines()
            graph_labels = [int(i) for i in graph_labels_temp]

        data_list = []
        for i in range(1, 189):  # MUTAG contains 188 graphs
            idx = np.where(graph_indicator == i)
            graph_len = len(idx[0])
            adj = adj_all[idx[0][0]:idx[0][0] + graph_len,
                          idx[0][0]:idx[0][0] + graph_len]
            label = int(graph_labels[i - 1] == 1)
            feature = nodes_all[idx[0][0]:idx[0][0] + graph_len]
            nb_clss = 7
            targets = np.array(feature).reshape(-1)
            one_hot_feature = np.eye(nb_clss)[targets]
            data_example = Data(x=torch.from_numpy(one_hot_feature).float(),
                                edge_index=dense_to_sparse(
                                    torch.from_numpy(adj))[0],
                                y=label)
            data_list.append(data_example)

        torch.save(self.collate(data_list), self.processed_paths[0])