Exemplo n.º 1
0
def test_contains_isolated_nodes():
    """Check ``contains_isolated_nodes`` with and without an explicit node count."""
    # Nodes 0 and 1 are linked to each other; node 0 also carries a self-loop.
    linked_pair = torch.tensor([[0, 1, 0], [1, 0, 0]])
    found = contains_isolated_nodes(linked_pair)
    assert not found
    # Declaring three nodes leaves node 2 without any edge at all.
    found = contains_isolated_nodes(linked_pair, num_nodes=3)
    assert found

    # Here node 2 is touched only by its own self-loop; the assertion
    # expects that to be reported as isolated.
    loop_only = torch.tensor([[0, 1, 2, 0], [1, 0, 2, 0]])
    found = contains_isolated_nodes(loop_only)
    assert found
Exemplo n.º 2
0
def test_contains_isolated_nodes():
    """Check ``contains_isolated_nodes`` on edge indices stacked from row/col vectors."""
    # Nodes 0 and 1 are linked to each other; node 0 also carries a self-loop.
    sources, targets = torch.tensor([0, 1, 0]), torch.tensor([1, 0, 0])
    edge_index = torch.stack([sources, targets], dim=0)

    assert not contains_isolated_nodes(edge_index)
    # Declaring three nodes leaves node 2 without any edge at all.
    assert contains_isolated_nodes(edge_index, num_nodes=3)

    # Node 2 is touched only by its own self-loop; the assertion expects
    # that to be reported as isolated.
    sources, targets = torch.tensor([0, 1, 2, 0]), torch.tensor([1, 0, 2, 0])
    edge_index = torch.stack([sources, targets], dim=0)
    assert contains_isolated_nodes(edge_index)
Exemplo n.º 3
0
def CreateAuxGraph(edge_index, pos_i, pos_j, original_vertex_features, pos):
    """Build an auxiliary graph in which every input edge becomes a new vertex.

    One vertex is appended per column of ``edge_index``. Each new vertex gets
    an all-zero feature row, is placed at ``(pos_i + pos_j) / 2`` and is
    linked to both endpoints of the edge it stands for.

    Args:
        edge_index: Tensor indexed as ``edge_index[0, :]`` (sources) and
            ``edge_index[1, :]`` (targets).
        pos_i: Added to ``pos_j`` and halved to position the new vertices
            (presumably the positions of one endpoint per edge; confirm
            against the caller).
        pos_j: See ``pos_i``.
        original_vertex_features: 2-D tensor with one row per original vertex.
        pos: 2-D tensor of original vertex positions.

    Returns:
        Tuple ``(edge_index_aux, new_vertex_features, new_vertex_pos)``.

    Note:
        Relies on the module-level ``device`` for the newly created tensors.
    """
    vertex_count = original_vertex_features.shape[0]
    edge_count = len(edge_index[0, :])

    # Zero-padded rows for the vertices that represent the input edges.
    pos_padding = torch.zeros(edge_count, pos.shape[1]).to(device)
    new_vertex_pos = torch.cat([pos, pos_padding], dim=0)
    feature_padding = torch.zeros(
        edge_count, original_vertex_features.shape[1]).to(device)
    new_vertex_features = torch.cat(
        [original_vertex_features, feature_padding], dim=0)

    # Ids of the new vertices follow directly after the original ones.
    aux_ids = (vertex_count + torch.arange(0, edge_count)).to(device)
    source_links = torch.stack((edge_index[0, :], aux_ids), dim=0)
    target_links = torch.stack((edge_index[1, :], aux_ids), dim=0)

    # Overwrite the zero padding with the averaged per-edge positions.
    new_vertex_pos[vertex_count:, :] = (pos_i + pos_j) / 2

    # Target links come first, then source links.
    edge_index_aux = torch.cat([target_links, source_links], dim=1)

    assert not contains_self_loops(edge_index_aux)
    assert not contains_isolated_nodes(edge_index_aux)

    return edge_index_aux, new_vertex_features, new_vertex_pos
Exemplo n.º 4
0
def get_torch_data(df, threshold=3):
    """Convert one table of atoms into a graph ``Data`` object.

    Atoms become nodes with one-hot encoded ``atom`` labels as features. Two
    atoms are joined by an edge when their pairwise distance is non-zero and
    not greater than ``threshold``; the distance is the edge attribute. The
    threshold is raised in steps of 0.5 until no node is isolated.

    Args:
        df: DataFrame with columns ``atom``, ``Energy(Ry)``, ``x(angstrom)``,
            ``y(angstrom)`` and ``z(angstrom)``. Only the first
            ``Energy(Ry)`` value is used.
        threshold: Initial distance cut-off for creating edges.

    Returns:
        ``Data`` with ``x`` (one-hot features), ``edge_index``, ``edge_attr``
        (distances, one column) and ``y`` (the negated first energy value).

    Raises:
        ValueError: If a node is still isolated once the threshold covers
            every pairwise distance (e.g. a single atom); raising the
            threshold further could never connect it.
    """
    atoms = np.expand_dims(df['atom'].values, axis=1)
    energy = np.array([-1 * df['Energy(Ry)'].values[0]])

    one_hot_encoding = OneHotEncoder(sparse=False).fit_transform(atoms)
    coords = df[['x(angstrom)', 'y(angstrom)', 'z(angstrom)']].values
    # Use the real atom count instead of a hard-coded node number.
    num_atoms = coords.shape[0]

    # The pairwise distances do not depend on the threshold: compute once.
    full_dist = distance.cdist(coords, coords)
    max_dist = full_dist.max()

    while True:
        dist = full_dist.copy()
        # A zero entry means "no edge" for dense_to_sparse.
        dist[dist > threshold] = 0
        edge_index, edge_attr = data_utils.dense_to_sparse(
            torch.from_numpy(dist))
        edge_index = torch.LongTensor(edge_index)
        if not data_utils.contains_isolated_nodes(edge_index,
                                                  num_nodes=num_atoms):
            break
        if threshold >= max_dist:
            # Every non-zero distance is already an edge; growing the
            # threshold again would loop forever.
            raise ValueError(
                'Graph still contains isolated nodes with all pairwise '
                'distances included; cannot build a graph without them.')
        threshold += 0.5

    edge_attr = edge_attr.unsqueeze(dim=1).type(torch.FloatTensor)
    x = torch.from_numpy(one_hot_encoding).type(torch.FloatTensor)
    y = torch.from_numpy(energy).type(torch.FloatTensor)
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)
Exemplo n.º 5
0
 def has_isolated_nodes(self) -> bool:
     """Report whether the stored edges leave some node isolated.

     For a bipartite store the result is ``True`` when fewer distinct
     indices occur in ``edge_index[1]`` than ``self.size(1)`` reports;
     otherwise the check is delegated to ``contains_isolated_nodes``.

     Raises:
         NameError: If ``self.size(1)`` is ``None``.
     """
     edge_index = self.edge_index
     num_nodes = self.size(1)
     if num_nodes is None:
         raise NameError("Unable to infer 'num_nodes'")

     if not self.is_bipartite():
         return contains_isolated_nodes(edge_index, num_nodes)

     # Bipartite case: count the distinct indices of the second row.
     distinct_targets = torch.unique(edge_index[1])
     return distinct_targets.numel() < num_nodes
Exemplo n.º 6
0
Arquivo: CCRS.py Projeto: jing-ge/CCRS
def product2edgeindex(product):
    """Threshold ``product`` into a symmetric, de-duplicated edge index.

    Starting at 0.9, the cut-off is lowered by 0.1 until the graph built
    from all entries greater than the cut-off makes ``getncom`` return 1
    (presumably the number of connected components; confirm against
    ``getncom``).

    Args:
        product: Array compared element-wise with the cut-off; the code
            uses the first two index arrays returned by ``np.where``, so a
            2-D matrix is expected.

    Returns:
        Integer array with two rows holding every selected index pair in
        both directions, with duplicate columns removed by ``np.unique``.
    """
    value = 0.9
    while True:
        idx = np.where(product > value)
        G = nx.Graph()
        G.add_edges_from(np.array(idx).T.tolist())
        if getncom(G) == 1:
            # ``np.int`` was removed from NumPy; the builtin ``int`` is the
            # same dtype. Both directions are stacked so the result is
            # symmetric.
            both_directions = np.array(
                [np.hstack([idx[0], idx[1]]), np.hstack([idx[1], idx[0]])],
                dtype=int,
            )
            return np.unique(both_directions, axis=1)
        value -= 0.1
Exemplo n.º 7
0
def print_stats():
    """Print one tab-separated line of statistics for each entry of ``DATASETS``.

    The line holds the dataset name, graph count, average nodes and edges per
    graph, feature and class counts, followed by four flags: whether all
    graphs are undirected, whether any graph has self-loops, whether any
    graph has isolated nodes, and whether every feature row has exactly one
    positive entry.
    """
    for data in DATASETS:
        out = load_data(data)
        num_graphs = len(out)
        avg_nodes = out.data.x.size(0) / num_graphs
        avg_edges = out.data.edge_index.size(1) / num_graphs
        num_features = out.num_features
        num_classes = out.num_classes
        print(
            f'{data}\t{num_graphs}\t{avg_nodes}\t{avg_edges}\t{num_features}\t{num_classes}',
            end='\t')

        undirected = onehot = True
        self_loops = isolated_nodes = False
        for graph in out:
            edges = graph.edge_index
            directed_here = not is_undirected(edges, num_nodes=graph.num_nodes)
            loops_here = contains_self_loops(edges)
            isolated_here = contains_isolated_nodes(
                edges, num_nodes=graph.num_nodes)
            # Number of positive entries in each feature row.
            positives_per_row = (graph.x > 0).sum(dim=1)
            broken_rows = (positives_per_row != 1).sum() > 0

            # The flags only ever flip away from their initial value.
            undirected = False if directed_here else undirected
            self_loops = True if loops_here else self_loops
            isolated_nodes = True if isolated_here else isolated_nodes
            onehot = False if broken_rows else onehot
        print(f'{undirected}\t{self_loops}\t{isolated_nodes}\t{onehot}')
Exemplo n.º 8
0
 def contains_isolated_nodes(self):
     r"""Returns :obj:`True`, if the graph contains isolated nodes.

     Delegates to the module-level :func:`contains_isolated_nodes`, passing
     this object's :obj:`edge_index` and :obj:`num_nodes`."""
     return contains_isolated_nodes(self.edge_index, self.num_nodes)
Exemplo n.º 9
0
 def contains_isolated_nodes(self):
     """Return the result of the module-level ``contains_isolated_nodes``.

     The function is called with this object's ``edge_index`` and
     ``num_nodes``.
     """
     edges = self.edge_index
     node_count = self.num_nodes
     return contains_isolated_nodes(edges, node_count)
Exemplo n.º 10
0
 def read_files(self, verbose=True):
     """Load the graph inputs from disk and store them on the instance.

     All files are read from ``<data_path>/<data_name>/`` as given by
     ``self.config``. The method sets ``self.edge_index_data``,
     ``self.x_data``, ``self.y_data``, ``self.node2id``,
     ``self.edge_type_data`` (``None`` unless ``model_name`` is one of
     ``rgcn``/``rgat``/``rsage``), ``self.split_masks`` and
     ``self.vocab_size``.

     Args:
         verbose: When true, print a banner, data statistics and the
             elapsed time.
     """
     start = time.time()
     data_name = self.config['data_name']
     model_name = self.config['model_name']
     uses_edge_types = model_name in ['rgcn', 'rgat', 'rsage']

     if verbose:
         print("="*100 + "\n\t\t\t\t Preparing Data for {}\n".format(data_name) + "="*100)
         print("\n\n==>> Loading feature matrix and adj matrix....")

     # File names differ between the two dataset families.
     if data_name in ['gossipcop', 'politifact']:
         x_name = 'feat_matrix_lr_train_30_5.npz'
         y_name = 'all_labels_lr_train_30_5.json'
         adj_name = 'adj_matrix_lr_train_30_5_edge.npy'
         node2id_name = 'node2id_lr_train_30_5.json'
         split_mask_name = 'split_mask_lr_30_5.json'
         edge_type_name = 'edge_type_lr_train_30_5_edge.npy'
     else:
         x_name = 'feat_matrix_lr_top10_train.npz'
         y_name = 'all_labels_lr_top10_train.json'
         adj_name = 'adj_matrix_lr_top10_train_edge.npy'
         node2id_name = 'node2id_lr_top10_train.json'
         split_mask_name = 'split_mask_top10.json'
         edge_type_name = 'edge_type_lr_top10_edge.npy'

     data_dir = os.path.join(self.config['data_path'], data_name)
     x_file = os.path.join(data_dir, x_name)
     y_file = os.path.join(data_dir, y_name)
     edge_index_file = os.path.join(data_dir, adj_name)
     node2id_file = os.path.join(data_dir, node2id_name)
     split_mask_file = os.path.join(data_dir, split_mask_name)

     # The loaded array must be converted via the attribute itself; a bare
     # ``edge_index_data`` name does not exist in this scope.
     self.edge_index_data = torch.from_numpy(np.load(edge_index_file)).long()

     self.x_data = torch.from_numpy(load_npz(x_file).toarray())
     with open(y_file, 'r') as y_handle:
         self.y_data = torch.LongTensor(json.load(y_handle)['all_labels'])
     with open(node2id_file, 'r') as node2id_handle:
         self.node2id = json.load(node2id_handle)

     if uses_edge_types:
         edge_type_file = os.path.join(data_dir, edge_type_name)
         self.edge_type_data = torch.from_numpy(np.load(edge_type_file)).long()
     else:
         self.edge_type_data = None

     with open(split_mask_file, 'r') as split_handle:
         self.split_masks = json.load(split_handle)

     num_nodes, self.vocab_size = self.x_data.shape
     check_graph = model_name != 'HGCN'
     if check_graph:
         isolated_nodes = contains_isolated_nodes(edge_index=self.edge_index_data)
         self_loops = contains_self_loops(edge_index=self.edge_index_data)

     if verbose:
         print("\n\n" + "-"*50 + "\nDATA STATISTICS:\n" + "-"*50)
         if check_graph:
             print("Contains isolated nodes = ", isolated_nodes)
             print("Contains self loops = ", self_loops)
         print("Vocabulary size = ", self.vocab_size)
         print('No. of nodes in graph = ', num_nodes)

         # Nodes that take part in at least one non-self-loop edge.
         # Assumes edge_index_data has one row of sources and one row of
         # targets, as passed to contains_isolated_nodes above.
         sources, targets = self.edge_index_data[0], self.edge_index_data[1]
         not_loop = sources != targets
         new_num_nodes = torch.unique(
             torch.cat([sources[not_loop], targets[not_loop]])).numel()
         print('No. of nodes after removing isolated nodes = ', new_num_nodes)

         # ``self.data`` is not assigned in this method; report the
         # statistics that read it only when it already exists.
         graph = getattr(self, 'data', None)
         if graph is not None:
             train_count = graph.train_mask.sum().item()
             val_count = graph.val_mask.sum().item()
             print("No. of edges in graph = ", graph.num_edges)

             print("\nNo.of train instances = ", train_count)
             print("No.of val instances = ", val_count)
             print("No.of test instances = ", num_nodes - train_count - val_count)

         end = time.time()
         hours, minutes, seconds = calc_elapsed_time(start, end)
         print("\n"+ "-"*50 + "\nTook  {:0>2} hours: {:0>2} mins: {:05.2f} secs  to Prepare Data\n".format(hours,minutes,seconds))