Exemplo n.º 1
0
def visualize_matrix(h: sparse.coo_matrix):
    """
    Convert a sparse matrix to a dense pandas DataFrame for inspection.

    :param h: Sparse matrix in coo_matrix form
    :return: pd.DataFrame built from the densified contents of ``h``
    """
    # Densify first; the DataFrame then uses default integer row/column labels.
    return pd.DataFrame(h.todense())
Exemplo n.º 2
0
def mask_test_edges(
    adj: sp.coo_matrix,
    seed: int = 0,
    validation_frac: float = 0.05,
    test_frac: float = 0.1,
    validation_edges_in_adj: bool = False,
):
    """
    Split edges for graph autoencoder into train/validation/test splits.

    Based on https://github.com/tkipf/gae/blob/master/gae/preprocessing.py

    Only the upper triangle of ``adj`` is used to enumerate edges, and the
    returned training adjacency is symmetrised, so ``adj`` is expected to be
    a square, symmetric (undirected) adjacency matrix. Diagonal entries
    (self loops) are removed before splitting.

    Negative ("false") edges are drawn by rejection sampling of random node
    pairs until as many negatives as positives were found for each split.
    On very dense graphs this may take long, or never finish if not enough
    non-edges exist.

    Args:
        adj: scipy.sparse.coo_matrix adjacency matrix.
        seed: Seed for the ``numpy`` random generator driving the split.
        validation_frac: Fraction of upper-triangle edges held out for
            validation (rounded down).
        test_frac: Fraction of upper-triangle edges held out for testing
            (rounded down).
        validation_edges_in_adj: If True, validation edges are kept in the
            returned training adjacency matrix.

    Returns:
        Tuple ``(adj_train, val_edges, val_edges_false, test_edges,
        test_edges_false)``. ``adj_train`` is a symmetric sparse matrix with
        ones at the retained edges. ``val_edges`` / ``test_edges`` are
        ``(k, 2)`` integer arrays and the ``*_false`` entries are lists of
        ``[i, j]`` pairs. The edge lists contain a single direction only.
    """
    rng = np.random.default_rng(seed)

    def edge_coords(sparse_mx):
        # (nnz, 2) array with the (row, col) coordinates of the stored entries.
        if not sp.isspmatrix_coo(sparse_mx):
            sparse_mx = sparse_mx.tocoo()
        return np.vstack((sparse_mx.row, sparse_mx.col)).transpose()

    def as_pairs(edge_list):
        # Hashable (i, j) tuples of plain ints, for O(1) membership tests.
        return {(int(i), int(j)) for i, j in edge_list}

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero (without densifying the whole matrix):
    assert adj.diagonal().sum() == 0

    edges = edge_coords(sp.triu(adj))
    edges_all = edge_coords(adj)
    num_test = int(np.floor(edges.shape[0] * test_frac))
    num_val = int(np.floor(edges.shape[0] * validation_frac))

    all_edge_idx = list(range(edges.shape[0]))
    rng.shuffle(all_edge_idx)
    # Explicit integer dtype: an empty selection would otherwise become a
    # float array, which numpy refuses to use as an index.
    val_edge_idx = np.asarray(all_edge_idx[:num_val], dtype=np.intp)
    test_edge_idx = np.asarray(
        all_edge_idx[num_val:(num_val + num_test)], dtype=np.intp)
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]

    train_edges = np.delete(edges,
                            np.concatenate((test_edge_idx, val_edge_idx)),
                            axis=0)

    all_pairs = as_pairs(edges_all)
    train_pairs = as_pairs(train_edges)
    val_pairs = as_pairs(val_edges)
    test_pairs = as_pairs(test_edges)
    num_nodes = adj.shape[0]

    def sample_false_edges(count, is_rejected):
        # Draw `count` distinct non-self node pairs that pass `is_rejected`.
        # Both indices are always drawn before any check so the random
        # stream does not depend on which check rejects a candidate.
        picked = []
        picked_pairs = set()
        while len(picked) < count:
            idx_i = rng.integers(0, num_nodes)
            idx_j = rng.integers(0, num_nodes)
            if idx_i == idx_j:
                continue
            pair = (int(idx_i), int(idx_j))
            if is_rejected(pair):
                continue
            # Skip pairs already sampled in either direction.
            if pair in picked_pairs or pair[::-1] in picked_pairs:
                continue
            picked_pairs.add(pair)
            picked.append([idx_i, idx_j])
        return picked

    test_edges_false = sample_false_edges(
        len(test_edges), lambda pair: pair in all_pairs)

    train_val_pairs = train_pairs | val_pairs

    def rejected_for_validation(pair):
        # Besides train/val edges (either direction), also reject anything
        # present in the full edge list so that a held-out test edge can
        # never be returned as a validation negative.
        return (
            pair in train_val_pairs
            or pair[::-1] in train_val_pairs
            or pair in all_pairs
        )

    val_edges_false = sample_false_edges(
        len(val_edges), rejected_for_validation)

    # Internal sanity checks on the split (all hold by construction).
    assert all_pairs.isdisjoint(as_pairs(test_edges_false))
    assert all_pairs.isdisjoint(as_pairs(val_edges_false))
    assert train_pairs.isdisjoint(val_pairs)
    assert train_pairs.isdisjoint(test_pairs)
    assert test_pairs.isdisjoint(val_pairs)

    if validation_edges_in_adj:
        adj_edges = np.concatenate((train_edges, val_edges), axis=0)
    else:
        adj_edges = train_edges

    data = np.ones(adj_edges.shape[0])

    # Re-build adj matrix from the single-direction edges, then symmetrise.
    adj_train = sp.coo_matrix(
        (data, (adj_edges[:, 0], adj_edges[:, 1])), shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return (
        adj_train,
        val_edges,
        val_edges_false,
        test_edges,
        test_edges_false,
    )