Exemplo n.º 1
0
 def dump_data(self, time_step: Optional[datetime], x_analysis: np.array,
               p_analysis: np.array, p_analysis_inv: sp.coo_matrix,
               state_mask: np.array, n_params: int):
     """Write per-parameter analysis values and uncertainties via the writer.

     For every entry of ``self.param_positions`` two ``float32`` arrays with
     the shape of ``state_mask`` are produced, in this order: the parameter
     values taken from ``x_analysis`` and the matching uncertainties,
     computed as ``1 / sqrt(diag(p_analysis_inv))``. Cells where
     ``state_mask`` is false stay at zero. The collected arrays are handed
     to ``self.writer.write`` together with the stored width, height and
     offsets.

     Args:
         time_step: Not used by this method.
         x_analysis: Flat state vector; the values of the parameter at
             position ``index`` are read as ``x_analysis[index::n_params]``.
         p_analysis: Not used by this method.
         p_analysis_inv: Matrix whose diagonal is strided the same way as
             ``x_analysis`` to derive the uncertainties.
         state_mask: Boolean mask selecting the cells that receive values.
         n_params: Stride between consecutive entries of one parameter.
     """
     # The diagonal is the same for every parameter, so extract it once
     # instead of once per loop iteration.
     inv_cov_diag = p_analysis_inv.diagonal()
     data = []
     for index in self.param_positions.values():
         param_values = np.zeros(state_mask.shape, dtype=np.float32)
         param_values[state_mask] = x_analysis[index::n_params]
         data.append(param_values)
         param_unc = np.zeros(state_mask.shape, dtype=np.float32)
         param_unc[state_mask] = 1. / np.sqrt(inv_cov_diag[index::n_params])
         data.append(param_unc)
     self.writer.write(data, self._width, self._height, self._offset_x,
                       self._offset_y)
Exemplo n.º 2
0
def _edge_coords(sparse_mx):
    """Return the (row, col) coordinates of the stored entries as an (E, 2) array."""
    coo = sparse_mx.tocoo()
    return np.vstack((coo.row, coo.col)).transpose()


def _undirected_pairs(edges):
    """Return the edges as a set of direction-free ``(low, high)`` index tuples."""
    return {(min(i, j), max(i, j)) for i, j in np.asarray(edges).tolist()}


def _sample_false_edges(rng, num_nodes, count, forbidden):
    """Draw ``count`` distinct node pairs that are not in ``forbidden``.

    ``forbidden`` holds ``(low, high)`` tuples; a candidate is rejected when
    it is a self loop, matches a forbidden pair in either direction, or
    repeats an earlier draw in either direction.
    """
    free_pairs = num_nodes * (num_nodes - 1) // 2 - len(forbidden)
    if count > free_pairs:
        # Rejection sampling below would otherwise loop forever.
        raise ValueError(
            f"cannot sample {count} false edges: only {free_pairs} "
            "unconnected node pairs exist")

    sampled = []
    taken = set()
    while len(sampled) < count:
        idx_i = rng.integers(0, num_nodes)
        idx_j = rng.integers(0, num_nodes)
        if idx_i == idx_j:
            continue
        low, high = int(idx_i), int(idx_j)
        if low > high:
            low, high = high, low
        pair = (low, high)
        if pair in forbidden or pair in taken:
            continue
        taken.add(pair)
        sampled.append([idx_i, idx_j])
    return sampled


def mask_test_edges(
    adj: sp.coo_matrix,
    seed: int = 0,
    validation_frac: float = 0.05,
    test_frac: float = 0.1,
    validation_edges_in_adj: bool = False,
):
    """
    Split edges for graph autoencoder into train/validation/test splits.

    Based on https://github.com/tkipf/gae/blob/master/gae/preprocessing.py

    The diagonal of ``adj`` is dropped, the upper-triangular edges are
    shuffled with a generator seeded by ``seed`` and partitioned into
    validation, test and training edges. For the validation and the test
    split an equally sized list of "false" edges (node pairs that are not
    connected in ``adj`` in either direction) is sampled.

    Args:
        adj: scipy.sparse.coo_matrix adjacency matrix.
        seed: Seed for the random generator driving shuffle and sampling.
        validation_frac: Fraction of upper-triangular edges held out for
            validation (rounded down).
        test_frac: Fraction of upper-triangular edges held out for testing
            (rounded down).
        validation_edges_in_adj: If true, validation edges are kept in the
            returned training adjacency matrix.

    Returns:
        Tuple ``(adj_train, val_edges, val_edges_false, test_edges,
        test_edges_false)``. ``adj_train`` is a symmetric sparse matrix with
        unit weights; ``val_edges``/``test_edges`` are ``(k, 2)`` index
        arrays and the ``*_false`` entries are lists of ``[i, j]`` pairs.
        The edge lists contain a single direction per edge.

    Raises:
        ValueError: If the graph has fewer unconnected node pairs than the
            number of false edges that must be sampled.
    """
    rng = np.random.default_rng(seed)

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero (without densifying the matrix):
    assert adj.diagonal().sum() == 0

    edges = _edge_coords(sp.triu(adj))
    edges_all = _edge_coords(adj)
    num_edges = edges.shape[0]
    num_test = int(np.floor(num_edges * test_frac))
    num_val = int(np.floor(num_edges * validation_frac))

    all_edge_idx = list(range(num_edges))
    rng.shuffle(all_edge_idx)
    # Explicit integer dtype: an empty selection would otherwise become a
    # float array, which numpy rejects as an index.
    val_edge_idx = np.asarray(all_edge_idx[:num_val], dtype=np.intp)
    test_edge_idx = np.asarray(
        all_edge_idx[num_val:(num_val + num_test)], dtype=np.intp)
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(
        edges, np.concatenate([test_edge_idx, val_edge_idx]), axis=0)

    # Every connected pair, regardless of split, is off limits as a negative
    # sample; this keeps held-out test edges out of the validation negatives.
    known_pairs = _undirected_pairs(edges_all)
    num_nodes = adj.shape[0]
    test_edges_false = _sample_false_edges(
        rng, num_nodes, len(test_edges), known_pairs)
    # NOTE: validation negatives are not checked against the test negatives,
    # so the two lists may share pairs.
    val_edges_false = _sample_false_edges(
        rng, num_nodes, len(val_edges), known_pairs)

    train_pairs = _undirected_pairs(train_edges)
    val_pairs = _undirected_pairs(val_edges)
    test_pairs = _undirected_pairs(test_edges)
    assert known_pairs.isdisjoint(_undirected_pairs(test_edges_false))
    assert known_pairs.isdisjoint(_undirected_pairs(val_edges_false))
    assert train_pairs.isdisjoint(val_pairs)
    assert train_pairs.isdisjoint(test_pairs)
    assert val_pairs.isdisjoint(test_pairs)

    if validation_edges_in_adj:
        adj_edges = np.concatenate((train_edges, val_edges), axis=0)
    else:
        adj_edges = train_edges

    data = np.ones(adj_edges.shape[0])

    # Re-build adj matrix
    adj_train = sp.coo_matrix((data, adj_edges.T), shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return (
        adj_train,
        val_edges,
        val_edges_false,
        test_edges,
        test_edges_false,
    )