def numpy_to_batch(X_list, A_list, E_list=None):
    """
    Stacks a list of graphs into
    [batch mode](https://danielegrattarola.github.io/spektral/data/#batch-mode).

    All X, A and (optionally) E matrices are zero-padded so that they share
    the same node dimension `N_max`, taken as the largest last dimension
    found in `A_list`.

    The i-th entry of every list must describe the same graph, i.e.,
    `X_list[i].shape[0] == A_list[i].shape[0] == E_list[i].shape[0]`.

    Note that sparse matrices in `A_list` are converted to dense np.arrays,
    which can be expensive.

    :param X_list: a list of np.arrays of shape `(N, F)`;
    :param A_list: a list of np.arrays or sparse matrices of shape `(N, N)`;
    :param E_list: a list of np.arrays of shape `(N, N, S)`;
    :return:
        - `X_out`: a rank 3 array of shape `(batch, N_max, F)`;
        - `A_out`: a rank 3 array of shape `(batch, N_max, N_max)`;
        - `E_out`: (only if `E_list` is given) a rank 4 array of shape
        `(batch, N_max, N_max, S)`;
    """
    # The widest adjacency matrix dictates the padded node dimension.
    node_counts = [adj.shape[-1] for adj in A_list]
    N_max = max(node_counts)

    X_out = pad_jagged_array(X_list, (N_max, -1))

    # Only the first element is inspected to decide whether the adjacency
    # matrices are sparse and need to be made dense before padding.
    needs_densify = hasattr(A_list[0], 'toarray')
    dense_A = [adj.toarray() for adj in A_list] if needs_densify else A_list
    A_out = pad_jagged_array(dense_A, (N_max, N_max))

    if E_list is None:
        return X_out, A_out

    E_out = pad_jagged_array(E_list, (N_max, N_max, -1))
    return X_out, A_out, E_out
def to_batch(x_list=None, a_list=None, e_list=None, mask=False):
    """
    Converts lists of node features, adjacency matrices and edge features to
    [batch mode](https://graphneural.network/data-modes/#batch-mode),
    by zero-padding all tensors to have the same node dimension `n_max`.

    Either the node features or the adjacency matrices must be provided as
    input.

    The i-th element of each list must be associated with the i-th graph.

    If `a_list` contains sparse matrices, they will be converted to dense
    np.arrays.

    The edge attributes of a graph can be represented as

    - a dense array of shape `(n_nodes, n_nodes, n_edge_features)`;
    - a sparse edge list of shape `(n_edges, n_edge_features)`;

    and they will always be returned as dense arrays. Sparse edge lists are
    scattered onto the non-zero entries of the corresponding adjacency matrix,
    so `a_list` is required in that case and the rows of each edge list must
    follow the row-major order of the non-zero entries.

    The input lists are never modified.

    :param x_list: a list of np.arrays of shape `(n_nodes, n_node_features)`
    -- note that `n_nodes` can change between graphs;
    :param a_list: a list of np.arrays or scipy.sparse matrices of shape
    `(n_nodes, n_nodes)`;
    :param e_list: a list of np.arrays of shape
    `(n_nodes, n_nodes, n_edge_features)` or `(n_edges, n_edge_features)`;
    :param mask: bool, if True, node attributes will be extended with a binary
    mask that indicates valid nodes (the last feature of each node will be 1
    if the node is valid and 0 otherwise). Use this flag in conjunction with
    layers.base.GraphMasking to start the propagation of masks in a model.
    :return: only if the corresponding list is given as input:

        - `x`: np.array of shape `(batch, n_max, n_node_features)`;
        - `a`: np.array of shape `(batch, n_max, n_max)`;
        - `e`: np.array of shape `(batch, n_max, n_max, n_edge_features)`;
    :raises ValueError: if neither `x_list` nor `a_list` is given, if
    `e_list` holds sparse edge lists but `a_list` is missing, or if the
    sparse `e_list` and `a_list` have different lengths.
    """
    if a_list is None and x_list is None:
        raise ValueError("Need at least x_list or a_list")

    n_max = max(
        x.shape[0] for x in (x_list if x_list is not None else a_list)
    )

    # Fail early with a clear message: a rank-2 edge list carries no position
    # information, so it cannot be made dense without the adjacency matrix.
    if e_list is not None and a_list is None and e_list[0].ndim == 2:
        raise ValueError(
            "a_list is required to convert sparse edge lists in e_list "
            "to dense arrays"
        )

    # Node features
    x_out = None
    if x_list is not None:
        if mask:
            # Real nodes get a trailing 1; padded rows stay 0 after padding.
            x_list = [
                np.concatenate((x, np.ones((x.shape[0], 1))), -1)
                for x in x_list
            ]
        x_out = pad_jagged_array(x_list, (n_max, -1))

    # Adjacency matrix
    a_out = None
    if a_list is not None:
        if hasattr(a_list[0], "toarray"):  # Convert sparse to dense
            a_list = [a.toarray() for a in a_list]
        a_out = pad_jagged_array(a_list, (n_max, n_max))

    # Edge attributes
    e_out = None
    if e_list is not None:
        if e_list[0].ndim == 2:  # Sparse to dense
            if len(a_list) != len(e_list):
                raise ValueError(
                    "a_list and e_list must have the same length"
                )
            # Build a new list instead of overwriting the caller's entries.
            dense_e_list = []
            for a, e in zip(a_list, e_list):
                e_new = np.zeros(a.shape + e.shape[-1:])
                # np.nonzero yields indices in row-major order, which is the
                # order the rows of the edge list are expected to follow.
                e_new[np.nonzero(a)] = e
                dense_e_list.append(e_new)
            e_list = dense_e_list
        e_out = pad_jagged_array(e_list, (n_max, n_max, -1))

    return tuple(out for out in [x_out, a_out, e_out] if out is not None)
def collate_labels_batch(y_list, node_level=False):
    """
    Collates a list of labels into a single array.

    :param y_list: a list of labels, one entry per graph;
    :param node_level: bool, if True the labels are treated as per-node
    arrays and padded along the first dimension to the largest first
    dimension found in `y_list`; otherwise the list is simply converted to a
    np.array.
    :return: the padded labels if `node_level` is True, `np.array(y_list)`
    otherwise.
    """
    if not node_level:
        return np.array(y_list)

    # Per-node labels: pad every entry up to the largest first dimension.
    n_max = max(labels.shape[0] for labels in y_list)
    return pad_jagged_array(y_list, (n_max, -1))
################################################################################ # LOAD DATA ################################################################################ dataset_name = 'ogbg-molesol' dataset = GraphPropPredDataset(name=dataset_name) n_out = dataset.num_tasks N = max(g[0]['num_nodes'] for g in dataset) idx = dataset.get_idx_split() tr_idx, va_idx, te_idx = idx["train"], idx["valid"], idx["test"] X, A, _, y = ogb.dataset_to_numpy(dataset, dtype='f8') A = [a.toarray() for a in A] F = X[0].shape[-1] X = pad_jagged_array(X, (N, F)) A = pad_jagged_array(A, (N, N)) X_tr, A_tr, y_tr = X[tr_idx], A[tr_idx], y[tr_idx] X_va, A_va, y_va = X[va_idx], A[va_idx], y[va_idx] X_te, A_te, y_te = X[te_idx], A[te_idx], y[te_idx] ################################################################################ # BUILD MODEL ################################################################################ X_in = Input(shape=(N, F)) A_in = Input(shape=(N, N)) X_1 = GraphConv(32, activation='relu')([X_in, A_in]) X_1, A_1 = MinCutPool(N // 2)([X_1, A_in]) X_2 = GraphConv(32, activation='relu')([X_1, A_1]) X_3 = GlobalSumPool()(X_2)
def to_batch(x_list=None, a_list=None, e_list=None):
    """
    Converts lists of node features, adjacency matrices and (optionally) edge
    features to
    [batch mode](https://danielegrattarola.github.io/spektral/data/#batch-mode),
    by zero-padding all tensors to have the same node dimension `n_max`.

    Either the node features or the adjacency matrices must be provided as
    input.

    The i-th element of each list must be associated with the i-th graph.

    If `a_list` contains sparse matrices, they will be converted to dense
    np.arrays, which can be expensive.

    The edge attributes of a graph can be represented as

    - a dense array of shape `(n_nodes, n_nodes, n_edge_features)`;
    - a sparse edge list of shape `(n_edges, n_edge_features)`;

    and they will always be returned as dense arrays. Sparse edge lists are
    scattered onto the non-zero entries of the corresponding adjacency matrix,
    so `a_list` is required in that case and the rows of each edge list must
    follow the row-major order of the non-zero entries.

    The input lists are never modified.

    :param x_list: a list of np.arrays of shape `(n_nodes, n_node_features)`
    -- note that `n_nodes` can change between graphs;
    :param a_list: a list of np.arrays or scipy.sparse matrices of shape
    `(n_nodes, n_nodes)`;
    :param e_list: a list of np.arrays of shape
    `(n_nodes, n_nodes, n_edge_features)` or `(n_edges, n_edge_features)`;
    :return: only if the corresponding list is given as input:

        - `x`: np.array of shape `(batch, n_max, n_node_features)`;
        - `a`: np.array of shape `(batch, n_max, n_max)`;
        - `e`: np.array of shape `(batch, n_max, n_max, n_edge_features)`;
    :raises ValueError: if neither `x_list` nor `a_list` is given, if
    `e_list` holds sparse edge lists but `a_list` is missing, or if the
    sparse `e_list` and `a_list` have different lengths.
    """
    if a_list is None and x_list is None:
        raise ValueError('Need at least x_list or a_list')

    n_max = max(
        x.shape[0] for x in (x_list if x_list is not None else a_list)
    )

    # Fail early with a clear message: a rank-2 edge list carries no position
    # information, so it cannot be made dense without the adjacency matrix.
    if e_list is not None and a_list is None and e_list[0].ndim == 2:
        raise ValueError(
            'a_list is required to convert sparse edge lists in e_list '
            'to dense arrays'
        )

    # Node features
    x_out = None
    if x_list is not None:
        x_out = pad_jagged_array(x_list, (n_max, -1))

    # Adjacency matrix
    a_out = None
    if a_list is not None:
        if hasattr(a_list[0], 'toarray'):  # Convert sparse to dense
            a_list = [a.toarray() for a in a_list]
        a_out = pad_jagged_array(a_list, (n_max, n_max))

    # Edge attributes
    e_out = None
    if e_list is not None:
        if e_list[0].ndim == 2:  # Sparse to dense
            if len(a_list) != len(e_list):
                raise ValueError(
                    'a_list and e_list must have the same length'
                )
            # Build a new list instead of overwriting the caller's entries.
            dense_e_list = []
            for a, e in zip(a_list, e_list):
                e_new = np.zeros(a.shape + e.shape[-1:])
                # np.nonzero yields indices in row-major order, which is the
                # order the rows of the edge list are expected to follow.
                e_new[np.nonzero(a)] = e
                dense_e_list.append(e_new)
            e_list = dense_e_list
        e_out = pad_jagged_array(e_list, (n_max, n_max, -1))

    return tuple(out for out in [x_out, a_out, e_out] if out is not None)