def _read_graphs(dataset_name):
    """Read a TUDataset-format dataset from ``DATA_PATH`` and return it as
    per-graph arrays.

    Parameters
    ----------
    dataset_name : str
        Name of the dataset; files are expected at
        ``DATA_PATH/<name>/<name>_*.txt``.

    Returns
    -------
    A_list : list of scipy.sparse.coo_matrix
        One adjacency matrix per graph (node indices are graph-local).
    X_list : list
        One node-feature array per graph, or one ``None`` per graph when the
        dataset ships neither node attributes nor node labels.
    y : np.ndarray or None
        Graph-level targets: raw attributes if available, otherwise one-hot
        encoded labels; ``None`` if the dataset has no graph targets.
    """
    file_prefix = osp.join(DATA_PATH, dataset_name, dataset_name)
    # Discover which optional files ship with this dataset, e.g.
    # "node_attributes", "graph_labels" (strip "<name>_" prefix and ".txt").
    available = [
        f.split(os.sep)[-1][len(dataset_name) + 1:-4]
        for f in glob.glob('{}_*.txt'.format(file_prefix))
    ]

    # Node-to-graph assignment; files are 1-based on disk, 0-based here.
    I = io.load_txt(file_prefix + '_graph_indicator.txt').astype(int) - 1
    unique_ids = np.unique(I)
    num_graphs = len(unique_ids)
    graph_sizes = np.bincount(I)
    # offsets[i] = global index of the first node of graph i.
    offsets = np.concatenate(([0], np.cumsum(graph_sizes)[:-1]))

    # Global edge list (1-based on disk -> 0-based here).
    edges = io.load_txt(file_prefix + '_A.txt', delimiter=',').astype(int) - 1

    # Bucket edges per graph, re-indexing nodes to be graph-local.
    A_list = [[] for _ in range(num_graphs)]
    for e in edges:
        graph_id = I[e[0]]
        A_list[graph_id].append(e - offsets[graph_id])
    A_list = map(np.array, A_list)
    A_list = [
        sp.coo_matrix(
            (np.ones_like(A[:, 0]), (A[:, 0], A[:, 1])),
            shape=(graph_sizes[i], graph_sizes[i])
        )
        for i, A in enumerate(A_list)
    ]

    # Node features: continuous attributes and/or one-hot labels, concatenated
    # feature-wise when both are present.
    X = []
    if 'node_attributes' in available:
        X_na = io.load_txt(file_prefix + '_node_attributes.txt', delimiter=',')
        if X_na.ndim == 1:
            X_na = X_na[:, None]  # single attribute -> column vector
        X.append(X_na)
    if 'node_labels' in available:
        X_nl = io.load_txt(file_prefix + '_node_labels.txt')
        X_nl = _normalize(X_nl.reshape(-1, 1), 'ohe')
        X.append(X_nl)

    # BUG FIX: X_list was previously defined only inside the `if len(X) > 0`
    # branch, so datasets without node attributes/labels raised NameError at
    # the return statement. Default to one None entry per graph instead.
    X_list = [None] * num_graphs
    if len(X) > 0:
        X = np.concatenate(X, -1)
        # Slice the stacked node features back into per-graph arrays.
        X_list = []
        start = offsets[0]
        for i in range(num_graphs):
            stop = offsets[i + 1] if i + 1 < len(offsets) else None
            X_list.append(X[start:stop])
            start = stop

    # Graph-level targets: prefer continuous attributes; otherwise one-hot
    # encode the categorical labels.
    y = None
    if 'graph_attributes' in available:
        y = io.load_txt(file_prefix + '_graph_attributes.txt')
    elif 'graph_labels' in available:
        y = io.load_txt(file_prefix + '_graph_labels.txt')
        y = _normalize(y[:, None], 'ohe')

    return A_list, X_list, y
def read(self):
    """Load the dataset from ``self.path`` and return it as a list of Graphs.

    Expects TUDataset-style files named ``<name>_<suffix>.txt`` (e.g.
    ``_graph_indicator``, ``_A``, and optional node/edge attribute and
    label files).

    Returns
    -------
    list of Graph
        One ``Graph(x, a, e, y)`` per graph; ``x`` and ``e`` are ``None``
        when the corresponding files are absent.

    Raises
    ------
    ValueError
        If neither graph attributes nor graph labels are available.
    """
    fname_template = osp.join(self.path, "{}_{{}}.txt".format(self.name))
    # Which optional files are present, e.g. "node_labels", "edge_attributes".
    available = [
        f.split(os.sep)[-1][len(self.name) + 1:-4]  # Remove leading name
        for f in glob.glob(fname_template.format("*"))
    ]

    # Batch index: node i belongs to graph node_batch_index[i] (files are
    # 1-based on disk, 0-based here).
    node_batch_index = (
        io.load_txt(fname_template.format("graph_indicator")).astype(int) - 1)
    n_nodes = np.bincount(node_batch_index)
    # n_nodes_cum[i] = global index of the first node of graph i.
    n_nodes_cum = np.concatenate(([0], np.cumsum(n_nodes)[:-1]))

    # Adjacency matrix
    edges = io.load_txt(fname_template.format("A"), delimiter=",").astype(int) - 1
    # Remove duplicates and self-loops from edges. Keep `mask` so edge
    # attributes/labels can be filtered consistently below.
    _, mask = np.unique(edges, axis=0, return_index=True)
    mask = mask[edges[mask, 0] != edges[mask, 1]]
    edges = edges[mask]

    # Split edges into separate, graph-local edge lists.
    edge_batch_idx = node_batch_index[edges[:, 0]]
    # BUG FIX: without `minlength`, trailing graphs with zero edges were
    # dropped from the split, leaving a_list shorter than x_list/labels and
    # silently truncating the final zip(). Force one bin per graph.
    n_edges = np.bincount(edge_batch_idx, minlength=len(n_nodes))
    n_edges_cum = np.cumsum(n_edges[:-1])
    edge_lists = np.split(edges - n_nodes_cum[edge_batch_idx, None], n_edges_cum)

    # Create one sparse (binary) adjacency matrix per graph.
    a_list = [
        sp.csr_matrix(
            (np.ones_like(el[:, 0]), (el[:, 0], el[:, 1])),
            shape=(n_nodes[i], n_nodes[i]),
        )
        for i, el in enumerate(edge_lists)
    ]

    # Node features: continuous attributes and/or one-hot labels (each label
    # column is one-hot encoded independently, then concatenated).
    x_list = []
    if "node_attributes" in available:
        x_attr = io.load_txt(fname_template.format("node_attributes"),
                             delimiter=",")
        if x_attr.ndim == 1:
            x_attr = x_attr[:, None]  # single attribute -> column vector
        x_list.append(x_attr)
    if "node_labels" in available:
        x_labs = io.load_txt(fname_template.format("node_labels"))
        if x_labs.ndim == 1:
            x_labs = x_labs[:, None]
        x_labs = np.concatenate(
            [_normalize(xl_[:, None], "ohe") for xl_ in x_labs.T], -1)
        x_list.append(x_labs)
    if len(x_list) > 0:
        x_list = np.concatenate(x_list, -1)
        x_list = np.split(x_list, n_nodes_cum[1:])
    else:
        # BUG FIX: the two concatenated literals previously had no separating
        # space ("...attributes.Consider...").
        print("WARNING: this dataset doesn't have node attributes. "
              "Consider creating manual features before using it with a "
              "Loader.")
        x_list = [None] * len(n_nodes)

    # Edge features: filtered with the same `mask` used on the edge list so
    # rows stay aligned with the surviving edges.
    e_list = []
    if "edge_attributes" in available:
        e_attr = io.load_txt(fname_template.format("edge_attributes"))
        if e_attr.ndim == 1:
            e_attr = e_attr[:, None]
        e_attr = e_attr[mask]
        e_list.append(e_attr)
    if "edge_labels" in available:
        e_labs = io.load_txt(fname_template.format("edge_labels"))
        if e_labs.ndim == 1:
            e_labs = e_labs[:, None]
        e_labs = e_labs[mask]
        e_labs = np.concatenate(
            [_normalize(el_[:, None], "ohe") for el_ in e_labs.T], -1)
        e_list.append(e_labs)
    if len(e_list) > 0:
        e_list = np.concatenate(e_list, -1)
        e_list = np.split(e_list, n_edges_cum)
    else:
        e_list = [None] * len(n_nodes)

    # Labels: prefer continuous graph attributes; otherwise one-hot encode
    # the categorical graph labels.
    if "graph_attributes" in available:
        labels = io.load_txt(fname_template.format("graph_attributes"))
    elif "graph_labels" in available:
        labels = io.load_txt(fname_template.format("graph_labels"))
        labels = _normalize(labels[:, None], "ohe")
    else:
        raise ValueError("No labels available for dataset {}".format(
            self.name))

    # Convert to Graph
    print("Successfully loaded {}.".format(self.name))
    return [
        Graph(x=x, a=a, e=e, y=y)
        for x, a, e, y in zip(x_list, a_list, e_list, labels)
    ]