from deepsnap.dataset import GraphDataset
from torch_geometric.datasets import *
from graphgym.register import register_loader


def load_dataset_example(format, name, dataset_dir):
    """Example GraphGym loader: the PyG QM7b dataset as DeepSNAP graphs.

    Returns a list of deepsnap graphs when called with format 'PyG' and
    name 'QM7b'; otherwise returns None so GraphGym falls through to the
    next registered loader.
    """
    dataset_dir = '{}/{}'.format(dataset_dir, name)
    if format != 'PyG' or name != 'QM7b':
        return None
    # QM7b comes from the torch_geometric.datasets star-import above.
    raw = QM7b(dataset_dir)
    return GraphDataset.pyg_to_graphs(raw)


register_loader('example', load_dataset_example)
    TODO: use dataset.Generator instead?
    """
    # NOTE(review): GraphGym uses the first registered loader that returns
    # non-None, so every loader must gate on the active config.
    if cfg.dataset.interpretation != 'bipartite' or cfg.dataset.format != 'SBM':
        return None
    from util.graph_generators import get_kHSBM_graph
    cd = cfg.dataset
    # NOTE(review): presumably returns (node memberships, network);
    # `memberships` is unused here.
    memberships, nw = get_kHSBM_graph(cd.sbm_n, cd.sbm_m, cd.sbm_k, cd.sbm_p, cd.sbm_q, cd.sbm_c)
    nxG = nw.to_networkx()
    dsG = deepsnap.graph.Graph(nxG)
    # Single-graph dataset: GraphGym expects a list of deepsnap graphs.
    return [dsG]


register_loader('SBM_single_bipartite', SBM_single_bipartite)


def SBM_single_from_generated(format, name, dataset_dir):
    """Load a previously generated SBM graph (bipartite interpretation).

    Returns None unless cfg selects interpretation 'bipartite' and
    format 'SBM_gen', so GraphGym falls through to the next loader.
    """
    if cfg.dataset.interpretation != 'bipartite' or cfg.dataset.format != 'SBM_gen':
        return None
    # todo generate if not found; coalesce this and `generate_nx_to_file`
    import loader
    # Resolve the dataset directory inside the packaged `data` resources
    # (`files` is presumably importlib.resources.files — imported above).
    dir = files('data').joinpath(dataset_dir)
    dataset = loader.load_nx(name, dir)
    return dataset
    # Load the adjacency matrix from disk and build the networkx graph.
    nxG: nx.graph.Graph
    nxG = load_from_numpy(adj_path)
    # add dummy "node feature"
    # TODO GG breaks if no node attributes are set.
    # NOTE(review): `Graph.node` was removed in networkx 2.4; on newer
    # networkx these writes must be `nxG.nodes[nodeIx][...]` — confirm the
    # pinned networkx version.
    for nodeIx in nxG.nodes:
        onehot = np.zeros(1)
        onehot = torch.from_numpy(onehot).to(torch.float)
        nxG.node[nodeIx]['node_feature'] = onehot
    labels = np.load(labels_path)
    # set partition as graph attribute: one set of node indices per label value
    partition = []
    for i in range(labels.max() + 1):
        partition.append(set(np.argwhere(labels == i).flatten()))
    nxG.graph['partition'] = partition
    # TODO GG breaks if no labels are set -- even if labels never really used
    # set membership as node attribute
    for nodeIx in nxG.nodes:
        nxG.node[nodeIx]['node_label'] = torch.tensor(labels[nodeIx]).to(torch.float)
    dsG = deepsnap.graph.Graph(nxG)
    # Single-graph dataset: GraphGym expects a list of deepsnap graphs.
    return [dsG]


register_loader('modnet_generated_seed', load_modnet_graph)
def sbml_single_hyper(format, name, dataset_dir):
    """Build a single-graph hypergraph dataset from the SBML model `name`.

    Returns None unless cfg selects interpretation 'hyper' and format
    'SBML', so GraphGym falls through to the next registered loader.
    """
    # GG simply uses the first registered loader that does not return None so we
    # need to make this check in each loader.
    if cfg.dataset.interpretation != 'hyper' or cfg.dataset.format != 'SBML':
        return None
    # TODO use dataset_dir?
    # TODO: how often is this method called?
    # TODO: profile how long this conversion takes for large networks
    # right now, this goes sbml -> igraph graph -> pytorch tensor -> dsG/networkx graph
    # for alternative method, see loader/SBM.py
    nw = Network.from_sbml(name)
    pyg_data = nw.to_torch_data_onehot_hyper()  # NOTE incidence edge indices
    return _make_dataset(nw, pyg_data, nw.get_num_hypernodes())


register_loader('sbml_single_hyper', sbml_single_hyper)


def sbml_single_bipartite(format, name, dataset_dir):
    """Build a bipartite-interpretation dataset from the SBML model `name`.

    Returns None unless cfg selects interpretation 'bipartite' and format
    'SBML'. Applies optional cfg-driven pruning before dataset creation.
    """
    if cfg.dataset.interpretation != 'bipartite' or cfg.dataset.format != 'SBML':
        return None
    nw = Network.from_sbml(name)
    # Optional pruning controlled by cfg: cap node/edge degrees, then keep
    # only the largest connected component.
    if cfg.dataset.max_node_degree is not None:
        nw = nw.limit_node_degrees(cfg.dataset.max_node_degree)
    if cfg.dataset.max_edge_degree is not None:
        nw = nw.limit_edge_degrees(cfg.dataset.max_edge_degree)
    if cfg.dataset.limit_to_largest_component is True:
        nw = nw.limit_to_largest_component()
    dataset_dir = '{}/{}'.format(dataset_dir, name)
    netlists = find_netlists(dataset_dir)
    # If a normalization mean is configured, load mean/stddev arrays from the
    # configured .npy paths and forward them to the dataset constructor.
    if cfg.dataset.mean:
        mean = np.load(cfg.dataset.mean)
        stddev = np.load(cfg.dataset.stddev)
        dataset = datasets.omitted(netlists, min_edge_count=5, resample=cfg.dataset.resample, mean=mean, std=stddev)
    else:
        dataset = datasets.omitted(netlists, min_edge_count=5, resample=cfg.dataset.resample)
    # Convert to DeepSNAP graphs and wrap in a GraphDataset configured from cfg.
    graphs = h.to_deepsnap(dataset)
    dataset = GraphDataset(
        graphs,
        task=cfg.dataset.task,
        edge_train_mode=cfg.dataset.edge_train_mode,
        edge_message_ratio=cfg.dataset.edge_message_ratio,
        edge_negative_sampling_ratio=cfg.dataset.edge_negative_sampling_ratio,
        resample_disjoint=cfg.dataset.resample_disjoint,
        minimum_node_per_graph=0)
    # NOTE(review): writes a private GraphDataset attribute; the label count is
    # taken as the number of component types — confirm downstream consumers.
    dataset._num_graph_labels = len(datasets.helpers.component_types)
    return dataset


register_loader('omitted_netlists', load_dataset)
from graphgym.register import register_loader


def find_netlists(rootdir='.'):
    """Recursively collect netlist file paths under `rootdir`.

    A netlist is any file whose name ends in '.cir' or '.net'. If `rootdir`
    is itself a file, it is returned (in a one-element list) when it is a
    netlist, otherwise the result is empty.
    """
    if not os.path.isdir(rootdir):
        return [rootdir] if rootdir.endswith(('.cir', '.net')) else []
    netlist_paths = []
    # os.walk replaces the original hand-rolled recursion over os.listdir;
    # it also avoids infinite recursion on symlink cycles.
    for dirpath, _dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            if filename.endswith(('.cir', '.net')):
                netlist_paths.append(os.path.join(dirpath, filename))
    return netlist_paths


def load_dataset(format, name, dataset_dir):
    """GraphGym loader for prototype-link netlist datasets.

    Returns None unless `format` is 'NetlistProtoLinks', so GraphGym falls
    through to the next registered loader; otherwise returns the netlists
    under `dataset_dir/name` converted to DeepSNAP graphs.
    """
    if format != 'NetlistProtoLinks':
        return None
    dataset_dir = '{}/{}'.format(dataset_dir, name)
    netlists = find_netlists(dataset_dir)
    dataset = PrototypeLinkDataset(netlists, normalize=False)
    # Convert to DeepSNAP graphs for GraphGym consumption.
    graphs = h.to_deepsnap(dataset)
    return graphs


register_loader('netlist_proto_links', load_dataset)