Example #1
from deepsnap.dataset import GraphDataset
from torch_geometric.datasets import QM7b

from graphgym.register import register_loader


def load_dataset_example(format, name, dataset_dir):
    dataset_dir = '{}/{}'.format(dataset_dir, name)
    if format == 'PyG':
        if name == 'QM7b':
            dataset_raw = QM7b(dataset_dir)
            # convert the PyG dataset into a list of deepsnap graphs
            graphs = GraphDataset.pyg_to_graphs(dataset_raw)
            return graphs


register_loader('example', load_dataset_example)
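
All of the loaders on this page follow the same contract: check the configured format/name, return None when they do not apply so that GraphGym falls through to the next registered loader, and otherwise return the loaded graphs. As a minimal sketch of that pattern for another built-in PyG dataset (the Planetoid dataset names here are illustrative and not part of the examples on this page):

from deepsnap.dataset import GraphDataset
from torch_geometric.datasets import Planetoid

from graphgym.register import register_loader


def load_dataset_planetoid(format, name, dataset_dir):
    # Returning None lets GraphGym fall through to the next registered loader.
    if format != 'PyG' or name not in ('Cora', 'CiteSeer', 'PubMed'):
        return None
    dataset_raw = Planetoid('{}/{}'.format(dataset_dir, name), name)
    # convert the PyG dataset into a list of deepsnap graphs
    return GraphDataset.pyg_to_graphs(dataset_raw)


register_loader('example_planetoid', load_dataset_planetoid)
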
Example #2
from importlib.resources import files

import deepsnap.graph
from graphgym.config import cfg
from graphgym.register import register_loader


def SBM_single_bipartite(format, name, dataset_dir):
    """Build a single bipartite SBM graph from the configured SBM parameters.

    TODO: use dataset.Generator instead?
    """
    if cfg.dataset.interpretation != 'bipartite' or cfg.dataset.format != 'SBM':
        return None

    from util.graph_generators import get_kHSBM_graph
    cd = cfg.dataset
    memberships, nw = get_kHSBM_graph(cd.sbm_n, cd.sbm_m, cd.sbm_k, cd.sbm_p,
                                      cd.sbm_q, cd.sbm_c)
    nxG = nw.to_networkx()
    dsG = deepsnap.graph.Graph(nxG)

    return [dsG]


register_loader('SBM_single_bipartite', SBM_single_bipartite)


def SBM_single_from_generated(format, name, dataset_dir):
    if cfg.dataset.interpretation != 'bipartite' or cfg.dataset.format != 'SBM_gen':
        return None

    # TODO: generate if not found; coalesce this and `generate_nx_to_file`

    import loader
    data_dir = files('data').joinpath(dataset_dir)
    dataset = loader.load_nx(name, data_dir)

    return dataset

Example #3
    nxG: nx.Graph = load_from_numpy(adj_path)

    # add dummy "node feature"
    # TODO GG breaks if no node attributes are set.
    for nodeIx in nxG.nodes:
        onehot = np.zeros(1)
        onehot = torch.from_numpy(onehot).to(torch.float)
        nxG.nodes[nodeIx]['node_feature'] = onehot

    labels = np.load(labels_path)
    # set partition as graph attribute
    partition = []
    for i in range(labels.max() + 1):
        partition.append(set(np.argwhere(labels == i).flatten()))
    nxG.graph['partition'] = partition

    # TODO GG breaks if no labels are set -- even if labels never really used
    # set membership as node attribute
    for nodeIx in nxG.nodes:
        nxG.nodes[nodeIx]['node_label'] = torch.tensor(labels[nodeIx]).to(
            torch.float)

    dsG = deepsnap.graph.Graph(nxG)

    return [dsG]


register_loader('modnet_generated_seed', load_modnet_graph)
Example #4
def sbml_single_hyper(format, name, dataset_dir):
    # GG simply uses the first registered loader that does not return None,
    # so we need to make this check in each loader.
    if cfg.dataset.interpretation != 'hyper' or cfg.dataset.format != 'SBML':
        return None
    # TODO use dataset_dir?
    # TODO: how often is this method called?
    # TODO: profile how long this conversion takes for large networks
    # right now, this goes sbml -> igraph graph -> pytorch tensor -> dsG/networkx graph
    # for alternative method, see loader/SBM.py
    nw = Network.from_sbml(name)
    pyg_data = nw.to_torch_data_onehot_hyper()  # NOTE incidence edge indices
    return _make_dataset(nw, pyg_data, nw.get_num_hypernodes())


register_loader('sbml_single_hyper', sbml_single_hyper)


def sbml_single_bipartite(format, name, dataset_dir):
    if cfg.dataset.interpretation != 'bipartite' or cfg.dataset.format != 'SBML':
        return None
    nw = Network.from_sbml(name)

    if cfg.dataset.max_node_degree is not None:
        nw = nw.limit_node_degrees(cfg.dataset.max_node_degree)

    if cfg.dataset.max_edge_degree is not None:
        nw = nw.limit_edge_degrees(cfg.dataset.max_edge_degree)

    if cfg.dataset.limit_to_largest_component:
        nw = nw.limit_to_largest_component()
Example #5
    dataset_dir = '{}/{}'.format(dataset_dir, name)
    netlists = find_netlists(dataset_dir)
    if cfg.dataset.mean:
        mean = np.load(cfg.dataset.mean)
        stddev = np.load(cfg.dataset.stddev)
        dataset = datasets.omitted(netlists,
                                   min_edge_count=5,
                                   resample=cfg.dataset.resample,
                                   mean=mean,
                                   std=stddev)
    else:
        dataset = datasets.omitted(netlists,
                                   min_edge_count=5,
                                   resample=cfg.dataset.resample)

    graphs = h.to_deepsnap(dataset)

    dataset = GraphDataset(
        graphs,
        task=cfg.dataset.task,
        edge_train_mode=cfg.dataset.edge_train_mode,
        edge_message_ratio=cfg.dataset.edge_message_ratio,
        edge_negative_sampling_ratio=cfg.dataset.edge_negative_sampling_ratio,
        resample_disjoint=cfg.dataset.resample_disjoint,
        minimum_node_per_graph=0)
    dataset._num_graph_labels = len(datasets.helpers.component_types)
    return dataset


register_loader('omitted_netlists', load_dataset)
Example #6
import os

from graphgym.register import register_loader

def find_netlists(rootdir='.'):
    """Recursively collect .cir / .net netlist file paths under rootdir."""
    child_list = [rootdir]
    if os.path.isdir(rootdir):
        child_list = (os.path.join(rootdir, c) for c in os.listdir(rootdir))

    netlist_paths = []
    for file_or_dir in child_list:
        if os.path.isdir(file_or_dir):
            contained_paths = find_netlists(file_or_dir)
            netlist_paths.extend(contained_paths)
        elif file_or_dir.endswith('.cir') or file_or_dir.endswith('.net'):
            netlist_paths.append(file_or_dir)

    return netlist_paths

def load_dataset(format, name, dataset_dir):
    if format != 'NetlistProtoLinks':
        return None

    dataset_dir = '{}/{}'.format(dataset_dir, name)
    netlists = find_netlists(dataset_dir)
    dataset = PrototypeLinkDataset(netlists, normalize=False)
    graphs = h.to_deepsnap(dataset)

    return graphs

register_loader('netlist_proto_links', load_dataset)
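
The find_netlists helper above can also be exercised on its own; a minimal usage sketch, where the directory path is hypothetical:

# Hedged usage sketch: 'datasets/netlists' is a hypothetical directory.
netlists = find_netlists('datasets/netlists')
print('found {} netlist files'.format(len(netlists)))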