Example #1
def ConvGraphLoad(dir='data/'):
    graph_list = os.listdir(dir)
    graph_list_tuple = [
        tuple(graph_name.split('_', 2)) for graph_name in graph_list
    ]
    graph_list_tuple.sort(key=lambda tup: int(tup[1]))
    graph_list_tuple.sort(key=lambda tup: int(tup[0]))
    print('Graph List sorted:', graph_list_tuple)
    glist = []
    labellist = []

    for i in range(len(graph_list_tuple) - 2):
        graph_step_name = '_'.join(graph_list_tuple[i])
        graph_next_step_name = '_'.join(graph_list_tuple[i + 1])

        g, _ = dgl.load_graphs(dir + graph_step_name)
        g = g[0]

        g_next, _ = dgl.load_graphs(dir + graph_next_step_name)
        g_next = g_next[0]

        if (g.number_of_nodes() == g_next.number_of_nodes()
                and 250 < int(graph_list_tuple[i][1]) <= 491):
            glist.append(g)
            labellist.append(g_next.ndata['value'])

    return glist, labellist
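Note: Example #1 depends on dgl.save_graphs/dgl.load_graphs round-tripping node features such as ndata['value']. A minimal, self-contained sketch of that round trip (the file name demo_graph.bin is arbitrary):

import dgl
import torch

u, v = torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])
g = dgl.graph((u, v))
g.ndata['value'] = torch.randn(g.number_of_nodes(), 1)
dgl.save_graphs('demo_graph.bin', [g])  # save_graphs takes a list of graphs

loaded, _ = dgl.load_graphs('demo_graph.bin')
assert torch.allclose(loaded[0].ndata['value'], g.ndata['value'])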
Example #2
File: utils.py Project: ziqiaomeng/dgl
def get_graph(name, format):
    g = None
    if name == 'cora':
        g = dgl.data.CoraGraphDataset()[0]
    elif name == 'livejournal':
        bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = get_livejournal().formats([format])
            dgl.save_graphs(bin_path, [g])
    elif name == "friendster":
        bin_path = "/tmp/dataset/friendster/friendster_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = get_friendster().formats([format])
            dgl.save_graphs(bin_path, [g])
    elif name == "reddit":
        bin_path = "/tmp/dataset/reddit/reddit_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = dgl.data.RedditDataset(self_loop=True)[0].formats([format])
            dgl.save_graphs(bin_path, [g])
    else:
        raise Exception("Unknown dataset")
    g = g.formats([format])
    return g
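Note: every branch above repeats the same cache-or-build logic. A condensed sketch of that pattern, with a hypothetical build_graph callable standing in for the dataset-specific loaders:

import os
import dgl

def load_or_build(bin_path, build_graph, fmt):
    # reuse the cached binary when present; otherwise build it once and cache it
    if os.path.exists(bin_path):
        g_list, _ = dgl.load_graphs(bin_path)
        g = g_list[0]
    else:
        g = build_graph().formats([fmt])
        dgl.save_graphs(bin_path, [g])
    return g.formats([fmt])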
Example #3
def load_building_block_data():
    with open(f"{PROCESSED_DATA_DIR}/building_block_smis.pt", "rb") as f:
        building_block_smis = torch.load(f)

    building_block_molgraphs, _ = dgl.load_graphs(
        f"{PROCESSED_DATA_DIR}/building_block_molgraphs.pt")
    return building_block_smis, building_block_molgraphs
Example #4
def load_nowplaying_rs():
    import torchtext.legacy as torchtext
    # follow examples/pytorch/pinsage/README to create train_g.bin
    name = 'train_g.bin'
    dataset_dir = os.path.join(os.getcwd(), 'dataset')
    os.symlink('/tmp/dataset/', dataset_dir)

    dataset_path = os.path.join(dataset_dir, "nowplaying_rs", name)
    g_list, _ = dgl.load_graphs(dataset_path)
    g = g_list[0]
    user_ntype = 'user'
    item_ntype = 'track'

    # Assign user and track IDs and use them as features (to learn an
    # individual trainable embedding for each entity)
    g.nodes[user_ntype].data['id'] = torch.arange(
        g.number_of_nodes(user_ntype))
    g.nodes[item_ntype].data['id'] = torch.arange(
        g.number_of_nodes(item_ntype))

    # Prepare torchtext dataset and vocabulary
    fields = {}
    examples = []
    for i in range(g.number_of_nodes(item_ntype)):
        example = torchtext.data.Example.fromlist([], [])
        examples.append(example)
    textset = torchtext.data.Dataset(examples, fields)

    return PinsageDataset(g, user_ntype, item_ntype, textset)
Example #5
def test_serialize_heterograph():
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()
    g_list0 = create_heterographs2(F.int64) + create_heterographs2(F.int32)
    dgl.save_graphs(path, g_list0)

    g_list, _ = dgl.load_graphs(path)
    assert g_list[0].idtype == F.int64
    assert len(g_list[0].canonical_etypes) == 3
    for i in range(len(g_list0)):
        for j, etypes in enumerate(g_list0[i].canonical_etypes):
            assert g_list[i].canonical_etypes[j] == etypes
    #assert g_list[1].restrict_format() == 'any'
    #assert g_list[2].restrict_format() == 'csr'

    assert g_list[4].idtype == F.int32
    assert np.allclose(F.asnumpy(g_list[2].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    assert np.allclose(F.asnumpy(g_list[6].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    edges = g_list[0]['follows'].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
    for i in range(len(g_list)):
        assert g_list[i].ntypes == g_list0[i].ntypes
        assert g_list[i].etypes == g_list0[i].etypes

    # test set feature after load_graph
    g_list[3].nodes['user'].data['test'] = F.tensor([0, 1, 2, 4])
    g_list[3].edata['test'] = F.tensor([0, 1, 2])

    os.unlink(path)
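Note: heterographs serialize exactly like homogeneous graphs. A minimal sketch of the save/load cycle this test exercises, using a single ('user', 'follows', 'user') relation (the file name hetero_demo.bin is arbitrary):

import dgl
import torch

g = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1, 2]),
                                  torch.tensor([1, 2, 3]))
})
g.nodes['user'].data['hh'] = torch.ones(4, 5)  # 4 user nodes are inferred
dgl.save_graphs('hetero_demo.bin', [g])

loaded, _ = dgl.load_graphs('hetero_demo.bin')
assert loaded[0].canonical_etypes == [('user', 'follows', 'user')]
assert torch.allclose(loaded[0].nodes['user'].data['hh'], torch.ones(4, 5))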
Example #6
File: dataloading.py Project: yifeim/dgl
def PEMS_BAYGraphDataset():
    if not os.path.exists('data/graph_bay.bin'):
        if not os.path.exists('data'):
            os.mkdir('data')
        download_file('graph_bay.bin')
    g, _ = dgl.load_graphs('data/graph_bay.bin')
    return g[0]
Example #7
File: dataloading.py Project: yifeim/dgl
def METR_LAGraphDataset():
    if not os.path.exists('data/graph_la.bin'):
        if not os.path.exists('data'):
            os.mkdir('data')
        download_file('graph_la.bin')
    g, _ = dgl.load_graphs('data/graph_la.bin')
    return g[0]
Example #8
def test_graph_serialize_without_feature(is_hetero):
    num_graphs = 100
    g_list = [generate_rand_graph(30, is_hetero) for _ in range(num_graphs)]

    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    dgl.save_graphs(path, g_list)

    idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
    loadg_list, _ = dgl.load_graphs(path, idx_list)

    idx = idx_list[0]
    load_g = loadg_list[0]

    assert F.allclose(load_g.nodes(), g_list[idx].nodes())

    load_edges = load_g.all_edges('uv', 'eid')
    g_edges = g_list[idx].all_edges('uv', 'eid')
    assert F.allclose(load_edges[0], g_edges[0])
    assert F.allclose(load_edges[1], g_edges[1])

    os.unlink(path)
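Note: the idx_list argument used above makes dgl.load_graphs deserialize only the requested graphs rather than the whole file. A minimal sketch (the file name many_graphs.bin is arbitrary):

import dgl

graphs = [dgl.rand_graph(10, 20) for _ in range(100)]
dgl.save_graphs('many_graphs.bin', graphs)

# only indices 0, 5 and 10 are loaded, in the order given
subset, _ = dgl.load_graphs('many_graphs.bin', [0, 5, 10])
assert len(subset) == 3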
Example #9
    def __call__(self, split_type):

        if split_type == 'train':
            subsample_ratio = self.subsample_ratio
        else:
            subsample_ratio = 1

        path = osp.join(
            self.save_dir or '',
            '{}_{}_{}-hop_{}-subsample.bin'.format(self.prefix, split_type,
                                                   self.hop, subsample_ratio))

        if osp.exists(path):
            self.print_fn(
                "Loading existing processed {} files".format(split_type))
            graph_list, data = dgl.load_graphs(path)
            dataset = GraphDataSet(graph_list, data['labels'])

        else:
            self.print_fn("Processed {} files not exist.".format(split_type))

            edges, labels = self.generator(split_type)
            self.print_fn("Generate {} edges totally.".format(edges.size(0)))

            graph_list, labels = self.sampler(edges, labels)
            dataset = GraphDataSet(graph_list, labels)
            dgl.save_graphs(path, graph_list, {'labels': labels})
            self.print_fn("Save preprocessed subgraph to {}".format(path))
        return dataset
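Note: Example #9 stores the labels tensor next to the graphs through the labels dict of dgl.save_graphs; dgl.load_graphs hands it back as its second return value. A minimal sketch of that pairing (the file name labeled.bin is arbitrary):

import dgl
import torch

graph_list = [dgl.rand_graph(5, 10) for _ in range(3)]
dgl.save_graphs('labeled.bin', graph_list, {'labels': torch.tensor([0, 1, 0])})

loaded, data = dgl.load_graphs('labeled.bin')
assert torch.equal(data['labels'], torch.tensor([0, 1, 0]))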
Example #10
def main():
    args = parse_args()
    print(args)
    device = get_device(args.device)
    data, g, _, labels, predict_ntype, train_idx, val_idx, test_idx, evaluator = \
        load_data(args.dataset, device)
    add_node_feat(g, 'pretrained', args.node_embed_path, True)
    if args.dataset == 'oag-venue':
        labels[labels == -1] = 0
    (*mgs, pos_g), _ = dgl.load_graphs(args.pos_graph_path)
    pos_g = pos_g.to(device)

    model = RHCO(
        {ntype: g.nodes[ntype].data['feat'].shape[1]
         for ntype in g.ntypes}, args.num_hidden, data.num_classes,
        args.num_rel_hidden, args.num_heads, g.ntypes,
        g.canonical_etypes, predict_ntype, args.num_layers, args.dropout,
        len(mgs), args.tau, args.lambda_).to(device)
    model.load_state_dict(torch.load(args.model_path, map_location=device))
    model.eval()

    base_pred = model.get_embeds(g, mgs, args.neighbor_size, args.batch_size,
                                 device)
    mask = torch.cat([train_idx, val_idx])
    logits = smooth(base_pred, pos_g, labels, mask, args)
    _, _, test_acc, _, _, test_f1 = calc_metrics(logits, labels, train_idx,
                                                 val_idx, test_idx, evaluator)
    print('After smoothing: Test Acc {:.4f} | Test Macro-F1 {:.4f}'.format(
        test_acc, test_f1))
Example #11
 def get_canoncial_etypes(self):
     # take the first file of the last directory walked as a sample graph
     for _, _, file_names in sorted(os.walk(self.dir)):
         g_sample_name = file_names[0]
     g_sample_path = os.path.join(self.dir, g_sample_name)
     g_sample, _ = dgl.load_graphs(g_sample_path)
     g_sample = g_sample[0]
     return g_sample.canonical_etypes
Example #12
def get_graph(name, format=None):
    # global GRAPH_CACHE
    # if name in GRAPH_CACHE:
    #     return GRAPH_CACHE[name].to(format)
    if isinstance(format, str):
        format = [format]  # a single format string becomes a one-element list
    if format is None:
        format = ['csc', 'csr', 'coo']  # default to all sparse formats
    g = None
    if name == 'cora':
        g = dgl.data.CoraGraphDataset(verbose=False)[0]
    elif name == 'pubmed':
        g = dgl.data.PubmedGraphDataset(verbose=False)[0]
    elif name == 'livejournal':
        bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = get_livejournal().formats(format)
            dgl.save_graphs(bin_path, [g])
    elif name == "friendster":
        bin_path = "/tmp/dataset/friendster/friendster_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            # the original node IDs of friendster are not consecutive, so we compact it
            g = dgl.compact_graphs(get_friendster()).formats(format)
            dgl.save_graphs(bin_path, [g])
    elif name == "reddit":
        bin_path = "/tmp/dataset/reddit/reddit_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = dgl.data.RedditDataset(self_loop=True)[0].formats(format)
            dgl.save_graphs(bin_path, [g])
    elif name.startswith("ogb"):
        g = get_ogb_graph(name)
    else:
        raise Exception("Unknown dataset")
    # GRAPH_CACHE[name] = g
    g = g.formats(format)
    return g
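Note: DGLGraph.formats serves double duty here: called with a list it returns a copy of the graph restricted to those sparse formats, and called with no argument it reports which formats are materialized. A small sketch:

import dgl

g = dgl.rand_graph(10, 20).formats(['csr'])
print(g.formats())  # e.g. {'created': ['csr'], 'not created': []}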
Example #13
def load_and_cache_examples(args, processor, retrievers, relation_list, input_dir, evaluate=False, output_examples=False):
    """
    :param args: arguments. Here use "local_rank", "cache_dir", "model_type", "max_seq_length", "data_dir",
    "train_file", "tokenization_train_filepath", "predict_file", "tokenization_dev_filepath", "retrieved_nell_concept_filepath",
    :param tokenizer: the predefined tokenizer, corresponding to the type of model. Each model has its own tokenizer.
    :param evaluate: bool. An indicator for loading train file or dev file.
    :param output_examples: bool. To decide whether to output examples.
    :return:
    """

    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training processes the dataset; the others will use the cache
        torch.distributed.barrier()

    # Load data features from cache or dataset file
    if args.test:
        temp_mark = "test"
    elif evaluate:
        temp_mark = "dev"
    else:
        temp_mark = "train"
    cached_features_file = os.path.join(
        input_dir,
        "cached_{}_{}_{}".format(
            temp_mark,
            args.model_type,
            str(args.cache_file_suffix),
        ),
    )

    # Init features and dataset from cache if it exists
    if os.path.exists(cached_features_file) and not args.overwrite_cache:
        logger.info("Loading features from cached file %s", cached_features_file)
        features_and_dataset = torch.load(cached_features_file)
        features, dataset, examples_tokenized = (
            features_and_dataset["features"],
            features_and_dataset["dataset"],
            features_and_dataset["examples"],
        )
        if args.model_type == "kelm":
            all_kgs_graphs, all_kgs_graphs_label_dict = load_graphs(cached_features_file + "_all_kgs_graphs.bin")
        else:
            all_kgs_graphs, all_kgs_graphs_label_dict = [], []
    else:
        logger.error("dataset not exist and program exits")
        exit()
    if args.local_rank == 0:
        # Make sure only the first process in distributed training processes the dataset; the others will use the cache
        torch.distributed.barrier()

    logger.info("{} load data is done".format(args.local_rank))

    if output_examples:
        return dataset, examples_tokenized, features, all_kgs_graphs, all_kgs_graphs_label_dict

    # exit()
    return dataset, all_kgs_graphs, all_kgs_graphs_label_dict
Example #14
    @classmethod
    def load(cls, path):
        import json
        import dgl

        homograph = dgl.load_graphs(path + "/homograph.bin")[0][0]
        heterograph = dgl.load_graphs(path + "/heterograph.bin")[0][0]

        with open(path + "/mol.json", "r") as f_handle:
            mol = json.load(f_handle)
        from openff.toolkit.topology import Molecule

        try:
            mol = Molecule.from_json(mol)
        except Exception:
            mol = Molecule.from_dict(mol)

        g = cls(mol=mol, homograph=homograph, heterograph=heterograph)
        return g
Example #15
def main(args):
    # load an existing model, or fail if none has been trained yet
    model_path = os.path.join('./', 'dummy_model_{}.pth'.format(args.dataset))
    if os.path.exists(model_path):
        model_state_dict = th.load(model_path)
    else:
        raise FileNotFoundError('No saved model file. Please train a GNN model first...')

    # load graph, feat, and label
    g_list, label_dict = load_graphs('./'+args.dataset+'.bin')
    graph = g_list[0]
    labels = graph.ndata['label']
    feats = graph.ndata['feat']
    num_classes = max(labels).item() + 1
    feat_dim = feats.shape[1]
    hid_dim = label_dict['hid_dim'].item()
    
    # create a model and load from state_dict
    dummy_model = dummy_gnn_model(feat_dim, hid_dim, num_classes)
    dummy_model.load_state_dict(model_state_dict)

    # Choose a node of the target class to be explained and extract its subgraph.
    # Here just pick the first one of the target class.
    target_list = [i for i, e in enumerate(labels) if e == args.target_class]
    n_idx = th.tensor([target_list[0]])

    # Extract the computation graph within k-hop of target node and use it for explainability
    sub_graph, ori_n_idxes, new_n_idx = extract_subgraph(graph, n_idx, hops=args.hop)
    
    # Sub-graph features.
    sub_feats = feats[ori_n_idxes, :]

    # create an explainer
    explainer = NodeExplainerModule(model=dummy_model,
                                    num_edges=sub_graph.number_of_edges(),
                                    node_feat_dim=feat_dim)

    # define optimizer
    optim = th.optim.Adam([explainer.edge_mask, explainer.node_feat_mask], lr=args.lr, weight_decay=args.wd)

    # train the explainer for the given node
    dummy_model.eval()
    model_logits = dummy_model(sub_graph, sub_feats)
    model_predict = F.one_hot(th.argmax(model_logits, dim=-1), num_classes)

    for epoch in range(args.epochs):
        explainer.train()
        exp_logits = explainer(sub_graph, sub_feats)
        loss = explainer._loss(exp_logits[new_n_idx], model_predict[new_n_idx])

        optim.zero_grad()
        loss.backward()
        optim.step()

    # visualize the importance of edges
    edge_weights = explainer.edge_mask.sigmoid().detach()
    visualize_sub_graph(sub_graph, edge_weights.numpy(), ori_n_idxes, n_idx)
Example #16
def load_synthetic_route_data():
    synthetic_route_graphs, _ = dgl.load_graphs(
        f"{PROCESSED_DATA_DIR}/synthetic_route_graphs.pt")
    with open(f"{PROCESSED_DATA_DIR}/synthetic_route_node2smis.pt", "rb") as f:
        synthetic_route_node2smis = torch.load(f)

    synthetic_route_molgraphs, _ = dgl.load_graphs(
        f"{PROCESSED_DATA_DIR}/synthetic_route_molgraphs.pt")

    synthetic_route_node2molgraphs = []
    offset = 0
    for node2smi in synthetic_route_node2smis:
        nodes = list(node2smi.keys())
        molgraphs = synthetic_route_molgraphs[offset:offset + len(nodes)]
        synthetic_route_node2molgraphs.append(
            {node: molgraph
             for node, molgraph in zip(nodes, molgraphs)})
        offset += len(nodes)  # advance the cursor; otherwise every route reuses the first slice

    return synthetic_route_graphs, synthetic_route_node2smis, synthetic_route_node2molgraphs
Example #17
    def load(self):
        # Generate paths
        graphs_path, info_path = tuple((path_saves + x) for x in self.get_dataset_name())

        # Load graphs
        self.graphs, label_dict = load_graphs(graphs_path)
        self.labels = label_dict['labels']

        # Load info
        info = load_info(info_path)
        self.data['typemaps'] = info['typemaps']
        self.data['coordinates'] = info['coordinates']
Example #18
def get_graph(name, format):
    # global GRAPH_CACHE
    # if name in GRAPH_CACHE:
    #     return GRAPH_CACHE[name].to(format)
    g = None
    if name == 'cora':
        g = dgl.data.CoraGraphDataset(verbose=False)[0]
    elif name == 'pubmed':
        g = dgl.data.PubmedGraphDataset(verbose=False)[0]
    elif name == 'livejournal':
        bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = get_livejournal().formats([format])
            dgl.save_graphs(bin_path, [g])
    elif name == "friendster":
        bin_path = "/tmp/dataset/friendster/friendster_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = get_friendster().formats([format])
            dgl.save_graphs(bin_path, [g])
    elif name == "reddit":
        bin_path = "/tmp/dataset/reddit/reddit_{}.bin".format(format)
        if os.path.exists(bin_path):
            g_list, _ = dgl.load_graphs(bin_path)
            g = g_list[0]
        else:
            g = dgl.data.RedditDataset(self_loop=True)[0].formats([format])
            dgl.save_graphs(bin_path, [g])
    elif name.startswith("ogb"):
        g = get_ogb_graph(name)
    else:
        raise Exception("Unknown dataset")
    # GRAPH_CACHE[name] = g
    g = g.formats([format])
    return g
Example #19
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    data, g, features, labels, predict_ntype, train_idx, val_idx, test_idx, _ = \
        load_data(args.dataset, device)
    add_node_feat(g, 'one-hot')

    (*mgs, pos_g), _ = dgl.load_graphs(args.pos_graph_path)
    mgs = [mg.to(device) for mg in mgs]
    if args.use_data_pos:
        pos_v, pos_u = data.pos
        pos_g = dgl.graph((pos_u, pos_v), device=device)
    pos = torch.zeros((g.num_nodes(predict_ntype), g.num_nodes(predict_ntype)),
                      dtype=torch.int,
                      device=device)
    pos[data.pos] = 1

    model = RHCOFull(
        {ntype: g.nodes[ntype].data['feat'].shape[1]
         for ntype in g.ntypes}, args.num_hidden, data.num_classes,
        args.num_rel_hidden, args.num_heads, g.ntypes,
        g.canonical_etypes, predict_ntype, args.num_layers, args.dropout,
        len(mgs), args.tau, args.lambda_).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                     T_max=args.epochs,
                                                     eta_min=args.lr / 100)
    alpha = args.contrast_weight
    warnings.filterwarnings(
        'ignore', 'Setting attributes on ParameterDict is not supported')
    for epoch in range(args.epochs):
        model.train()
        contrast_loss, logits = model(g, g.ndata['feat'], mgs, features, pos)
        clf_loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        loss = alpha * contrast_loss + (1 - alpha) * clf_loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
        torch.cuda.empty_cache()
        print(('Epoch {:d} | Loss {:.4f} | ' + METRICS_STR).format(
            epoch, loss.item(),
            *evaluate(model, g, labels, train_idx, val_idx, test_idx)))

    model.eval()
    _, base_pred = model(g, g.ndata['feat'], mgs, features, pos)
    mask = torch.cat([train_idx, val_idx])
    logits = smooth(base_pred, pos_g, labels, mask, args)
    _, _, test_acc, _, _, test_f1 = calc_metrics(logits, labels, train_idx,
                                                 val_idx, test_idx)
    print('After smoothing: Test Acc {:.4f} | Test Macro-F1 {:.4f}'.format(
        test_acc, test_f1))
Example #20
def ConvLabeldGraphLoad(dir='../u_label_plus_10/'):
    graph_list = [name for name in os.listdir(dir)
                  if not name.startswith('.')]  # skip .DS_Store and other hidden files

    glist = []

    for graph_name in graph_list:

        g, _ = dgl.load_graphs(dir + graph_name)
        g = g[0]

        glist.append(g)

    return glist
Example #21
def test_deserialize_old_heterograph_file():
    path = os.path.join(os.path.dirname(__file__), "data/hetero1.bin")
    g_list, label_dict = dgl.load_graphs(path)
    assert g_list[0].idtype == F.int64
    assert g_list[3].idtype == F.int32
    assert np.allclose(F.asnumpy(g_list[2].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    assert np.allclose(F.asnumpy(g_list[5].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    edges = g_list[0]['follows'].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
    assert F.allclose(label_dict['graph_label'], F.ones(54))
Example #22
 def load(self):
     self.graph_list = []
     self.label_list = []
     path = "dataset/homograph/test"
     scenarios = os.listdir(path)
     for scenario in scenarios:
         file_path = path + '/' + scenario
         graphs = os.listdir(file_path)
         for graph in graphs:
             g_list, label_dict = dgl.load_graphs(file_path + '/' + graph)
             self.graph_list.append(g_list[0])
             if scenario == 'normal':
                 self.label_list.append(1)
             else:
                 self.label_list.append(0)
Example #23
def load_dgl(graph_path, info_path=None):
    """ Loads saved dgl graphs, labels and other info.

    :param graph_path:
    :param info_path:
    :return:
    """
    # load processed data from directory graph_path
    logger.info(f'Loading graph data from: {graph_path}')
    graphs, label_dict = load_graphs(graph_path)
    labels = label_dict['labels']
    if info_path is not None:
        info = load_info(info_path)['info']
        return graphs, labels, info
    return graphs, labels
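Note: load_info here is DGL's pickle-backed helper (dgl.data.utils) for metadata that cannot go into the save_graphs labels dict, which only accepts tensors. A minimal sketch of the matching save_info/load_info round trip (the file name meta.pkl is arbitrary):

from dgl.data.utils import save_info, load_info

save_info('meta.pkl', {'info': {'num_classes': 7, 'names': ['a', 'b']}})
assert load_info('meta.pkl')['info']['num_classes'] == 7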
Example #24
def test_serialize_heterograph_s3():
    path = "s3://dglci-data-test/graph2.bin"
    g_list0 = create_heterographs(F.int64) + create_heterographs(F.int32)
    dgl.save_graphs(path, g_list0)

    g_list, _ = dgl.load_graphs(path, [0, 2, 5])
    assert g_list[0].idtype == F.int64
    #assert g_list[1].restrict_format() == 'csr'
    assert np.allclose(F.asnumpy(g_list[1].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    assert np.allclose(F.asnumpy(g_list[2].nodes['user'].data['hh']),
                       np.ones((4, 5)))
    edges = g_list[0]['follows'].edges()
    assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2]))
    assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3]))
Example #25
def test_load_old_files1():
    loadg_list, _ = dgl.load_graphs(
        os.path.join(os.path.dirname(__file__), "data/1.bin"))
    idx, num_nodes, edge0, edge1, edata_e1, edata_e2, ndata_n1 = np.load(
        os.path.join(os.path.dirname(__file__), "data/1.npy"),
        allow_pickle=True)

    load_g = loadg_list[idx]
    load_edges = load_g.all_edges('uv', 'eid')

    assert np.allclose(F.asnumpy(load_edges[0]), edge0)
    assert np.allclose(F.asnumpy(load_edges[1]), edge1)
    assert np.allclose(F.asnumpy(load_g.edata['e1']), edata_e1)
    assert np.allclose(F.asnumpy(load_g.edata['e2']), edata_e2)
    assert np.allclose(F.asnumpy(load_g.ndata['n1']), ndata_n1)
Example #26
def test_load_old_files2():
    loadg_list, labels0 = dgl.load_graphs(
        os.path.join(os.path.dirname(__file__), "data/2.bin"))
    labels1 = load_labels(os.path.join(os.path.dirname(__file__),
                                       "data/2.bin"))
    idx, edges0, edges1, np_labels = np.load(
        os.path.join(os.path.dirname(__file__), "data/2.npy"),
        allow_pickle=True)
    assert np.allclose(F.asnumpy(labels0['label']), np_labels)
    assert np.allclose(F.asnumpy(labels1['label']), np_labels)

    load_g = loadg_list[idx]
    print(load_g)
    load_edges = load_g.all_edges('uv', 'eid')
    assert np.allclose(F.asnumpy(load_edges[0]), edges0)
    assert np.allclose(F.asnumpy(load_edges[1]), edges1)
Example #27
 def __getitem__(self, index):
     path = self.paths[index]
     graph_list, label_dict = dgl.load_graphs(path)
     graph = graph_list[0]
     assert torch.sum(torch.isnan(graph.ndata['geometric_feat']['edge'])) == 0, path
     graph.apply_nodes(lambda nodes:
                       {'geometric_feat': (nodes.data['geometric_feat'] - self.mean_node_feat)/self.std_node_feat},
                       ntype='node')
     graph.apply_nodes(lambda nodes:
                       {'geometric_feat': (nodes.data['geometric_feat'] - self.mean_edge_feat)/self.std_edge_feat},
                       ntype='edge')
     graph.apply_nodes(lambda nodes:
                       {'geometric_feat': (nodes.data['geometric_feat'] - self.mean_face_feat)/self.std_face_feat},
                       ntype='face')
     label = graph.ndata['label']
     if self.opt.save_prediction_for_test_files:
         return path, graph, label
     return graph, label
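Note: the per-type normalization above runs through DGLGraph.apply_nodes, which updates node features via a user-defined function. A minimal homogeneous-graph sketch of the same pattern:

import dgl
import torch

g = dgl.rand_graph(4, 8)
g.ndata['feat'] = torch.randn(4, 3)
mean, std = g.ndata['feat'].mean(0), g.ndata['feat'].std(0)

# the dict returned by the UDF is written back into ndata
g.apply_nodes(lambda nodes: {'feat': (nodes.data['feat'] - mean) / std})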
Example #28
    def load(self):
        self.graph_list = []
        self.label_list = []

        for child_dir in ['normal', 'attack']:
            homograph = "dataset/homograph/" + child_dir + '/'
            scenarios = os.listdir(homograph)
            for scenario in scenarios:
                file_path = homograph + scenario
                graphs = os.listdir(file_path)
                for graph in graphs:
                    g_list, label_dict = dgl.load_graphs(file_path + '/' +
                                                         graph)
                    self.graph_list.append(g_list[0])
                    for key, value in label_dict.items():
                        if key != 'Drive-by-download':
                            self.label_list.append(0)
                        else:
                            self.label_list.append(1)
Example #29
def train(args):
    set_random_seed(args.seed)
    device = get_device(args.device)
    data, _, feat, labels, _, train_idx, val_idx, test_idx, evaluator = \
        load_data(args.dataset, device)
    feat = (feat - feat.mean(dim=0)) / feat.std(dim=0)
    # label propagation graph
    if args.dataset in ('acm', 'dblp'):
        pos_v, pos_u = data.pos
        pg = dgl.graph((pos_u, pos_v), device=device)
    else:
        pg = dgl.load_graphs(args.prop_graph)[0][-1].to(device)

    if args.dataset == 'oag-venue':
        labels[labels == -1] = 0

    base_model = nn.Linear(feat.shape[1], data.num_classes).to(device)
    train_base_model(base_model, feat, labels, train_idx, val_idx, test_idx,
                     evaluator, args)
    correct_and_smooth(base_model, pg, feat, labels, train_idx, val_idx,
                       test_idx, evaluator, args)
Example #30
def test_graph_serialize_with_feature(is_hetero):
    num_graphs = 100

    t0 = time.time()

    g_list = construct_graph(num_graphs, is_hetero)

    t1 = time.time()

    # create a temporary file and immediately release it so DGL can open it.
    f = tempfile.NamedTemporaryFile(delete=False)
    path = f.name
    f.close()

    dgl.save_graphs(path, g_list)

    t2 = time.time()
    idx_list = np.random.permutation(np.arange(num_graphs)).tolist()
    loadg_list, _ = dgl.load_graphs(path, idx_list)

    t3 = time.time()
    idx = idx_list[0]
    load_g = loadg_list[0]
    print("Save time: {} s".format(t2 - t1))
    print("Load time: {} s".format(t3 - t2))
    print("Graph Construction time: {} s".format(t1 - t0))

    assert F.allclose(load_g.nodes(), g_list[idx].nodes())

    load_edges = load_g.all_edges('uv', 'eid')
    g_edges = g_list[idx].all_edges('uv', 'eid')
    assert F.allclose(load_edges[0], g_edges[0])
    assert F.allclose(load_edges[1], g_edges[1])
    assert F.allclose(load_g.edata['e1'], g_list[idx].edata['e1'])
    assert F.allclose(load_g.edata['e2'], g_list[idx].edata['e2'])
    assert F.allclose(load_g.ndata['n1'], g_list[idx].ndata['n1'])

    os.unlink(path)