示例#1
0
文件: page_rank.py 项目: kuangliu/dgl
def assign_feature(g: dgl.DGLGraph):
    """Initialise the per-node features ``'pv'`` and ``'deg'`` on ``g``.

    ``'pv'`` is filled with the constant ``1 / N`` and ``'deg'`` with each
    node's out-degree converted to float.  ``N`` is a module-level name that
    is not defined in this function.
    """
    uniform_value = torch.ones(N) / N
    g.ndata['pv'] = uniform_value

    out_degree = g.out_degrees(g.nodes())
    g.ndata['deg'] = out_degree.float()
示例#2
0
def test_nx_conversion():
    """Round-trip a DGLGraph through networkx and verify structure and features.

    Covers four scenarios:
      1. ``to_networkx`` with selected node/edge attributes.
      2. ``from_networkx`` into an existing graph, using the ``'id'`` edge
         attribute written by ``to_networkx``.
      3. conversion after nodes/edges were added, then ``from_networkx``
         without the ``'id'`` attribute.
      4. ``from_networkx`` on a graph whose node labels are not
         consecutive integers starting at 0.
    """

    def _check_nx_feature(nxg, nf, ef):
        """Assert that ``nxg`` carries exactly the node/edge features given.

        ``nf`` / ``ef`` map feature name -> expected stacked tensor for nodes
        and edges respectively.  Used to validate ``to_networkx`` output.
        """
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            # Pre-size each feature list so rows can be placed by edge id.
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))
    g = DGLGraph(multigraph=True)
    g.add_nodes(5)
    g.add_edges([0, 1, 3, 4], [2, 4, 0, 3])
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.get_e_repr()['id'],
                         F.copy_to(F.arange(0, 4), F.cpu()))

    # test conversion after modifying DGLGraph
    # pop id so we don't need to provide id when adding edges
    g.pop_e_repr('id')
    new_n = F.randn((2, 3))
    new_e = F.randn((3, 5))
    g.add_nodes(2, data={'n1': new_n})
    # add three edges, one is a multi-edge
    g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    n1 = F.cat((n1, new_n), 0)
    e1 = F.cat((e1, new_e), 0)
    # convert to networkx again
    nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    assert len(nxg) == 7
    assert nxg.size() == 7
    _check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = DGLGraph(multigraph=True)
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 7
    assert g.number_of_edges() == 7
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)

    # Test converting from a networkx graph whose nodes are
    # not labeled with consecutive-integers.
    nxg = nx.cycle_graph(5)
    nxg.remove_nodes_from([0, 4])
    for u in nxg.nodes():
        # Use the ``nodes`` view (as _check_nx_feature already does); the
        # legacy ``Graph.node`` alias no longer exists in NetworkX >= 2.4.
        nxg.nodes[u]['h'] = F.tensor([u])
    for u, v, d in nxg.edges(data=True):
        d['h'] = F.tensor([u, v])

    g = dgl.DGLGraph()
    g.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
    assert g.number_of_nodes() == 3
    assert g.number_of_edges() == 4
    assert g.has_edge_between(0, 1)
    assert g.has_edge_between(1, 2)
    assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
    assert F.allclose(g.edata['h'],
                      F.tensor([[1., 2.], [1., 2.], [2., 3.], [2., 3.]]))
示例#3
0
def test_recv_0deg():
    """Exercise ``recv`` on a two-node graph (edge 0 -> 1) where node 0 has
    no incoming edge."""
    g = DGLGraph()
    g.add_nodes(2)
    g.add_edge(0, 1)

    def _forward_src(edges):
        # message: pass the source node's 'h' along the edge
        return {'m': edges.src['h']}

    def _accumulate(nodes):
        # reduce: own 'h' plus the sum of incoming messages
        return {'h': nodes.data['h'] + F.sum(nodes.mailbox['m'], 1)}

    def _double(nodes):
        # apply: scale 'h' by two
        return {'h': nodes.data['h'] * 2}

    def _const_two(shape, dtype, ctx, ids):
        # initializer: a tensor filled with 2
        return 2 + F.zeros(shape, dtype, ctx)

    g.register_message_func(_forward_src)
    g.register_reduce_func(_accumulate)
    g.register_apply_node_func(_double)
    g.set_n_initializer(_const_two, 'h')

    def _send_and_recv(receivers):
        """Install fresh random 'h', send along (0, 1), recv on
        ``receivers`` and return ``(h_before, h_after)``."""
        before = F.randn((2, 5))
        g.ndata['h'] = before
        g.send((0, 1))
        g.recv(receivers)
        return before, g.ndata.pop('h')

    # test#1: recv both 0deg and non-0deg nodes
    before, after = _send_and_recv([0, 1])
    # 0deg check: initialized with the func and got applied
    assert F.allclose(after[0], F.full_1d(5, 4, F.float32))
    # non-0deg check
    assert F.allclose(after[1], F.sum(before, 0) * 2)

    # test#2: recv only 0deg node is equal to apply
    before, after = _send_and_recv(0)
    # 0deg check: equal to apply_nodes
    assert F.allclose(after[0], 2 * before[0])
    # non-0deg check: untouched
    assert F.allclose(after[1], before[1])
示例#4
0
文件: train.py 项目: Victorylcl/dgl
def main(args):
    """Train a GAT model on the dataset selected by ``args`` and print the
    test accuracy.

    Attributes read from ``args`` in this function: ``gpu``, ``num_heads``,
    ``num_layers``, ``num_out_heads``, ``num_hidden``, ``in_drop``,
    ``attn_drop``, ``alpha``, ``residual``, ``lr``, ``weight_decay``,
    ``epochs`` and ``fastmode`` (``load_data`` may read more).

    In non-fastmode the validation accuracy is fed to an ``EarlyStopping``
    instance each epoch and training stops when it signals; the checkpoint
    ``es_checkpoint.pt`` is then restored before the final test evaluation.
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, train_mask.sum().item(),
                             val_mask.sum().item(), test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # add self loop: drop any existing self-loops first so that every node
    # ends up with exactly one after add_edges below
    g.remove_edges_from(g.selfloop_edges())
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g, args.num_layers, num_feats, args.num_hidden, n_classes,
                heads, F.elu, args.in_drop, args.attn_drop, args.alpha,
                args.residual)
    print(model)
    stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Per-epoch wall-clock durations; the first three epochs are treated as
    # warm-up and are not timed.
    dur = []
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            # reuse the training-mode logits instead of a separate eval pass
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if stopper.step(val_acc, model):
                break

        # ``dur`` is still empty during warm-up; report NaN explicitly rather
        # than letting np.mean([]) emit a "Mean of empty slice" RuntimeWarning.
        mean_dur = np.mean(dur) if dur else float('nan')
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, mean_dur, loss.item(), train_acc, val_acc,
                  n_edges / mean_dur / 1000))

    print()
    # stopper.step() is only reached in non-fastmode, so in fastmode this run
    # produced no checkpoint (presumably step() is what writes
    # 'es_checkpoint.pt' - confirm against EarlyStopping).  Loading it there
    # would either fail or restore weights from an unrelated earlier run.
    if not args.fastmode:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
示例#5
0
def main(args):
    """Train a GAT model with MXNet/Gluon and print validation and test
    accuracy.

    Validation accuracy is printed every 100th epoch (starting at epoch 0);
    test accuracy is printed once after the last epoch.
    """
    # load and preprocess dataset
    data = load_data(args)

    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    # the masks are stored as the positions where the 0/1 mask equals 1
    mask = mx.nd.array(np.where(data.train_mask == 1))
    test_mask = mx.nd.array(np.where(data.test_mask == 1))
    val_mask = mx.nd.array(np.where(data.val_mask == 1))
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    run_on_cpu = args.gpu < 0
    ctx = mx.cpu() if run_on_cpu else mx.gpu(args.gpu)
    if not run_on_cpu:
        features = features.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        mask = mask.as_in_context(ctx)
        test_mask = test_mask.as_in_context(ctx)
        val_mask = val_mask.as_in_context(ctx)

    # create graph and add a self-loop on every node
    g = DGLGraph(data.graph)
    g.add_edges(g.nodes(), g.nodes())

    # create model
    model = GAT(g, args.num_layers, in_feats, args.num_hidden, n_classes,
                args.num_heads, elu, args.in_drop, args.attn_drop,
                args.residual)
    model.initialize(ctx=ctx)

    # use optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr})

    dur = []
    for epoch in range(args.epochs):
        # the first three epochs are not timed
        timed = epoch >= 3
        if timed:
            t0 = time.time()

        # forward + backward
        with mx.autograd.record():
            logits = model(features)
            loss = mx.nd.softmax_cross_entropy(logits[mask].squeeze(),
                                               labels[mask].squeeze())
            loss.backward()
        trainer.step(mask.shape[0])

        if timed:
            dur.append(time.time() - t0)

        mean_dur = np.mean(dur)
        print(
            "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}"
            .format(epoch,
                    loss.asnumpy()[0], mean_dur,
                    n_edges / mean_dur / 1000))

        if epoch % 100 == 0:
            val_accuracy = evaluate(model, features, labels, val_mask)
            print("Validation Accuracy {:.4f}".format(val_accuracy))

    test_accuracy = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(test_accuracy))
def train(epoch, train_adj, train_fea, idx_train, idx_val, val_adj=None, val_fea=None, labels=None):
    """Run one optimisation step of the classifier, then score validation.

    The unsupervised model is kept in eval mode and only used to produce
    features from ``(features, graph)``; the classifier is what gets trained.
    When ``val_adj`` is ``None`` the training adjacency/features are reused
    for validation.

    Returns a tuple
    ``(loss_train, acc_train, loss_val, acc_val, lr, train_time, val_time)``.
    """

    def _dgl_from_edge_index(edge_index, num_nodes):
        # Build a (num_nodes x num_nodes) COO matrix with a 1 per listed
        # edge, then go scipy -> networkx DiGraph -> DGLGraph.
        coo = sp.coo_matrix((np.ones(edge_index.shape[1]),
                             (edge_index[0], edge_index[1])),
                            shape=(num_nodes, num_nodes),
                            dtype=np.float32)
        nx_graph = nx.from_scipy_sparse_matrix(coo, create_using=nx.DiGraph())
        return DGLGraph(nx_graph)

    unsupervised_model.eval()

    if val_adj is None:
        val_adj, val_fea = train_adj, train_fea

    t = time.time()
    classifier_model.train()

    optimizer.zero_grad()

    # construct g from train adj
    train_g = _dgl_from_edge_index(train_adj._indices().data.cpu().numpy(),
                                   train_adj.shape[0])

    feats = unsupervised_model(train_fea, train_g)
    output = classifier_model(feats)

    inductive = sampler.learning_type == "inductive"
    # special for inductive: the whole output is compared against the
    # training labels, without selecting rows by idx_train
    train_output = output if inductive else output[idx_train]
    loss_train = F.nll_loss(train_output, labels[idx_train])
    acc_train = accuracy(train_output, labels[idx_train])

    loss_train.backward()
    optimizer.step()
    train_t = time.time() - t
    val_t = time.time()
    # We can not apply the fastmode for the coauthor_phy dataset.
    # if sampler.learning_type == "inductive" or not args.fastmode:

    classifier_model.eval()
    # construct g from val adj; in the inductive setting the models and
    # labels are moved to the CPU for the validation pass
    if inductive:
        unsupervised_model.cpu()
        classifier_model.cpu()
        labels = labels.cpu()
        val_edges = val_adj._indices().data.numpy()
    else:
        val_edges = val_adj._indices().data.cpu().numpy()

    val_g = _dgl_from_edge_index(val_edges, val_adj.shape[0])

    feats = unsupervised_model(val_fea, val_g)
    output = classifier_model(feats)

    if args.early_stopping > 0 and not inductive:
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        early_stopping(loss_val, classifier_model)

    if args.fastmode:
        loss_val = 0
        acc_val = 0
    else:
        # Evaluate validation set performance separately,
        # deactivates dropout during validation run.
        loss_val = F.nll_loss(output[idx_val], labels[idx_val]).item()
        acc_val = accuracy(output[idx_val], labels[idx_val]).item()
        if inductive:
            early_stopping(loss_val, classifier_model)

    # move everything back to the GPU after an inductive validation pass
    if inductive:
        unsupervised_model.cuda()
        classifier_model.cuda()
        labels = labels.cuda()

    if args.lradjust:
        scheduler.step()

    val_t = time.time() - val_t
    return (loss_train.item(), acc_train.item(), loss_val, acc_val, get_lr(optimizer), train_t, val_t)
示例#7
0
def main_hin(args):
    '''
        Entry point: build the graph and the GenGNN model, train for
        ``args.n_epochs`` epochs and track macro/micro F1 on the
        train / val / test node sets.

        Returns ``(best_tst_macro_f1, best_tst_micro_f1)``: the test scores
        recorded at the epochs where the corresponding validation score was
        highest.
    '''
    data_dir = args.data_dir
    dataset_name = args.dataset_name
    assert dataset_name.lower() in ['acm', 'imdb', 'dblp']

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    loaded = data_loader(data_dir, dataset_name, device)
    (num_nodes, num_classes, node_features, data_split, adjs,
     edge_list, edge_type, node_type_i, node_type_j, n_edge_type) = loaded

    trn_node, trn_label, val_node, val_label, tst_node, tst_label = data_split
    adj_GTN, adj_graphsage, nx_graph = adjs

    in_feats = node_features.size(1)
    g = nx_graph.to_directed()

    if args.self_loop:
        # replace whatever self-loops exist by exactly one per node
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    dgl_g = DGLGraph(g)
    n_edges = dgl_g.number_of_edges()

    # per-node in_degree ** -0.5, with infinities (zero in-degree) set to 0
    norm = torch.pow(dgl_g.in_degrees().float(), -0.5)
    norm[torch.isinf(norm)] = 0
    dgl_g.ndata['norm'] = norm.to(device).unsqueeze(1)

    # settings shared by the generative and the posterior configuration
    model_args = dict([
        ("g", dgl_g),
        ("in_feats", in_feats),
        ("n_classes", num_classes),
        ("n_hidden", args.hidden),
        ("dropout", args.dropout),
        ("activation", F.relu),
    ])

    gen_type = args.gen_type
    post_type = args.post_type

    gen_config = copy.deepcopy(model_args)
    gen_config.update({
        "type": gen_type,
        "neg_ratio": args.neg_ratio,
        'hidden_x': args.hidden_x,
        'aspect_embed_size': args.aspect_embed_size,
        'nx_g': g,
        'n_edge_type': n_edge_type,
        'n_layers': args.n_gnn_layers,
    })

    post_config = copy.deepcopy(model_args)
    post_config.update({
        "type": post_type,
        'aspect_embed_size': args.aspect_embed_size,
    })
    if post_type == 'a2gnn':
        post_config['a2gnn_num_layer'] = args.a2gnn_num_layer
        post_config['args'] = args
    else:
        post_config['n_layers'] = args.n_gnn_layers
        if post_type == 'graphsage':
            post_config['aggregator_type'] = args.aggregator_type

    model = GenGNN(gen_config, post_config).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer, [80, 120, 150], 0.6)

    neib_sampler = NeibSampler(nx_graph, args.n_nb).to(device)

    def _macro_micro_f1(split_logits, split_labels):
        # argmax prediction on CPU, then macro and micro F1 against labels
        y_true = split_labels.cpu().numpy()
        y_pred = split_logits.cpu().detach().numpy().argmax(axis=1)
        return (f1_score(y_true, y_pred, average="macro"),
                f1_score(y_true, y_pred, average="micro"))

    best_val_macro_f1 = best_tst_macro_f1 = -1
    best_val_micro_f1 = best_tst_micro_f1 = -1
    for epoch in range(args.n_epochs):
        model.train()
        aspect_embed, logits = model.cal_post(node_features, neib_sampler)
        post_aspect = F.log_softmax(aspect_embed, dim=1)
        y_log_prob = F.log_softmax(logits, dim=1)

        # loss = lamda * (generative NLL + self-supervised) + discriminative NLL
        nll_generative = model.gen.nll_generative(node_features,
                                                  post_aspect,
                                                  trn_node,
                                                  trn_label)
        mask_rate = args.mask_rate
        ss_loss = model.gen.self_supervised(node_features,
                                            aspect_embed,
                                            edge_type,
                                            mask_rate)
        nll_discriminative = F.nll_loss(y_log_prob[trn_node], trn_label)
        trn_loss = args.lamda * (nll_generative + ss_loss) + nll_discriminative

        # scores are computed from this epoch's forward pass, before the
        # optimizer step below
        trn_macro_f1, trn_micro_f1 = _macro_micro_f1(logits[trn_node], trn_label)
        val_macro_f1, val_micro_f1 = _macro_micro_f1(logits[val_node], val_label)
        tst_macro_f1, tst_micro_f1 = _macro_micro_f1(logits[tst_node], tst_label)

        macro_improved = val_macro_f1 > best_val_macro_f1
        if macro_improved:
            best_val_macro_f1 = val_macro_f1
            best_tst_macro_f1 = tst_macro_f1
        micro_improved = val_micro_f1 > best_val_micro_f1
        if micro_improved:
            best_val_micro_f1 = val_micro_f1
            best_tst_micro_f1 = tst_micro_f1
        is_new_best = macro_improved or micro_improved

        optimizer.zero_grad()
        trn_loss.backward()
        optimizer.step()
        scheduler.step()

        message = 'epoch:{:>3d}/{}  trn_loss: {:.5f}  trn_macro_f1: {:.4f} | val_macro_f1: {:.4f} | tst_macro_f1: {:.4f}'.format(
            epoch, args.n_epochs, trn_loss.item(), trn_macro_f1, val_macro_f1, tst_macro_f1)
        # a new validation best is highlighted in green
        if is_new_best:
            cprint(message, 'green')
        else:
            print(message)

    return best_tst_macro_f1, best_tst_micro_f1
示例#8
0
def load_data(dataset_name,
              splits_file_path=None,
              train_percentage=None,
              val_percentage=None,
              embedding_mode=None,
              embedding_method=None,
              embedding_method_graph=None,
              embedding_method_space=None):
    """Load a node-classification dataset and return graph, tensors and masks.

    Args:
        dataset_name: ``'cora'``, ``'citeseer'`` and ``'pubmed'`` are loaded
            through ``utils.load_data``; any other name is read from
            ``new_data/<dataset_name>/out1_graph_edges.txt`` and
            ``new_data/<dataset_name>/out1_node_feature_label.txt``.
        splits_file_path: if truthy, a file opened with ``np.load`` that
            provides ``train_mask``, ``val_mask`` and ``test_mask``.
        train_percentage, val_percentage: fractions used for a random split
            when ``splits_file_path`` is falsy; both are then required.
        embedding_mode: if falsy, the graph is built from the adjacency
            matrix plus the identity.  Otherwise a relation file is read
            (its location depends on the mode, see below) and every edge
            gets a ``'subgraph_idx'`` edge feature.
        embedding_method: used in the relation file name for modes other
            than ``'ExperimentTwoAll'`` / ``'ExperimentTwoPairs'``.
        embedding_method_graph, embedding_method_space: used in the relation
            file name for ``'ExperimentTwoPairs'``.

    Returns:
        ``(g, features, labels, train_mask, val_mask, test_mask,
        num_features, num_labels)`` where ``g`` is a ``DGLGraph`` carrying a
        ``'norm'`` node feature, ``features`` is a ``FloatTensor``,
        ``labels`` a ``LongTensor`` and the masks are ``BoolTensor``s.
    """
    # NOTE(review): looks like a leftover debug print.
    print("test1")
    if dataset_name in {'cora', 'citeseer', 'pubmed'}:
        adj, features, labels, _, _, _ = utils.load_data(dataset_name)

        # collapse the last axis to a class index (presumably the labels are
        # one-hot here - confirm against utils.load_data)
        labels = np.argmax(labels, axis=-1)
        features = features.todense()

        G = nx.DiGraph(adj)
    else:
        graph_adjacency_list_file_path = os.path.join('new_data', dataset_name,
                                                      'out1_graph_edges.txt')
        graph_node_features_and_labels_file_path = os.path.join(
            'new_data', dataset_name, f'out1_node_feature_label.txt')

        G = nx.DiGraph()
        # node id -> feature vector / node id -> label
        graph_node_features_dict = {}
        graph_labels_dict = {}

        if dataset_name == 'film':
            # 'film': the feature column lists the indices that are set to 1
            # in a 932-long uint8 vector.
            with open(graph_node_features_and_labels_file_path
                      ) as graph_node_features_and_labels_file:
                # skip the first line (presumably a header)
                graph_node_features_and_labels_file.readline()
                for line in graph_node_features_and_labels_file:
                    # tab-separated: node id, comma-separated features, label
                    line = line.rstrip().split('\t')
                    assert (len(line) == 3)
                    assert (int(line[0]) not in graph_node_features_dict
                            and int(line[0]) not in graph_labels_dict)
                    feature_blank = np.zeros(932, dtype=np.uint8)
                    feature_blank[np.array(line[1].split(','),
                                           dtype=np.uint16)] = 1
                    graph_node_features_dict[int(line[0])] = feature_blank
                    graph_labels_dict[int(line[0])] = int(line[2])
        else:
            # other datasets: the feature column is the full feature vector
            with open(graph_node_features_and_labels_file_path
                      ) as graph_node_features_and_labels_file:
                # skip the first line (presumably a header)
                graph_node_features_and_labels_file.readline()
                for line in graph_node_features_and_labels_file:
                    line = line.rstrip().split('\t')
                    assert (len(line) == 3)
                    assert (int(line[0]) not in graph_node_features_dict
                            and int(line[0]) not in graph_labels_dict)
                    graph_node_features_dict[int(line[0])] = np.array(
                        line[1].split(','), dtype=np.uint8)
                    graph_labels_dict[int(line[0])] = int(line[2])

        with open(graph_adjacency_list_file_path) as graph_adjacency_list_file:
            # skip the first line (presumably a header)
            graph_adjacency_list_file.readline()
            for line in graph_adjacency_list_file:
                # tab-separated: source node id, target node id
                line = line.rstrip().split('\t')
                assert (len(line) == 2)
                # Nodes enter G only when they appear in an edge; each one
                # carries its feature vector and label as attributes.
                if int(line[0]) not in G:
                    G.add_node(int(line[0]),
                               features=graph_node_features_dict[int(line[0])],
                               label=graph_labels_dict[int(line[0])])
                if int(line[1]) not in G:
                    G.add_node(int(line[1]),
                               features=graph_node_features_dict[int(line[1])],
                               label=graph_labels_dict[int(line[1])])
                G.add_edge(int(line[0]), int(line[1]))

        # Adjacency, features and labels are all ordered by sorted node id so
        # that their rows line up.
        adj = nx.adjacency_matrix(G, sorted(G.nodes()))
        features = np.array([
            features for _, features in sorted(G.nodes(data='features'),
                                               key=lambda x: x[0])
        ])
        labels = np.array([
            label
            for _, label in sorted(G.nodes(data='label'), key=lambda x: x[0])
        ])

    features = utils.preprocess_features(features)

    if not embedding_mode:
        #g = DGLGraph(adj)
        # NOTE(review): the print calls in this branch look like debug output.
        print(type(adj))
        # adding the identity puts an entry on every diagonal position
        g = DGLGraph(adj + sp.eye(adj.shape[0]))
        print(g)
        #g = g.to('cpu')
        print(type(g))
    else:
        # choose the relation file for the requested embedding mode
        if embedding_mode == 'ExperimentTwoAll':
            embedding_file_path = os.path.join(
                'embedding_method_combinations_all',
                f'outf_nodes_relation_{dataset_name}all_embedding_methods.txt')
        elif embedding_mode == 'ExperimentTwoPairs':
            embedding_file_path = os.path.join(
                'embedding_method_combinations_in_pairs',
                f'outf_nodes_relation_{dataset_name}_graph_{embedding_method_graph}_space_{embedding_method_space}.txt'
            )
        else:
            embedding_file_path = os.path.join(
                'structural_neighborhood',
                f'outf_nodes_space_relation_{dataset_name}_{embedding_method}.txt'
            )
        # (space, relation_type) -> consecutive index, in order of first
        # appearance in the file
        space_and_relation_type_to_idx_dict = {}

        with open(embedding_file_path) as embedding_file:
            for line in embedding_file:
                # skip the header row
                if line.rstrip() == 'node1,node2	space	relation_type':
                    continue
                # fields: node1, node2, space, relation_type
                line = re.split(r'[\t,]', line.rstrip())
                assert (len(line) == 4)
                assert (int(line[0]) in G and int(line[1]) in G)
                if (line[2], int(
                        line[3])) not in space_and_relation_type_to_idx_dict:
                    space_and_relation_type_to_idx_dict[(line[2], int(
                        line[3]))] = len(space_and_relation_type_to_idx_dict)
                # an existing edge is removed and re-added so that it ends up
                # with only the subgraph_idx attribute
                if G.has_edge(int(line[0]), int(line[1])):
                    G.remove_edge(int(line[0]), int(line[1]))
                G.add_edge(int(line[0]),
                           int(line[1]),
                           subgraph_idx=space_and_relation_type_to_idx_dict[(
                               line[2], int(line[3]))])

        # self-loops get an index of their own and are (re-)added on every node
        space_and_relation_type_to_idx_dict['self_loop'] = len(
            space_and_relation_type_to_idx_dict)
        for node in sorted(G.nodes()):
            if G.has_edge(node, node):
                G.remove_edge(node, node)
            G.add_edge(
                node,
                node,
                subgraph_idx=space_and_relation_type_to_idx_dict['self_loop'])
        adj = nx.adjacency_matrix(G, sorted(G.nodes()))

        g = DGLGraph(adj).to(th.device('cpu'))
        print(type(g))

        # copy each edge's subgraph_idx from G onto the matching DGL edge
        # NOTE(review): edges of G that never appeared in the relation file
        # carry no 'subgraph_idx', so ``feature`` is None for them - confirm
        # the relation file covers every edge.
        for u, v, feature in G.edges(data='subgraph_idx'):
            g.edges[g.edge_id(u, v)].data['subgraph_idx'] = th.tensor(
                [feature]).to(th.device('cpu'))

    if splits_file_path:
        print(type(g))
        # pre-computed split masks
        with np.load(splits_file_path) as splits_file:
            train_mask = splits_file['train_mask']
            val_mask = splits_file['val_mask']
            test_mask = splits_file['test_mask']
    else:
        # random split; both fractions are required here
        assert (train_percentage is not None and val_percentage is not None)
        assert (train_percentage < 1.0 and val_percentage < 1.0
                and train_percentage + val_percentage < 1.0)

        if dataset_name in {'cora', 'citeseer'}:
            # nodes listed in this file are left out of all three masks
            disconnected_node_file_path = os.path.join(
                'unconnected_nodes', f'{dataset_name}_unconnected_nodes.txt')
            with open(disconnected_node_file_path) as disconnected_node_file:
                # skip the first line (presumably a header)
                disconnected_node_file.readline()
                disconnected_nodes = []
                for line in disconnected_node_file:
                    line = line.rstrip()
                    disconnected_nodes.append(int(line))

            disconnected_nodes = np.array(disconnected_nodes)
            connected_nodes = np.setdiff1d(np.arange(features.shape[0]),
                                           disconnected_nodes)

            connected_labels = labels[connected_nodes]

            # first split: (train + val) vs. test over the connected nodes
            train_and_val_index, test_index = next(
                ShuffleSplit(n_splits=1,
                             train_size=train_percentage +
                             val_percentage).split(
                                 np.empty_like(connected_labels),
                                 connected_labels))
            # second split: train vs. val inside the (train + val) subset
            # NOTE(review): train_size=train_percentage is applied to that
            # subset, not to the full node set - confirm this is intended.
            train_index, val_index = next(
                ShuffleSplit(n_splits=1, train_size=train_percentage).split(
                    np.empty_like(connected_labels[train_and_val_index]),
                    connected_labels[train_and_val_index]))
            # map positions within the subset back to positions within
            # connected_nodes
            train_index = train_and_val_index[train_index]
            val_index = train_and_val_index[val_index]

            # masks are indexed by original node position
            train_mask = np.zeros_like(labels)
            train_mask[connected_nodes[train_index]] = 1
            val_mask = np.zeros_like(labels)
            val_mask[connected_nodes[val_index]] = 1
            test_mask = np.zeros_like(labels)
            test_mask[connected_nodes[test_index]] = 1
        else:
            # same two-stage split, over all nodes
            train_and_val_index, test_index = next(
                ShuffleSplit(n_splits=1,
                             train_size=train_percentage +
                             val_percentage).split(np.empty_like(labels),
                                                   labels))
            train_index, val_index = next(
                ShuffleSplit(n_splits=1, train_size=train_percentage).split(
                    np.empty_like(labels[train_and_val_index]),
                    labels[train_and_val_index]))
            train_index = train_and_val_index[train_index]
            val_index = train_and_val_index[val_index]

            train_mask = np.zeros_like(labels)
            train_mask[train_index] = 1
            val_mask = np.zeros_like(labels)
            val_mask[val_index] = 1
            test_mask = np.zeros_like(labels)
            test_mask[test_index] = 1

    num_features = features.shape[1]
    num_labels = len(np.unique(labels))
    # the label values must be exactly 0, 1, ..., num_labels - 1
    assert (np.array_equal(np.unique(labels),
                           np.arange(len(np.unique(labels)))))

    features = th.FloatTensor(features)
    labels = th.LongTensor(labels)
    train_mask = th.BoolTensor(train_mask)
    val_mask = th.BoolTensor(val_mask)
    test_mask = th.BoolTensor(test_mask)

    # Adapted from https://docs.dgl.ai/tutorials/models/1_gnn/1_gcn.html
    print(type(g))
    # NOTE(review): the return value of g.to(...) is discarded; if ``to``
    # returns a new graph rather than moving in place this line has no
    # effect - confirm against the DGL version in use.
    g.to(th.device('cpu'))
    # per-node in_degree ** -0.5; infinities (zero in-degree) become 0
    degs = g.in_degrees().float()
    norm = th.pow(degs, -0.5)
    norm[th.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1)

    return g, features, labels, train_mask, val_mask, test_mask, num_features, num_labels
示例#9
0
def generate_rand_graph(n):
    """Return a read-only ``DGLGraph`` built from a random sparse ``n`` x ``n``
    matrix of density 0.1 whose non-zero entries are turned into 1s."""
    random_coo = sp.sparse.random(n, n, density=0.1, format='coo')
    # keep only the sparsity pattern: non-zero -> 1 (int64)
    pattern = (random_coo != 0).astype(np.int64)
    return DGLGraph(pattern, readonly=True)
示例#10
0
def main(args):
    """Train and evaluate a GraphSAGE node classifier.

    Loads the dataset with ``load_data(args)``, optionally moves the tensors
    and the model to the GPU selected by ``args.gpu``, trains on the full
    graph for ``args.n_epochs`` epochs with Adam and cross-entropy on the
    training mask, prints validation accuracy after every epoch and the test
    accuracy at the end.

    Args:
        args: parsed options. This function reads ``gpu``, ``n_hidden``,
            ``n_layers``, ``dropout``, ``aggregator_type``, ``lr``,
            ``weight_decay`` and ``n_epochs``; ``load_data`` receives the
            whole object.
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, train_mask.sum().item(),
                             val_mask.sum().item(), test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print("use cuda:", args.gpu)

    # build the DGL graph; the edge count used for throughput is taken from it
    g = DGLGraph(data.graph)
    n_edges = g.number_of_edges()

    # create GraphSAGE model
    model = GraphSAGE(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                      F.relu, args.dropout, args.aggregator_type)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # per-epoch wall-clock times; the first three epochs are treated as
    # warm-up and are not recorded
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        # np.mean([]) emits a "Mean of empty slice" RuntimeWarning; during the
        # warm-up epochs report NaN explicitly instead (same printed output).
        mean_dur = np.mean(dur) if dur else float('nan')

        acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, mean_dur, loss.item(),
                                          acc, n_edges / mean_dur / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
示例#11
0
文件: dgl_.py 项目: tekdogan/gcn
        # NOTE(review): the enclosing definition starts above this excerpt;
        # these lines convert the dataset arrays to torch tensors, wrap the
        # graph in a DGLGraph and return everything to the caller.
        features = torch.FloatTensor(data.features)
        labels = torch.LongTensor(data.labels)
        train_mask = torch.BoolTensor(data.train_mask)
        test_mask = torch.BoolTensor(data.test_mask)
        g = DGLGraph(data.graph)
        return g, features, labels, train_mask, test_mask

    # Load the PubMed citation dataset.
    data = citegrh.load_pubmed()
    #features = torch.FloatTensor(data.features)
    #g = DGLGraph(data.graph).to(device)

    #dataset = da.CoraGraphDataset()

    # Hard-coded to CUDA: there is no CPU fallback in this excerpt.
    device = torch.device('cuda')

    #model = Net()
    model = Net().to(device)

    features = torch.FloatTensor(data.features).to(device)
    g = DGLGraph(data.graph).to(device)

    #data = dataset[0].to(device)

    # g was already moved to `device` when it was created above.
    g = g.to(device)

    # Single forward pass; `out` is not used again within this excerpt.
    out = model(g, features)

    # NOTE(review): `profiler` is started outside this excerpt — presumably it
    # wraps the forward pass above; confirm against the full file.
    profiler.stop()

    #print(net)
示例#12
0
文件: page_rank.py 项目: kuangliu/dgl
def page_rank_builtin(g: dgl.DGLGraph):
    """Run one PageRank update on ``g`` using DGL built-in functions.

    Reads node fields 'pv' and 'deg', writes 'pv' (and the intermediate
    'sum' field produced by the reduce step). Uses module-level DAMP and N.
    """
    # Divide each node's value by its stored degree before sending.
    share_per_edge = g.ndata['pv'] / g.ndata['deg']
    g.ndata['pv'] = share_per_edge

    # Copy 'pv' along every edge as message 'm', then sum messages into 'sum'.
    send_pv = fn.copy_src(src='pv', out='m')
    sum_inbox = fn.sum(msg='m', out='sum')
    g.update_all(message_func=send_pv, reduce_func=sum_inbox)

    base_rank = (1 - DAMP) / N
    g.ndata['pv'] = base_rank + DAMP * g.ndata['sum']
示例#13
0
文件: page_rank.py 项目: kuangliu/dgl
def page_rank_level2(g: dgl.DGLGraph):
    """Trigger a full message-passing round on ``g``.

    Calls ``g.update_all()`` with no arguments.
    NOTE(review): presumably relies on message/reduce functions registered on
    the graph beforehand — confirm against the rest of page_rank.py.
    """
    g.update_all()
示例#14
0
文件: page_rank.py 项目: kuangliu/dgl
def page_rank_batch(g: dgl.DGLGraph):
    """Send along every edge of ``g`` and then receive on every node.

    NOTE(review): no message/reduce functions are passed here; presumably
    they were registered on the graph beforehand — confirm.
    """
    every_edge = g.edges()
    g.send(every_edge)
    every_node = g.nodes()
    g.recv(every_node)
示例#15
0
def test_graph7():
    """Graph with categorical node and edge features."""
    # Three-node graph with edges 0->1, 0->2 and 1->2.
    graph = DGLGraph([(0, 1), (0, 2), (1, 2)])
    # Four integer (categorical) feature vectors, returned in this order.
    first = torch.LongTensor([0, 1, 0])
    second = torch.LongTensor([2, 3, 4])
    third = torch.LongTensor([0, 0, 1])
    fourth = torch.LongTensor([2, 3, 2])
    return graph, first, second, third, fourth
示例#16
0
# Model hyper-parameters; the input/output sizes come from the loaded dataset.
num_layers = 1
num_hidden = 16
infeat_dim = data.features.shape[1]
num_classes = data.num_labels

######################################################################
# Set up the DGL-PyTorch model and get the golden results
# -------------------------------------------------------
#
# The weights are trained with https://github.com/dmlc/dgl/blob/master/examples/pytorch/gcn/train.py
from tvm.contrib.download import download_testdata
from dgl import DGLGraph

features = torch.FloatTensor(data.features)
dgl_g = DGLGraph(g)

torch_model = GCN(dgl_g, infeat_dim, num_hidden, num_classes, num_layers,
                  F.relu)

# Download the pretrained weights
# NOTE(review): the remote file is named gcn_<dataset>.torch but it is cached
# locally as gcn_<dataset>.pickle — presumably intentional; confirm.
model_url = "https://homes.cs.washington.edu/~cyulin/media/gnn_model/gcn_%s.torch" % (
    dataset)
model_path = download_testdata(model_url,
                               "gcn_%s.pickle" % (dataset),
                               module="gcn_model")

# Load the weights into the model
# NOTE(review): torch.load unpickles the downloaded file, which can execute
# arbitrary code; this is only safe while the download source is trusted.
torch_model.load_state_dict(torch.load(model_path))

######################################################################
示例#17
0
文件: train.py 项目: qiygan/HeteGSL
def train_idgl(args):
    """Pretrain the GCN part of an IDGL model, then train the full model.

    The function loads the dataset, L1-normalizes the features, rebuilds the
    graph with exactly one self loop per node, and uses the row-normalized
    dense adjacency matrix as the original graph structure. It first runs
    ``args.pretrain_epochs`` epochs on ``model.GCN`` and then up to
    ``args.max_epoch`` epochs of the iterative IDGL procedure. When
    ``args.early_stop > 0`` an ``EarlyStopping`` helper decides when to stop
    and the checkpoint it wrote is reloaded before the final evaluation.

    Args:
        args: parsed options. This function reads ``seed``, ``gpu``,
            ``pretrain_epochs``, ``max_epoch``, ``early_stop``, ``lr``,
            ``weight_decay`` and ``ngrl``; the whole object is also passed to
            ``load_data``, ``IDGL``, ``cal_loss`` and ``iter_condition``.

    Returns:
        dict: ``{'parameters': args.__dict__, 'res': {...}}`` where ``'res'``
        holds the formatted test accuracies ``'pretrain_acc'`` and
        ``'IDGL_acc'``.
    """
    data = load_data(args)
    seed_init(seed=args.seed)
    dev = torch.device("cuda:0" if args.gpu >= 0 else "cpu")

    features = torch.FloatTensor(data.features)
    features = F.normalize(features, p=1, dim=1)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        # older PyTorch releases without a dedicated bool tensor type
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))
    # print(torch.where(test_mask)) # Same train/test split with different init_seed
    features = features.to(dev)
    labels = labels.to(dev)
    train_mask = train_mask.to(dev)
    val_mask = val_mask.to(dev)
    test_mask = test_mask.to(dev)
    g = data.graph
    # drop existing self loops first so every node ends up with exactly one
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    # create model
    model = IDGL(args, num_feats, n_classes, dev)

    print(model)
    # NOTE(review): the 'temp/' directory is not created here — presumably it
    # already exists or EarlyStopping creates it; confirm.
    es_checkpoint = 'temp/' + time.strftime('%m-%d %H-%M-%S',
                                            time.localtime()) + '.pt'
    stopper = EarlyStopping(patience=100, path=es_checkpoint)
    # Becomes True once stopper.step() has been called at least once; before
    # that there is no checkpoint file to load.
    checkpoint_saved = False

    model.to(dev)
    adj = g.adjacency_matrix()
    # adj = normalize_adj_torch(adj.to_dense())
    adj = F.normalize(adj.to_dense(), dim=1, p=1)
    adj = adj.to(dev)

    # cla_loss = torch.nn.CrossEntropyLoss()
    cla_loss = torch.nn.NLLLoss()
    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # per-epoch wall-clock times of the IDGL loop (first three epochs skipped)
    dur = []

    # ! Pretrain
    res_dict = {'parameters': args.__dict__}
    # Stays None when no pretraining epoch runs (args.pretrain_epochs == 0).
    test_acc = None
    for epoch in range(args.pretrain_epochs):
        # NOTE(review): unlike the IDGL loop below, model.train() is not
        # called here; if evaluate() switches the model to eval mode, later
        # pretraining epochs run in eval mode — confirm this is intended.
        logits, _ = model.GCN(features, adj)
        loss = cla_loss(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        # Stops if an anomaly is detected during backward
        with torch.autograd.detect_anomaly():
            loss.backward()
        optimizer.step()
        train_acc = accuracy(logits[train_mask], labels[train_mask])
        val_acc = evaluate(model, features, labels, val_mask, adj)
        test_acc = evaluate(model, features, labels, test_mask, adj)
        # Pretraining is not timed, so `dur` is empty here; report NaN
        # directly instead of calling np.mean on an empty list (which warns).
        mean_dur = np.mean(dur) if dur else float('nan')
        print(
            f"Pretrain-Epoch {epoch:05d} | Time(s) {mean_dur:.4f} | Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | ValAcc {val_acc:.4f} | TestAcc {test_acc:.4f}"
        )
        if args.early_stop > 0:
            checkpoint_saved = True
            if stopper.step(val_acc, model):
                break
    if test_acc is not None:
        # Only available when at least one pretraining epoch ran.
        print(f"Pretrain Test Accuracy: {test_acc:.4f}")
    print(f"{'=' * 10}Pretrain finished!{'=' * 10}\n\n")
    if args.early_stop > 0 and checkpoint_saved:
        model.load_state_dict(torch.load(es_checkpoint))
    test_acc = evaluate(model, features, labels, test_mask, adj)
    res_dict['res'] = {'pretrain_acc': f'{test_acc:.4f}'}
    # ! Train
    stopper = EarlyStopping(patience=100, path=es_checkpoint)
    for epoch in range(args.max_epoch):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward: first pass learns a graph from the raw features
        t, adj_sim_prev = 0, None
        logits, h, adj_sim, adj_feat = model(features,
                                             h=None,
                                             adj_ori=adj,
                                             adj_feat=None,
                                             mode='feat',
                                             norm_graph_reg_loss=args.ngrl)
        loss_adj_feat = cal_loss(args, cla_loss, logits, train_mask, labels,
                                 adj_sim, features)
        loss_list = [loss_adj_feat]
        ori_adj_norm = torch.norm(adj_sim.detach(), p=2)

        # refine the graph from the node embeddings until iter_condition
        # says to stop
        while iter_condition(args, adj_sim_prev, adj_sim, ori_adj_norm, t):
            t += 1
            adj_sim_prev = adj_sim.detach()
            logits, h, adj_sim, _ = model(features,
                                          h,
                                          adj,
                                          adj_feat,
                                          mode='emb',
                                          norm_graph_reg_loss=args.ngrl)
            # exists_zero_lines(h)
            loss_adj_emb = cal_loss(args, cla_loss, logits, train_mask, labels,
                                    adj_sim, features)
            loss_list.append(loss_adj_emb)
        # the epoch loss is the mean over all refinement iterations
        loss = torch.mean(torch.stack(loss_list))
        optimizer.zero_grad()

        # Stops if an anomaly is detected during backward
        with torch.autograd.detect_anomaly():
            loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        val_acc = evaluate(model, features, labels, val_mask, adj)
        test_acc = evaluate(model, features, labels, test_mask, adj)

        # NaN during the three untimed warm-up epochs, without the NumPy
        # "Mean of empty slice" warning.
        mean_dur = np.mean(dur) if dur else float('nan')
        print(
            f"IDGL-Epoch {epoch:05d} | Time(s) {mean_dur:.4f} | Loss {loss.item():.4f} | TrainAcc {train_acc:.4f} | ValAcc {val_acc:.4f} | TestAcc {test_acc:.4f}"
        )
        if args.early_stop > 0:
            checkpoint_saved = True
            if stopper.step(val_acc, model):
                break
    if args.early_stop > 0 and checkpoint_saved:
        model.load_state_dict(torch.load(es_checkpoint))
    test_acc = evaluate(model, features, labels, test_mask, adj)
    print(f"Test Accuracy {test_acc:.4f}")
    res_dict['res']['IDGL_acc'] = f'{test_acc:.4f}'
    print(res_dict['res'])
    print(res_dict['parameters'])
    return res_dict
示例#18
0
def main(args):
    """Train a sampled GCN from remotely produced NodeFlows and test it.

    Mini-batches arrive through a ``SamplerReceiver``; after every epoch the
    trained weights are copied into a separate inference model, which is then
    evaluated on the test nodes with a local ``NeighborSampler``.
    """
    # load and preprocess dataset
    data = load_data(args)

    wants_self_loop = args.self_loop and not args.dataset.startswith('reddit')
    if wants_self_loop:
        node_count = len(data.graph)
        data.graph.add_edges_from([(v, v) for v in range(node_count)])

    # node ids of the training / test sets (train_nid is not used below)
    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # prefer bool masks when this PyTorch build has them
    if hasattr(torch, 'BoolTensor'):
        mask_type = torch.BoolTensor
    else:
        mask_type = torch.ByteTensor
    train_mask = mask_type(data.train_mask)
    val_mask = mask_type(data.val_mask)
    test_mask = mask_type(data.test_mask)

    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, n_train_samples,
                             n_val_samples, n_test_samples))

    # graph and per-node normalizer (reciprocal of the in-degree)
    g = DGLGraph(data.graph, readonly=True)
    in_degree = g.in_degrees().float().unsqueeze(1)
    norm = 1. / in_degree

    use_cuda = args.gpu >= 0
    if use_cuda:
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        norm = norm.cuda()

    g.ndata['features'] = features

    num_neighbors = args.num_neighbors  # not used below

    g.ndata['norm'] = norm

    # training model
    model = GCNSampling(in_feats, args.n_hidden, n_classes, args.n_layers,
                        F.relu, args.dropout)
    if use_cuda:
        model.cuda()

    loss_fcn = nn.CrossEntropyLoss()

    # inference model; its weights are overwritten after every epoch
    infer_model = GCNInfer(in_feats, args.n_hidden, n_classes, args.n_layers,
                           F.relu)
    if use_cuda:
        infer_model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Create sampler receiver
    sampler = dgl.contrib.sampling.SamplerReceiver(graph=g,
                                                   addr=args.ip,
                                                   num_sender=args.num_sampler)

    dur = []  # not used below
    for epoch in range(args.n_epochs):
        # ---- training on the received NodeFlows ----
        for nf in sampler:
            nf.copy_from_parent()
            model.train()
            pred = model(nf)
            # labels of the nodes in the last NodeFlow layer
            seed_ids = nf.layer_parent_nid(-1).to(device=pred.device,
                                                  dtype=torch.long)
            loss = loss_fcn(pred, labels[seed_ids])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # ---- sync trained weights into the inference model ----
        param_pairs = zip(infer_model.parameters(), model.parameters())
        for target, source in param_pairs:
            target.data.copy_(source.data)

        # ---- evaluation on the test nodes ----
        num_acc = 0.
        test_sampler = dgl.contrib.sampling.NeighborSampler(
            g,
            args.test_batch_size,
            g.number_of_nodes(),
            neighbor_type='in',
            num_workers=32,
            num_hops=args.n_layers + 1,
            seed_nodes=test_nid)
        for nf in test_sampler:
            nf.copy_from_parent()
            infer_model.eval()
            with torch.no_grad():
                pred = infer_model(nf)
                seed_ids = nf.layer_parent_nid(-1).to(device=pred.device,
                                                      dtype=torch.long)
                hits = pred.argmax(dim=1) == labels[seed_ids]
                num_acc += hits.sum().cpu().item()

        print("Test Accuracy {:.4f}".format(num_acc / n_test_samples))
示例#19
0
def main(args):
    """Train and evaluate a GCN node classifier on the full graph.

    Loads the dataset with ``load_data(args)``, rebuilds the graph with
    exactly one self loop per node, stores the symmetric-normalization factor
    ``deg ** -0.5`` on the nodes, then trains for ``args.n_epochs`` epochs
    with Adam and cross-entropy on the training mask. Validation accuracy is
    printed after every epoch and test accuracy at the end.

    Args:
        args: parsed options. This function reads ``gpu``, ``n_hidden``,
            ``n_layers``, ``dropout``, ``lr``, ``weight_decay`` and
            ``n_epochs``; ``load_data`` receives the whole object.
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        # older PyTorch releases without a dedicated bool tensor type
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, train_mask.sum().item(),
                             val_mask.sum().item(), test_mask.sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # drop existing self loops first so every node ends up with exactly one
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    # zero in-degree gives inf; neutralize it
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                args.dropout)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # per-epoch wall-clock times; the first three epochs are treated as
    # warm-up and are not recorded
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        # np.mean([]) emits a "Mean of empty slice" RuntimeWarning; during the
        # warm-up epochs report NaN explicitly instead (same printed output).
        mean_dur = np.mean(dur) if dur else float('nan')

        acc = evaluate(model, features, labels, val_mask)
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(epoch, mean_dur, loss.item(),
                                          acc, n_edges / mean_dur / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
示例#20
0
def reversed_graph(g):
    """Return a new DGLGraph with as many nodes as ``g`` and every edge flipped.

    Only the structure is rebuilt; nothing is copied from ``g.ndata`` or
    ``g.edata``.
    """
    src, dst = g.all_edges()
    flipped = DGLGraph()
    flipped.add_nodes(g.number_of_nodes())
    # swap endpoints: an edge src->dst in g becomes dst->src
    flipped.add_edges(dst, src)
    return flipped
示例#21
0
文件: gat.py 项目: ruizewang/dgl
def main(args):
    """Train and evaluate a GAT node classifier on the full graph.

    Loads the dataset with ``load_data(args)``, adds a self loop to every
    node, trains for ``args.epochs`` epochs with Adam and NLL loss on the
    training mask, prints validation accuracy every 100 epochs, then prints
    the average wall-clock time per epoch and the test accuracy.

    Args:
        args: parsed options. This function reads ``gpu``, ``num_layers``,
            ``num_hidden``, ``num_heads``, ``in_drop``, ``attn_drop``,
            ``residual``, ``lr``, ``weight_decay`` and ``epochs``;
            ``load_data`` receives the whole object.
    """
    # load and preprocess dataset
    data = load_data(args)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    mask = torch.ByteTensor(data.train_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        mask = mask.cuda()
        val_mask = val_mask.cuda()
        # keep the test mask on the same device as the tensors it indexes,
        # like the train and validation masks
        test_mask = test_mask.cuda()

    # create DGL graph
    g = DGLGraph(data.graph)
    # add self loop
    g.add_edges(g.nodes(), g.nodes())
    # create model
    model = GAT(g, args.num_layers, in_feats, args.num_hidden, n_classes,
                args.num_heads, F.elu, args.in_drop, args.attn_drop,
                args.residual)
    if cuda:
        model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # per-epoch wall-clock times; the first three epochs are treated as
    # warm-up and are not recorded
    dur = []
    begin_time = time.time()
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        logp = F.log_softmax(logits, 1)
        loss = F.nll_loss(logp[mask], labels[mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        # np.mean([]) emits a "Mean of empty slice" RuntimeWarning; during the
        # warm-up epochs report NaN explicitly instead (same printed output).
        mean_dur = np.mean(dur) if dur else float('nan')
        print(
            "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}"
            .format(epoch, loss.item(), mean_dur,
                    n_edges / mean_dur / 1000))
        if epoch % 100 == 0:
            acc = evaluate(model, features, labels, val_mask)
            print("Validation Accuracy {:.4f}".format(acc))

    end_time = time.time()
    # average wall-clock seconds per epoch, including evaluation and printing
    print((end_time - begin_time) / args.epochs)
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
示例#22
0
def test_graph2():
    """Batched graph with node features."""
    small = DGLGraph([(0, 1), (0, 2), (1, 2)])
    large = DGLGraph([(0, 1), (1, 2), (1, 3), (1, 4)])
    batched = dgl.batch([small, large])
    # one scalar feature per node: 0, 1, 2, ... as a float column
    total_nodes = batched.number_of_nodes()
    node_feats = torch.arange(total_nodes).float().reshape(-1, 1)
    return batched, node_feats
示例#23
0
def main(args):
    """Train a GCN (optionally with layer norm) and report accuracies.

    Only 'cora', 'citeseer' and 'pubmed' are accepted. After training, the
    final test accuracy and the best per-epoch validation / test accuracies
    are printed.
    """
    # the boolean options arrive as the strings 'True' / 'False'
    assert args.self_loop in ['True', 'False'], [
        "Only True or False for self_loop, get ", args.self_loop
    ]
    assert args.use_layernorm in ['True', 'False'], [
        "Only True or False for use_layernorm, get ", args.use_layernorm
    ]
    self_loop = args.self_loop == 'True'
    use_layernorm = args.use_layernorm == 'True'
    global t0

    if args.dataset not in {'cora', 'citeseer', 'pubmed'}:
        raise NotImplementedError(f'{args.dataset} is not a valid dataset')
    data = load_data(args)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.ByteTensor(data.train_mask)
    val_mask = torch.ByteTensor(data.val_mask)
    test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    n_train = train_mask.sum().item()
    n_val = val_mask.sum().item()
    n_test = test_mask.sum().item()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, n_train, n_val, n_test))

    # the device is chosen from CUDA availability
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    features = features.to(device)
    labels = labels.to(device)
    train_mask = train_mask.to(device)
    val_mask = val_mask.to(device)
    test_mask = test_mask.to(device)

    # graph preprocess and calculate normalization factor
    nx_graph = data.graph
    if self_loop:
        # replace whatever self loops exist with exactly one per node
        nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
        nx_graph.add_edges_from(zip(nx_graph.nodes(), nx_graph.nodes()))
    g = DGLGraph(nx_graph)
    g = g.to(device)
    n_edges = g.number_of_edges()

    # normalization: deg ** -0.5, with inf (zero in-degree) replaced by 0
    in_degrees = g.in_degrees().float()
    norm = torch.pow(in_degrees, -0.5)
    norm[torch.isinf(norm)] = 0
    norm = norm.to(device)
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers, F.relu,
                args.dropout, use_layernorm)
    model = model.to(device)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    all_val_acc = []
    all_test_acc = []
    dur = []
    for epoch in range(args.n_epochs):
        # step schedule: divide the learning rate by 10 at 50% and at 75% of
        # the run
        decay_epochs = (int(0.5 * args.n_epochs), int(0.75 * args.n_epochs))
        if args.lr_scheduler and epoch in decay_epochs:
            for group in optimizer.param_groups:
                group['lr'] = group['lr'] / 10

        model.train()
        timed = epoch >= 3
        if timed:
            t0 = time.time()

        # forward / backward / update
        optimizer.zero_grad()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        loss.backward()
        optimizer.step()

        if timed:
            dur.append(time.time() - t0)

        acc_val = evaluate(model, features, labels, val_mask)
        acc_test = evaluate(model, features, labels, test_mask)
        all_val_acc.append(acc_val)
        all_test_acc.append(acc_test)

    acc = evaluate(model, features, labels, test_mask)
    print(f"Final Test Accuracy: {acc:.4f}")
    print(f"Best Val Accuracy: {max(all_val_acc):.4f}")
    print(f"Best Test Accuracy: {max(all_test_acc):.4f}")
示例#24
0
def test_graph3():
    """Graph with node features and edge features."""
    graph = DGLGraph([(0, 1), (0, 2), (1, 2)])
    # one float column per node, two float columns per edge
    node_feats = torch.arange(graph.number_of_nodes()).float().reshape(-1, 1)
    edge_value_count = 2 * graph.number_of_edges()
    edge_feats = torch.arange(edge_value_count).float().reshape(-1, 2)
    return graph, node_feats, edge_feats
示例#25
0
# Convert the NumPy label array to a tensor and flatten it to one dimension.
labels = torch.from_numpy(labels).view(-1)

###############################################################################
# Create graph and model
# ~~~~~~~~~~~~~~~~~~~~~~~

# configurations
n_hidden = 16  # number of hidden units
n_bases = -1  # use number of relations as number of bases
n_hidden_layers = 0  # use 1 input layer, 1 output layer, no hidden layer
n_epochs = 25  # epochs to train
lr = 0.01  # learning rate
l2norm = 0  # L2 norm coefficient

# create graph
g = DGLGraph()
g.add_nodes(num_nodes)
g.add_edges(data.edge_src, data.edge_dst)
# attach the relation type and the normalization factor to every edge
g.edata.update({'rel_type': edge_type, 'norm': edge_norm})

# create model
# len(g) is passed as the first argument
# NOTE(review): presumably the number of nodes — confirm against Model.
model = Model(len(g),
              n_hidden,
              num_classes,
              num_rels,
              num_bases=n_bases,
              num_hidden_layers=n_hidden_layers)

###############################################################################
# Training loop
# ~~~~~~~~~~~~~~~~
示例#26
0
def test_graph5():
    """Graph with node types and edge distances."""
    graph = DGLGraph([(0, 1), (0, 2), (1, 2)])
    node_types = torch.LongTensor([0, 1, 0])
    # one random float per edge
    edge_dists = torch.randn(3, 1)
    return graph, node_types, edge_dists
示例#27
0
def generate_graph(grad=False):
    """Build a 10-node, 17-edge test graph with random 'h' / 'w' features.

    Node 0 is the source and node 9 the sink: every node 1..8 gets an edge
    from 0 and an edge to 9, and one extra edge flows back from 9 to 0.
    When ``grad`` is true both feature tensors are passed through
    ``F.attach_grad`` before being stored.
    """
    num_nodes, num_edges = 10, 17
    source, sink = 0, num_nodes - 1

    graph = DGLGraph()
    graph.add_nodes(num_nodes)
    # edges are added pairwise (source -> mid, mid -> sink) so the edge ids
    # interleave
    for mid in range(source + 1, sink):
        graph.add_edge(source, mid)
        graph.add_edge(mid, sink)
    # back flow from the sink to the source
    graph.add_edge(sink, source)

    node_feat = F.randn((num_nodes, D))
    edge_feat = F.randn((num_edges, D))
    if grad:
        node_feat = F.attach_grad(node_feat)
        edge_feat = F.attach_grad(edge_feat)

    graph.ndata['h'] = node_feat
    graph.edata['w'] = edge_feat
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)
    return graph
示例#28
0
def test_graph6():
    """Batched graph with node types and edge distances."""
    small = DGLGraph([(0, 1), (0, 2), (1, 2)])
    large = DGLGraph([(0, 1), (1, 2), (1, 3), (1, 4)])
    batched = dgl.batch([small, large])
    # 8 node type ids and 7 random edge values
    node_types = torch.LongTensor([0, 1, 0, 2, 0, 3, 4, 4])
    edge_dists = torch.randn(7, 1)
    return batched, node_types, edge_dists
示例#29
0
def test_recv_0deg_newfld():
    # recv on zero-in-degree nodes when the reducer writes a field ('h1')
    # that does not exist on the graph yet
    graph = DGLGraph()
    graph.add_nodes(2)
    graph.add_edge(0, 1)  # node 0 has no incoming edge, node 1 has one

    def _send_h(edges):
        return {'m': edges.src['h']}

    def _sum_into_h1(nodes):
        return {'h1': nodes.data['h'] + F.sum(nodes.mailbox['m'], 1)}

    def _double_h1(nodes):
        return {'h1': nodes.data['h1'] * 2}

    def _fill_with_two(shape, dtype, ctx, ids):
        return 2 + F.zeros(shape, dtype=dtype, ctx=ctx)

    graph.register_message_func(_send_h)
    graph.register_reduce_func(_sum_into_h1)
    graph.register_apply_node_func(_double_h1)

    # test#1: recv both 0deg and non-0deg nodes
    h_before = F.randn((2, 5))
    graph.set_n_initializer(_fill_with_two, 'h1')
    graph.ndata['h'] = h_before
    graph.send((0, 1))
    graph.recv([0, 1])
    h1_after = graph.ndata.pop('h1')
    # 0deg node: initializer value 2, then doubled by the apply function
    assert F.allclose(h1_after[0], F.full_1d(5, 4, dtype=F.float32))
    # non-0deg node: own 'h' plus the received 'h', doubled
    assert F.allclose(h1_after[1], F.sum(h_before, 0) * 2)

    # test#2: recv only 0deg node
    h_before = F.randn((2, 5))
    graph.ndata['h'] = h_before
    graph.ndata['h1'] = F.full((2, 5), -1, F.int64)  # this is necessary
    graph.send((0, 1))
    graph.recv(0)
    h1_after = graph.ndata.pop('h1')
    # 0deg node: only the apply function runs, -1 becomes -2
    assert F.allclose(h1_after[0], F.full_1d(5, -2, F.int64))
    # node 1 was not part of the recv call, so it keeps -1
    assert F.allclose(h1_after[1], F.full_1d(5, -1, F.int64))
示例#30
0
def main(args):
    """Train and evaluate a GCN node classifier with MXNet/Gluon.

    Loads the dataset, optionally gives every node exactly one self loop,
    stores ``deg ** -0.5`` as the node normalizer, trains with Adam and a
    masked softmax cross-entropy loss, and prints validation accuracy from
    the fourth epoch on plus the final test accuracy.
    """
    # load and preprocess dataset
    data = load_data(args)
    features = mx.nd.array(data.features)
    labels = mx.nd.array(data.labels)
    train_mask = mx.nd.array(data.train_mask)
    val_mask = mx.nd.array(data.val_mask)
    test_mask = mx.nd.array(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    train_count = train_mask.sum().asscalar()
    val_count = val_mask.sum().asscalar()
    test_count = test_mask.sum().asscalar()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" % (n_edges, n_classes, train_count, val_count,
                             test_count))

    # pick the compute context: CPU when args.gpu is negative
    use_gpu = args.gpu >= 0
    ctx = mx.gpu(args.gpu) if use_gpu else mx.cpu(0)

    features = features.as_in_context(ctx)
    labels = labels.as_in_context(ctx)
    train_mask = train_mask.as_in_context(ctx)
    val_mask = val_mask.as_in_context(ctx)
    test_mask = test_mask.as_in_context(ctx)

    # build the graph
    nx_graph = data.graph
    if args.self_loop:
        # replace whatever self loops exist with exactly one per node
        nx_graph.remove_edges_from(nx_graph.selfloop_edges())
        nx_graph.add_edges_from(zip(nx_graph.nodes(), nx_graph.nodes()))
    g = DGLGraph(nx_graph)

    # normalization
    in_degrees = g.in_degrees().astype('float32')
    norm = mx.nd.power(in_degrees, -0.5)
    if use_gpu:
        norm = norm.as_in_context(ctx)
    g.ndata['norm'] = mx.nd.expand_dims(norm, 1)

    # create GCN model
    model = GCN(g, in_feats, args.n_hidden, n_classes, args.n_layers,
                mx.nd.relu, args.dropout)
    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    # use optimizer
    print(model.collect_params())
    optimizer_options = {
        'learning_rate': args.lr,
        'wd': args.weight_decay
    }
    trainer = gluon.Trainer(model.collect_params(), 'adam', optimizer_options)

    # per-epoch wall-clock times; the first three epochs are not recorded
    dur = []
    for epoch in range(args.n_epochs):
        timed = epoch >= 3
        if timed:
            t0 = time.time()

        # forward: the train mask is passed as the per-sample weight of the
        # loss
        with mx.autograd.record():
            pred = model(features)
            sample_weight = mx.nd.expand_dims(train_mask, 1)
            loss = loss_fcn(pred, labels, sample_weight)
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if timed:
            # fetch the scalar before stopping the clock
            loss.asscalar()
            dur.append(time.time() - t0)
            acc = evaluate(model, features, labels, val_mask)
            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur),
                                              loss.asscalar(), acc,
                                              n_edges / np.mean(dur) / 1000))

    # test set accuracy
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))