Example #1
    def forward(self, g, x=None):
        """Forward pass.

        Parameters
        ----------
        g : `dgl.DGLHeteroGraph`
            input graph
        x : Tensor, optional
            input node features; taken from ``g.nodes["n1"].data["h0"]``
            and passed through ``self.f_in`` when omitted

        Returns
        -------
        g : `dgl.DGLHeteroGraph`
            output graph
        """
        import dgl
        # get homogeneous subgraph
        g_ = dgl.to_homo(g.edge_type_subgraph(["n1_neighbors_n1"]))

        if x is None:
            # get node attributes
            x = g.nodes["n1"].data["h0"]
            x = self.f_in(x)

        # message passing on homogeneous graph
        x = self._sequential(g_, x)

        # put attribute back in the graph
        g.nodes["n1"].data["h"] = x

        return g
Example #2
def construct_homo_from_hetero_dglgraph(g):

    homo_g = dgl.to_homo(g)  # still a heterograph in dgl

    hg = dgl.DGLGraph()
    hg.add_nodes(homo_g.number_of_nodes('_N'))
    u, v = homo_g.all_edges(form='uv', order='eid')
    hg.add_edges(u, v, homo_g.edata)
    hg.readonly(True)  # sampler needs readonly graph
    return hg
Example #3
def main(args):
    # load graph data
    if args.dataset == 'aifb':
        dataset = AIFBDataset()
    elif args.dataset == 'mutag':
        dataset = MUTAGDataset()
    elif args.dataset == 'bgs':
        dataset = BGSDataset()
    elif args.dataset == 'am':
        dataset = AMDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    # Load from hetero-graph
    hg = dataset[0]

    num_rels = len(hg.canonical_etypes)
    num_of_ntype = len(hg.ntypes)
    category = dataset.predict_category
    num_classes = dataset.num_classes
    train_mask = hg.nodes[category].data.pop('train_mask')
    test_mask = hg.nodes[category].data.pop('test_mask')
    train_idx = mx.nd.array(np.nonzero(train_mask.asnumpy())[0], dtype='int64')
    test_idx = mx.nd.array(np.nonzero(test_mask.asnumpy())[0], dtype='int64')
    labels = mx.nd.array(hg.nodes[category].data.pop('labels'), dtype='int64')

    # split dataset into train, validate, test
    if args.validation:
        val_idx = train_idx[:len(train_idx) // 5]
        train_idx = train_idx[len(train_idx) // 5:]
    else:
        val_idx = train_idx

    # calculate norm for each edge type and store in edge
    for canonical_etype in hg.canonical_etypes:
        u, v, eid = hg.all_edges(form='all', etype=canonical_etype)
        v = v.asnumpy()
        _, inverse_index, count = np.unique(v,
                                            return_inverse=True,
                                            return_counts=True)
        degrees = count[inverse_index]
        norm = np.ones(eid.shape[0]) / degrees
        hg.edges[canonical_etype].data['norm'] = mx.nd.expand_dims(
            mx.nd.array(norm), axis=1)

    # get target category id
    category_id = len(hg.ntypes)
    for i, ntype in enumerate(hg.ntypes):
        if ntype == category:
            category_id = i

    g = dgl.to_homo(hg)
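    # to_homo stores the original types in g.ndata[dgl.NTYPE] / g.edata[dgl.ETYPE]
    # and the original per-type IDs in g.ndata[dgl.NID] / g.edata[dgl.EID]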
    num_nodes = g.number_of_nodes()
    node_ids = mx.nd.arange(num_nodes)
    edge_norm = g.edata['norm']
    edge_type = g.edata[dgl.ETYPE]

    # find out the target node ids in g
    node_tids = g.ndata[dgl.NTYPE]
    loc = (node_tids == category_id)
    loc = mx.nd.array(np.nonzero(loc.asnumpy())[0], dtype='int64')
    target_idx = node_ids[loc]

    # since the nodes are featureless, the input feature is then the node id.
    feats = mx.nd.arange(num_nodes, dtype='int32')

    # check cuda
    use_cuda = args.gpu >= 0
    if use_cuda:
        ctx = mx.gpu(args.gpu)
        feats = feats.as_in_context(ctx)
        edge_type = edge_type.as_in_context(ctx)
        edge_norm = edge_norm.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        train_idx = train_idx.as_in_context(ctx)
        g = g.to(ctx)
    else:
        ctx = mx.cpu(0)

    # create model
    model = EntityClassify(num_nodes,
                           args.n_hidden,
                           num_classes,
                           num_rels,
                           num_bases=args.n_bases,
                           num_hidden_layers=args.n_layers - 2,
                           dropout=args.dropout,
                           use_self_loop=args.use_self_loop,
                           gpu_id=args.gpu)
    model.initialize(ctx=ctx)

    # optimizer
    trainer = gluon.Trainer(model.collect_params(), 'adam', {
        'learning_rate': args.lr,
        'wd': args.l2norm
    })
    loss_fcn = gluon.loss.SoftmaxCELoss(from_logits=False)

    # training loop
    print("start training...")
    forward_time = []
    backward_time = []
    for epoch in range(args.n_epochs):
        t0 = time.time()
        with mx.autograd.record():
            pred = model(g, feats, edge_type, edge_norm)
            pred = pred[target_idx]
            loss = loss_fcn(pred[train_idx], labels[train_idx])
        t1 = time.time()
        loss.backward()
        trainer.step(len(train_idx))
        t2 = time.time()

        forward_time.append(t1 - t0)
        backward_time.append(t2 - t1)
        print(
            "Epoch {:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}"
            .format(epoch, forward_time[-1], backward_time[-1]))

        train_acc = F.sum(
            mx.nd.cast(pred[train_idx].argmax(axis=1), 'int64') ==
            labels[train_idx]).asscalar() / train_idx.shape[0]
        val_acc = F.sum(
            mx.nd.cast(pred[val_idx].argmax(
                axis=1), 'int64') == labels[val_idx]).asscalar() / len(val_idx)
        print("Train Accuracy: {:.4f} | Validation Accuracy: {:.4f}".format(
            train_acc, val_acc))
    print()

    logits = model.forward(g, feats, edge_type, edge_norm)
    logits = logits[target_idx]
    test_acc = F.sum(
        mx.nd.cast(logits[test_idx].argmax(
            axis=1), 'int64') == labels[test_idx]).asscalar() / len(test_idx)
    print("Test Accuracy: {:.4f}".format(test_acc))
    print()

    print("Mean forward time: {:4f}".format(
        np.mean(forward_time[len(forward_time) // 4:])))
    print("Mean backward time: {:4f}".format(
        np.mean(backward_time[len(backward_time) // 4:])))
Example #4
def main(args, devices):
    # load graph data
    ogb_dataset = False
    if args.dataset == 'aifb':
        dataset = AIFBDataset()
    elif args.dataset == 'mutag':
        dataset = MUTAGDataset()
    elif args.dataset == 'bgs':
        dataset = BGSDataset()
    elif args.dataset == 'am':
        dataset = AMDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    # Load from hetero-graph
    hg = dataset[0]

    num_rels = len(hg.canonical_etypes)
    num_of_ntype = len(hg.ntypes)
    category = dataset.predict_category
    num_classes = dataset.num_classes
    train_mask = hg.nodes[category].data.pop('train_mask')
    test_mask = hg.nodes[category].data.pop('test_mask')
    labels = hg.nodes[category].data.pop('labels')
    train_idx = th.nonzero(train_mask).squeeze()
    test_idx = th.nonzero(test_mask).squeeze()

    # split dataset into train, validate, test
    if args.validation:
        val_idx = train_idx[:len(train_idx) // 5]
        train_idx = train_idx[len(train_idx) // 5:]
    else:
        val_idx = train_idx

    # calculate norm for each edge type and store in edge
    for canonical_etype in hg.canonical_etypes:
        u, v, eid = hg.all_edges(form='all', etype=canonical_etype)
        _, inverse_index, count = th.unique(v,
                                            return_inverse=True,
                                            return_counts=True)
        degrees = count[inverse_index]
        norm = th.ones(eid.shape[0]) / degrees
        norm = norm.unsqueeze(1)
        hg.edges[canonical_etype].data['norm'] = norm

    # get target category id
    category_id = len(hg.ntypes)
    for i, ntype in enumerate(hg.ntypes):
        if ntype == category:
            category_id = i

    g = dgl.to_homo(hg)
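    # put the type and norm tensors in shared memory so the worker
    # processes spawned below can read them without copying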
    g.ndata[dgl.NTYPE].share_memory_()
    g.edata[dgl.ETYPE].share_memory_()
    g.edata['norm'].share_memory_()
    node_ids = th.arange(g.number_of_nodes())

    # find out the target node ids
    node_tids = g.ndata[dgl.NTYPE]
    loc = (node_tids == category_id)
    target_idx = node_ids[loc]
    target_idx.share_memory_()

    n_gpus = len(devices)
    # cpu
    if devices[0] == -1:
        run(0, 0, args, ['cpu'],
            (g, num_of_ntype, num_classes, num_rels, target_idx, train_idx,
             val_idx, test_idx, labels))
    # gpu
    elif n_gpus == 1:
        run(0, n_gpus, args, devices,
            (g, num_of_ntype, num_classes, num_rels, target_idx, train_idx,
             val_idx, test_idx, labels))
    # multi gpu
    else:
        procs = []
        num_train_seeds = train_idx.shape[0]
        tseeds_per_proc = num_train_seeds // n_gpus
        for proc_id in range(n_gpus):
            proc_train_seeds = train_idx[proc_id * tseeds_per_proc :
                                         (proc_id + 1) * tseeds_per_proc \
                                         if (proc_id + 1) * tseeds_per_proc < num_train_seeds \
                                         else num_train_seeds]
            p = mp.Process(target=run,
                           args=(proc_id, n_gpus, args, devices,
                                 (g, num_of_ntype, num_classes, num_rels,
                                  target_idx, proc_train_seeds, val_idx,
                                  test_idx, labels)))
            p.start()
            procs.append(p)
        for p in procs:
            p.join()
Example #5
def test_convert():
    hg = create_test_heterograph()
    hs = []
    for ntype in hg.ntypes:
        h = F.randn((hg.number_of_nodes(ntype), 5))
        hg.nodes[ntype].data['h'] = h
        hs.append(h)
    hg.nodes['user'].data['x'] = F.randn((3, 3))
    ws = []
    for etype in hg.canonical_etypes:
        w = F.randn((hg.number_of_edges(etype), 5))
        hg.edges[etype].data['w'] = w
        ws.append(w)
    hg.edges['plays'].data['x'] = F.randn((4, 3))

    g = dgl.to_homo(hg)
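    # features with the same name on every type ('h', 'w') are concatenated;
    # features present on only some types ('x') are dropped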
    assert F.array_equal(F.cat(hs, dim=0), g.ndata['h'])
    assert 'x' not in g.ndata
    assert F.array_equal(F.cat(ws, dim=0), g.edata['w'])
    assert 'x' not in g.edata

    src, dst = g.all_edges(order='eid')
    src = F.asnumpy(src)
    dst = F.asnumpy(dst)
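    # NTYPE/ETYPE hold each node's/edge's type ID and NID/EID its original
    # per-type ID, so every homogeneous edge maps back to a heterograph edge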
    etype_id, eid = F.asnumpy(g.edata[dgl.ETYPE]), F.asnumpy(g.edata[dgl.EID])
    ntype_id, nid = F.asnumpy(g.ndata[dgl.NTYPE]), F.asnumpy(g.ndata[dgl.NID])
    for i in range(g.number_of_edges()):
        srctype = hg.ntypes[ntype_id[src[i]]]
        dsttype = hg.ntypes[ntype_id[dst[i]]]
        etype = hg.etypes[etype_id[i]]
        src_i, dst_i = hg.find_edges([eid[i]], (srctype, etype, dsttype))
        assert np.asscalar(F.asnumpy(src_i)) == nid[src[i]]
        assert np.asscalar(F.asnumpy(dst_i)) == nid[dst[i]]

    mg = nx.MultiDiGraph([('user', 'user', 'follows'),
                          ('user', 'game', 'plays'),
                          ('user', 'game', 'wishes'),
                          ('developer', 'game', 'develops')])

    for _mg in [None, mg]:
        hg2 = dgl.to_hetero(g, ['user', 'game', 'developer'],
                            ['follows', 'plays', 'wishes', 'develops'],
                            ntype_field=dgl.NTYPE,
                            etype_field=dgl.ETYPE,
                            metagraph=_mg)
        assert set(hg.ntypes) == set(hg2.ntypes)
        assert set(hg.canonical_etypes) == set(hg2.canonical_etypes)
        for ntype in hg.ntypes:
            assert hg.number_of_nodes(ntype) == hg2.number_of_nodes(ntype)
            assert F.array_equal(hg.nodes[ntype].data['h'],
                                 hg2.nodes[ntype].data['h'])
        for canonical_etype in hg.canonical_etypes:
            src, dst = hg.all_edges(etype=canonical_etype, order='eid')
            src2, dst2 = hg2.all_edges(etype=canonical_etype, order='eid')
            assert F.array_equal(src, src2)
            assert F.array_equal(dst, dst2)
            assert F.array_equal(hg.edges[canonical_etype].data['w'],
                                 hg2.edges[canonical_etype].data['w'])

    # hetero_from_homo test case 2
    g = dgl.graph([(0, 2), (1, 2), (2, 3), (0, 3)])
    g.ndata[dgl.NTYPE] = F.tensor([0, 0, 1, 2])
    g.edata[dgl.ETYPE] = F.tensor([0, 0, 1, 2])
    hg = dgl.to_hetero(g, ['l0', 'l1', 'l2'], ['e0', 'e1', 'e2'])
    assert set(hg.canonical_etypes) == set([('l0', 'e0', 'l1'),
                                            ('l1', 'e1', 'l2'),
                                            ('l0', 'e2', 'l2')])
    assert hg.number_of_nodes('l0') == 2
    assert hg.number_of_nodes('l1') == 1
    assert hg.number_of_nodes('l2') == 1
    assert hg.number_of_edges('e0') == 2
    assert hg.number_of_edges('e1') == 1
    assert hg.number_of_edges('e2') == 1

    # hetero_from_homo test case 3
    mg = nx.MultiDiGraph([('user', 'movie', 'watches'),
                          ('user', 'TV', 'watches')])
    g = dgl.graph([(0, 1), (0, 2)])
    g.ndata[dgl.NTYPE] = F.tensor([0, 1, 2])
    g.edata[dgl.ETYPE] = F.tensor([0, 0])
    for _mg in [None, mg]:
        hg = dgl.to_hetero(g, ['user', 'TV', 'movie'], ['watches'],
                           metagraph=_mg)
        assert set(hg.canonical_etypes) == set([('user', 'watches', 'movie'),
                                                ('user', 'watches', 'TV')])
        assert hg.number_of_nodes('user') == 1
        assert hg.number_of_nodes('TV') == 1
        assert hg.number_of_nodes('movie') == 1
        assert hg.number_of_edges(('user', 'watches', 'TV')) == 1
        assert hg.number_of_edges(('user', 'watches', 'movie')) == 1
        assert len(hg.etypes) == 2

    # hetero_to_homo test case 2
    hg = dgl.bipartite([(0, 0), (1, 1)], card=(2, 3))
    g = dgl.to_homo(hg)
    assert g.number_of_nodes() == 5
Example #6
def main(args, devices):
    # load graph data
    ogb_dataset = False
    if args.dataset == 'aifb':
        dataset = AIFBDataset()
    elif args.dataset == 'mutag':
        dataset = MUTAGDataset()
    elif args.dataset == 'bgs':
        dataset = BGSDataset()
    elif args.dataset == 'am':
        dataset = AMDataset()
    elif args.dataset == 'ogbn-mag':
        dataset = DglNodePropPredDataset(name=args.dataset)
        ogb_dataset = True
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    if ogb_dataset is True:
        split_idx = dataset.get_idx_split()
        train_idx = split_idx["train"]['paper']
        val_idx = split_idx["valid"]['paper']
        test_idx = split_idx["test"]['paper']
        hg_orig, labels = dataset[0]
        subgs = {}
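        # rebuild the heterograph with an added reverse relation per edge type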
        for etype in hg_orig.canonical_etypes:
            u, v = hg_orig.all_edges(etype=etype)
            subgs[etype] = (u, v)
            subgs[(etype[2], 'rev-' + etype[1], etype[0])] = (v, u)
        hg = dgl.heterograph(subgs)
        hg.nodes['paper'].data['feat'] = hg_orig.nodes['paper'].data['feat']
        labels = labels['paper'].squeeze()

        num_rels = len(hg.canonical_etypes)
        num_of_ntype = len(hg.ntypes)
        num_classes = dataset.num_classes
        if args.dataset == 'ogbn-mag':
            category = 'paper'
        print('Number of relations: {}'.format(num_rels))
        print('Number of class: {}'.format(num_classes))
        print('Number of train: {}'.format(len(train_idx)))
        print('Number of valid: {}'.format(len(val_idx)))
        print('Number of test: {}'.format(len(test_idx)))

        if args.node_feats:
            node_feats = []
            for ntype in hg.ntypes:
                if len(hg.nodes[ntype].data) == 0:
                    node_feats.append(None)
                else:
                    assert len(hg.nodes[ntype].data) == 1
                    feat = hg.nodes[ntype].data.pop('feat')
                    node_feats.append(feat.share_memory_())
        else:
            node_feats = [None] * num_of_ntype
    else:
        # Load from hetero-graph
        hg = dataset[0]

        num_rels = len(hg.canonical_etypes)
        num_of_ntype = len(hg.ntypes)
        category = dataset.predict_category
        num_classes = dataset.num_classes
        train_mask = hg.nodes[category].data.pop('train_mask')
        test_mask = hg.nodes[category].data.pop('test_mask')
        labels = hg.nodes[category].data.pop('labels')
        train_idx = th.nonzero(train_mask).squeeze()
        test_idx = th.nonzero(test_mask).squeeze()
        node_feats = [None] * num_of_ntype

        # AIFB, MUTAG, BGS and AM datasets do not provide validation set split.
        # Split train set into train and validation if args.validation is set
        # otherwise use train set as the validation set.
        if args.validation:
            val_idx = train_idx[:len(train_idx) // 5]
            train_idx = train_idx[len(train_idx) // 5:]
        else:
            val_idx = train_idx

    # calculate norm for each edge type and store in edge
    if args.global_norm is False:
        for canonical_etype in hg.canonical_etypes:
            u, v, eid = hg.all_edges(form='all', etype=canonical_etype)
            _, inverse_index, count = th.unique(v,
                                                return_inverse=True,
                                                return_counts=True)
            degrees = count[inverse_index]
            norm = th.ones(eid.shape[0]) / degrees
            norm = norm.unsqueeze(1)
            hg.edges[canonical_etype].data['norm'] = norm

    # get target category id
    category_id = len(hg.ntypes)
    for i, ntype in enumerate(hg.ntypes):
        if ntype == category:
            category_id = i

    g = dgl.to_homo(hg)
    if args.global_norm:
        u, v, eid = g.all_edges(form='all')
        _, inverse_index, count = th.unique(v,
                                            return_inverse=True,
                                            return_counts=True)
        degrees = count[inverse_index]
        norm = th.ones(eid.shape[0]) / degrees
        norm = norm.unsqueeze(1)
        g.edata['norm'] = norm

    g.ndata[dgl.NTYPE].share_memory_()
    g.edata[dgl.ETYPE].share_memory_()
    g.edata['norm'].share_memory_()
    node_ids = th.arange(g.number_of_nodes())

    # find out the target node ids
    node_tids = g.ndata[dgl.NTYPE]
    loc = (node_tids == category_id)
    target_idx = node_ids[loc]
    target_idx.share_memory_()
    train_idx.share_memory_()
    val_idx.share_memory_()
    test_idx.share_memory_()

    n_gpus = len(devices)
    # cpu
    if devices[0] == -1:
        run(0, 0, args, ['cpu'],
            (g, node_feats, num_of_ntype, num_classes, num_rels, target_idx,
             train_idx, val_idx, test_idx, labels), None, None)
    # gpu
    elif n_gpus == 1:
        run(0, n_gpus, args, devices,
            (g, node_feats, num_of_ntype, num_classes, num_rels, target_idx,
             train_idx, val_idx, test_idx, labels), None, None)
    # multi gpu
    else:
        queue = mp.Queue(n_gpus)
        procs = []
        num_train_seeds = train_idx.shape[0]
        num_valid_seeds = val_idx.shape[0]
        num_test_seeds = test_idx.shape[0]
        train_seeds = th.randperm(num_train_seeds)
        valid_seeds = th.randperm(num_valid_seeds)
        test_seeds = th.randperm(num_test_seeds)
        tseeds_per_proc = num_train_seeds // n_gpus
        vseeds_per_proc = num_valid_seeds // n_gpus
        tstseeds_per_proc = num_test_seeds // n_gpus
        for proc_id in range(n_gpus):
            # we have multiple GPUs for training, evaluation and testing,
            # so split the train, valid and test sets into num-of-GPU parts.
            proc_train_seeds = train_seeds[proc_id * tseeds_per_proc :
                                           (proc_id + 1) * tseeds_per_proc \
                                           if (proc_id + 1) * tseeds_per_proc < num_train_seeds \
                                           else num_train_seeds]
            proc_valid_seeds = valid_seeds[proc_id * vseeds_per_proc :
                                           (proc_id + 1) * vseeds_per_proc \
                                           if (proc_id + 1) * vseeds_per_proc < num_valid_seeds \
                                           else num_valid_seeds]
            proc_test_seeds = test_seeds[proc_id * tstseeds_per_proc :
                                         (proc_id + 1) * tstseeds_per_proc \
                                         if (proc_id + 1) * tstseeds_per_proc < num_test_seeds \
                                         else num_test_seeds]
            p = mp.Process(target=run,
                           args=(proc_id, n_gpus, args, devices,
                                 (g, node_feats, num_of_ntype, num_classes,
                                  num_rels, target_idx, train_idx, val_idx,
                                  test_idx, labels), (proc_train_seeds,
                                                      proc_valid_seeds,
                                                      proc_test_seeds), queue))
            p.start()
            procs.append(p)
        for p in procs:
            p.join()
Example #7
import dgl

if __name__ == "__main__":
    follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
    devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
    hetero_g = dgl.hetero_from_relations([follows_g, devs_g])
    homo_g = dgl.to_homo(hetero_g)

    hetero_g_2 = dgl.to_hetero(homo_g, hetero_g.ntypes, hetero_g.etypes)
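    # hetero_g_2 should match hetero_g: to_hetero reads the type info
    # (dgl.NTYPE / dgl.ETYPE) that to_homo stored on homo_g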

    print(hetero_g)
    print(hetero_g_2)
    print("here")
Example #8
def construct_dglgraph(data_path,
                       file_prefix='test',
                       emb_path='./emb/',
                       load_version='',
                       homo=False,
                       datainput='narr argrole temp'):

    labels = json.load(
        open(os.path.join(data_path, file_prefix + '.label.json'), 'r'))

    event_file = os.path.join(
        emb_path, load_version + '.' + file_prefix + '.eventemb.npy')
    if os.access(event_file, os.F_OK):
        entity_file = os.path.join(
            emb_path, load_version + '.' + file_prefix + '.entityemb.npy')
    else:
        event_file = os.path.join(data_path, file_prefix + '.eventemb.npy')
        entity_file = os.path.join(data_path, file_prefix + '.entityemb.npy')
    event_emb = np.load(event_file)
    entity_emb = np.load(entity_file)
    print('Graph Construction: loaded %s and %s' % (event_file, entity_file))

    # construct hetero graph
    dgld = dict()
    dgld[('event', 'event2entity',
          'entity')] = [(t[0], t[1]) for t in labels['event2entity']]
    # dgld[('event', 'event2event', 'event')] = [(t[0], t[1]) for t in labels['event2event']]
    dgld[('event', 'narr', 'event')] = [(t[0], t[1]) for t in labels['narr']]
    dgld[('event', 'coref', 'event')] = [(t[0], t[1]) for t in labels['coref']]
    # dgld[('event', 'eer', 'event')] = [(t[0], t[1]) for t in labels['eer']]
    dgld[('event', 'argrole', 'entity')] = [(t[0], t[1])
                                            for t in labels['argrole']]
    dgld[('event', 'temp', 'event')] = [(t[0], t[1]) for t in labels['temp']]
    dgld[('entity', 'entity2entity',
          'entity')] = [(t[0], t[1]) for t in labels['entity2entity']]
    g = dgl.heterograph(dgld)
    g.nodes['event'].data['x'] = torch.tensor(event_emb)
    g.nodes['entity'].data['x'] = torch.tensor(entity_emb)
    g.nodes['event'].data['y'] = torch.tensor([t[0] for t in labels['event']])
    g.nodes['entity'].data['y'] = torch.tensor(
        [t[0] for t in labels['entity']])
    g.edges['event2entity'].data['y'] = torch.tensor(
        [t[2] for t in labels['event2entity']])
    g.edges['entity2entity'].data['y'] = torch.tensor(
        [t[2] for t in labels['entity2entity']])
    #### add event/entity id ####
    event_cnt = event_emb.shape[0]
    entity_cnt = entity_emb.shape[0]
    g.nodes['event'].data['id'] = torch.tensor(list(range(event_cnt)))
    g.nodes['entity'].data['id'] = torch.tensor(
        [i + event_cnt for i in range(entity_cnt)])

    # event-event relations are controlled by the datainput string variable
    if 'narr' in datainput:
        g.edges['narr'].data['y'] = torch.tensor(
            [t[2] for t in labels['narr']])
    if 'coref' in datainput:
        g.edges['coref'].data['y'] = torch.tensor(
            [t[2] for t in labels['coref']])
    if 'eer' in datainput:
        g.edges['eer'].data['y'] = torch.tensor([t[2] for t in labels['eer']])
    if 'argrole' in datainput:
        g.edges['argrole'].data['y'] = torch.tensor(
            [t[2] for t in labels['argrole']])
    if 'temp' in datainput:
        g.edges['temp'].data['y'] = torch.tensor(
            [t[2] for t in labels['temp']])

    if homo:
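        # flatten into a single-type DGLGraph; entity labels are first offset
        # (presumably past the event label range) so the two label sets do not collide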
        g.nodes['entity'].data['y'] += type_class_dict['event']
        homo_g = dgl.to_homo(g)
        g = dgl.DGLGraph()
        g.add_nodes(homo_g.number_of_nodes('_N'), homo_g.ndata)
        u, v = homo_g.all_edges(form='uv', order='eid')
        g.add_edges(u, v, homo_g.edata)
        g.readonly(True)  #sampler needs readonly graph

    return g
Example #9
def main(args):
    # load graph data
    if args.dataset == 'aifb':
        dataset = AIFBDataset()
    elif args.dataset == 'mutag':
        dataset = MUTAGDataset()
    elif args.dataset == 'bgs':
        dataset = BGSDataset()
    elif args.dataset == 'am':
        dataset = AMDataset()
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    # preprocessing in cpu
    with tf.device("/cpu:0"):
        # Load from hetero-graph
        hg = dataset[0]

        num_rels = len(hg.canonical_etypes)
        num_of_ntype = len(hg.ntypes)
        category = dataset.predict_category
        num_classes = dataset.num_classes
        train_mask = hg.nodes[category].data.pop('train_mask')
        test_mask = hg.nodes[category].data.pop('test_mask')
        train_idx = tf.squeeze(tf.where(train_mask))
        test_idx = tf.squeeze(tf.where(test_mask))
        labels = hg.nodes[category].data.pop('labels')

        # split dataset into train, validate, test
        if args.validation:
            val_idx = train_idx[:len(train_idx) // 5]
            train_idx = train_idx[len(train_idx) // 5:]
        else:
            val_idx = train_idx

        # calculate norm for each edge type and store in edge
        for canonical_etype in hg.canonical_etypes:
            u, v, eid = hg.all_edges(form='all', etype=canonical_etype)
            _, inverse_index, count = tf.unique_with_counts(v)
            degrees = tf.gather(count, inverse_index)
            norm = tf.ones(eid.shape[0]) / tf.cast(degrees, tf.float32)
            norm = tf.expand_dims(norm, 1)
            hg.edges[canonical_etype].data['norm'] = norm

        # get target category id
        category_id = len(hg.ntypes)
        for i, ntype in enumerate(hg.ntypes):
            if ntype == category:
                category_id = i

        # edge type and normalization factor
        g = dgl.to_homo(hg)

    # check cuda
    if args.gpu < 0:
        device = "/cpu:0"
        use_cuda = False
    else:
        device = "/gpu:{}".format(args.gpu)
        g = g.to(device)
        use_cuda = True
    num_nodes = g.number_of_nodes()
    node_ids = tf.range(num_nodes, dtype=tf.int64)
    edge_norm = g.edata['norm']
    edge_type = tf.cast(g.edata[dgl.ETYPE], tf.int64)

    # find out the target node ids in g
    node_tids = g.ndata[dgl.NTYPE]
    loc = (node_tids == category_id)
    target_idx = tf.squeeze(tf.where(loc))

    # since the nodes are featureless, the input feature is then the node id.
    feats = tf.range(num_nodes, dtype=tf.int64)

    with tf.device(device):
        # create model
        model = EntityClassify(num_nodes,
                               args.n_hidden,
                               num_classes,
                               num_rels,
                               num_bases=args.n_bases,
                               num_hidden_layers=args.n_layers - 2,
                               dropout=args.dropout,
                               use_self_loop=args.use_self_loop,
                               use_cuda=use_cuda)

        # optimizer
        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)
        # training loop
        print("start training...")
        forward_time = []
        backward_time = []
        loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=False)
        for epoch in range(args.n_epochs):
            t0 = time.time()
            with tf.GradientTape() as tape:
                logits = model(g, feats, edge_type, edge_norm)
                logits = tf.gather(logits, target_idx)
                loss = loss_fcn(tf.gather(labels, train_idx),
                                tf.gather(logits, train_idx))
                # Manual weight decay:
                # TensorFlow's Adam(W) implements weight decay differently from
                # PyTorch, which leads to worse results. Manually adding an L2
                # penalty on the weights to the loss solves this problem.
                for weight in model.trainable_weights:
                    loss = loss + \
                        args.l2norm * tf.nn.l2_loss(weight)
                t1 = time.time()
                grads = tape.gradient(loss, model.trainable_weights)
                optimizer.apply_gradients(zip(grads, model.trainable_weights))
                t2 = time.time()

            forward_time.append(t1 - t0)
            backward_time.append(t2 - t1)
            print(
                "Epoch {:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}"
                .format(epoch, forward_time[-1], backward_time[-1]))
            train_acc = acc(logits, labels, train_idx)
            val_loss = loss_fcn(tf.gather(labels, val_idx),
                                tf.gather(logits, val_idx))
            val_acc = acc(logits, labels, val_idx)
            print(
                "Train Accuracy: {:.4f} | Train Loss: {:.4f} | Validation Accuracy: {:.4f} | Validation loss: {:.4f}"
                .format(train_acc,
                        loss.numpy().item(), val_acc,
                        val_loss.numpy().item()))
        print()

        logits = model(g, feats, edge_type, edge_norm)
        logits = tf.gather(logits, target_idx)
        test_loss = loss_fcn(tf.gather(labels, test_idx),
                             tf.gather(logits, test_idx))
        test_acc = acc(logits, labels, test_idx)
        print("Test Accuracy: {:.4f} | Test loss: {:.4f}".format(
            test_acc,
            test_loss.numpy().item()))
        print()

        print("Mean forward time: {:4f}".format(
            np.mean(forward_time[len(forward_time) // 4:])))
        print("Mean backward time: {:4f}".format(
            np.mean(backward_time[len(backward_time) // 4:])))
Example #10
def load_ogb(dataset, global_norm):
    if dataset == 'ogbn-mag':
        dataset = DglNodePropPredDataset(name=dataset)
        split_idx = dataset.get_idx_split()
        train_idx = split_idx["train"]['paper']
        val_idx = split_idx["valid"]['paper']
        test_idx = split_idx["test"]['paper']
        hg_orig, labels = dataset[0]
        subgs = {}
        for etype in hg_orig.canonical_etypes:
            u, v = hg_orig.all_edges(etype=etype)
            subgs[etype] = (u, v)
            subgs[(etype[2], 'rev-' + etype[1], etype[0])] = (v, u)
        hg = dgl.heterograph(subgs)
        hg.nodes['paper'].data['feat'] = hg_orig.nodes['paper'].data['feat']
        paper_labels = labels['paper'].squeeze()

        num_rels = len(hg.canonical_etypes)
        num_of_ntype = len(hg.ntypes)
        num_classes = dataset.num_classes
        category = 'paper'
        print('Number of relations: {}'.format(num_rels))
        print('Number of class: {}'.format(num_classes))
        print('Number of train: {}'.format(len(train_idx)))
        print('Number of valid: {}'.format(len(val_idx)))
        print('Number of test: {}'.format(len(test_idx)))

        # currently we do not support node feature in mag dataset.
        # calculate norm for each edge type and store in edge
        if global_norm is False:
            for canonical_etype in hg.canonical_etypes:
                u, v, eid = hg.all_edges(form='all', etype=canonical_etype)
                _, inverse_index, count = th.unique(v,
                                                    return_inverse=True,
                                                    return_counts=True)
                degrees = count[inverse_index]
                norm = th.ones(eid.shape[0]) / degrees
                norm = norm.unsqueeze(1)
                hg.edges[canonical_etype].data['norm'] = norm

        # get target category id
        category_id = len(hg.ntypes)
        for i, ntype in enumerate(hg.ntypes):
            if ntype == category:
                category_id = i

        g = dgl.to_homo(hg)
        if global_norm:
            u, v, eid = g.all_edges(form='all')
            _, inverse_index, count = th.unique(v,
                                                return_inverse=True,
                                                return_counts=True)
            degrees = count[inverse_index]
            norm = th.ones(eid.shape[0]) / degrees
            norm = norm.unsqueeze(1)
            g.edata['norm'] = norm

        node_ids = th.arange(g.number_of_nodes())
        # find out the target node ids
        node_tids = g.ndata[dgl.NTYPE]
        loc = (node_tids == category_id)
        target_idx = node_ids[loc]
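        # remap the per-type 'paper' indices into homogeneous-graph node IDs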
        train_idx = target_idx[train_idx]
        val_idx = target_idx[val_idx]
        test_idx = target_idx[test_idx]
        train_mask = th.zeros((g.number_of_nodes(), ), dtype=th.bool)
        train_mask[train_idx] = True
        val_mask = th.zeros((g.number_of_nodes(), ), dtype=th.bool)
        val_mask[val_idx] = True
        test_mask = th.zeros((g.number_of_nodes(), ), dtype=th.bool)
        test_mask[test_idx] = True
        g.ndata['train_mask'] = train_mask
        g.ndata['val_mask'] = val_mask
        g.ndata['test_mask'] = test_mask

        labels = th.full((g.number_of_nodes(), ), -1, dtype=paper_labels.dtype)
        labels[target_idx] = paper_labels
        g.ndata['labels'] = labels
        return g
    else:
        raise ("Do not support other ogbn datasets.")
Example #11
    def forward(self, g):
        x = g.nodes['atom'].data['h']
        g_sub = dgl.to_homo(g.edge_type_subgraph(['atom_neighbors_atom']))
        x = self.gn(g_sub, x)
        g.nodes['atom'].data['h'] = x
        return g
Example #12
    def get_unigraph(self):
        g = self.get_whole_graph()
        return graph.to_homo(g)