Exemplo n.º 1
0
def test_nx_conversion():
    """Round-trip a DGLGraph through networkx and verify sizes and features.

    Covers three scenarios: DGLGraph -> networkx, networkx -> an existing
    DGLGraph (edge ``id`` attribute present), and networkx -> a fresh
    DGLGraph after the ``id`` attribute has been stripped from every edge.
    """

    def _check_nx_feature(nxg, nf, ef):
        """Assert that ``nxg`` carries the node features ``nf`` and edge features ``ef``.

        ``nf`` / ``ef`` map feature name -> expected tensor.  Used to
        validate the output of ``to_networkx``.
        """
        edge_count = nxg.size()

        # ---- node features ----
        if len(nxg) > 0:
            rows_by_name = ddict(list)
            for node, data in nxg.nodes(data=True):
                # every node must expose exactly the requested features
                assert len(data) == len(nf)
                for name in nxg.nodes[node]:
                    rows_by_name[name].append(data[name].unsqueeze(0))
            for name, rows in rows_by_name.items():
                assert U.allclose(th.cat(rows, dim=0), nf[name])
        else:
            assert len(nf) == 0

        # ---- edge features ----
        if edge_count > 0:
            # one slot per edge; the edge's 'id' attribute selects the slot
            slots_by_name = ddict(lambda: [0] * edge_count)
            for _, _, data in nxg.edges(data=True):
                # requested features plus the extra 'id' entry
                assert len(data) == len(ef) + 1
                slot = data['id']
                for name in ef:
                    slots_by_name[name][slot] = data[name].unsqueeze(0)
            for name, rows in slots_by_name.items():
                assert U.allclose(th.cat(rows, dim=0), ef[name])
        else:
            assert len(ef) == 0

    # Random features; tuple elements are evaluated left to right, so the
    # draws happen in the order n1, n2, n3, e1, e2.
    n1, n2, n3 = th.randn(5, 3), th.randn(5, 10), th.randn(5, 4)
    e1, e2 = th.randn(4, 5), th.randn(4, 7)

    g = DGLGraph(multigraph=True)
    g.add_nodes(5)
    g.add_edges([0, 1, 3, 4], [2, 4, 0, 3])
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # --- DGLGraph -> networkx, exporting only a subset of node features ---
    nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    expected_nodes = {'n1': n1, 'n3': n3}
    expected_edges = {'e1': e1, 'e2': e2}
    _check_nx_feature(nxg, expected_nodes, expected_edges)

    # --- networkx -> existing DGLGraph; the nx edges still carry 'id' ---
    # importing 'id' as an edge feature exercises the non-tensor copy path
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # features that existed on g before the import must have been cleared
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    assert U.allclose(g.ndata['n1'], n1)
    # with 'id' present, e1 is expected in the original edge order
    assert U.allclose(g.edata['e1'], e1)
    assert th.equal(g.get_e_repr()['id'], th.arange(4))

    # --- mutate the DGLGraph, then convert again ---
    # drop 'id' first so new edges need not supply it
    g.pop_e_repr('id')
    new_n = th.randn(2, 3)
    new_e = th.randn(3, 5)
    g.add_nodes(2, data={'n1': new_n})
    # three new edges, one of which duplicates an existing edge (multi-edge)
    g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    n1 = th.cat((n1, new_n), dim=0)
    e1 = th.cat((e1, new_e), dim=0)

    nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    assert len(nxg) == 7
    assert nxg.size() == 7
    _check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # --- networkx -> fresh DGLGraph without 'id' on the edges ---
    for _, _, data in nxg.edges(data=True):
        data.pop('id')
    g = DGLGraph(multigraph=True)
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    assert g.number_of_nodes() == 7
    assert g.number_of_edges() == 7
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    assert U.allclose(g.ndata['n1'], n1)
    # without 'id', edge features are expected in nxg.edges() order
    expected_e1 = th.cat(
        [data['e1'].unsqueeze(0) for _, _, data in nxg.edges(data=True)],
        dim=0)
    assert U.allclose(g.edata['e1'], expected_e1)
Exemplo n.º 2
0
    def _load(self):
        """Load the input dataset from the dataset/NAME/NAME.txt file.

        Reads ``self.file`` and fills ``self.graphs``, ``self.labels``, the
        label dictionaries and the running statistics (``self.n``,
        ``self.m``), then prints a summary.

        File layout, as parsed here:

        * line 1: ``N``, the total number of graphs;
        * per graph: one header line ``n_nodes glabel`` followed by
          ``n_nodes`` node lines of the form
          ``nlabel n_edges nbr_1 ... nbr_k [attr_1 attr_2 ...]``.

        When no graph carries node attributes, one-hot ``'attr'`` features
        are synthesised from node in-degrees (``self.degree_as_nlabel``) or
        from the node labels.

        Raises:
            Exception: if a node line has fewer tokens than its declared
                edge count requires.
        """

        print('loading data...')
        with open(self.file, 'r') as f:
            # line_1 == N, total number of graphs
            self.N = int(f.readline().strip())

            for i in range(self.N):
                if (i + 1) % 10 == 0 and self.verbosity is True:
                    print('processing graph {}...'.format(i + 1))

                grow = f.readline().strip().split()
                # line_2 == [n_nodes, l] is equal to
                # [node number of a graph, class label of a graph]
                n_nodes, glabel = [int(w) for w in grow]

                # relabel graphs: map raw class labels to 0..C-1 in order
                # of first appearance
                if glabel not in self.glabel_dict:
                    self.glabel_dict[glabel] = len(self.glabel_dict)

                self.labels.append(self.glabel_dict[glabel])

                g = DGLGraph()
                g.add_nodes(n_nodes)

                nlabels = []  # node labels
                nattrs = []  # node attributes if it has
                m_edges = 0

                for j in range(n_nodes):
                    tokens = f.readline().strip().split()

                    # handle edges and attributes(if has)
                    tmp = int(tokens[1]) + 2  # tmp == 2 + #edges
                    if tmp > len(tokens):
                        # fewer tokens than the declared edge count needs
                        raise Exception('edge number is incorrect!')
                    if tmp < len(tokens):
                        # Everything after the neighbour list is the node's
                        # attribute vector.  Slice the raw tokens here,
                        # before they are narrowed to the integer prefix.
                        nattrs.append([float(w) for w in tokens[tmp:]])
                    nrow = [int(w) for w in tokens[:tmp]]

                    # relabel nodes if it has labels
                    # if it doesn't have node labels, then every nrow[0]==0
                    if nrow[0] not in self.nlabel_dict:
                        self.nlabel_dict[nrow[0]] = len(self.nlabel_dict)

                    # the raw label is stored; nlabel_dict only records the
                    # mapping for later one-hot encoding
                    nlabels.append(nrow[0])

                    m_edges += nrow[1]
                    g.add_edges(j, nrow[2:])

                    # add self loop
                    if self.self_loop:
                        m_edges += 1
                        g.add_edge(j, j)

                    if (j + 1) % 10 == 0 and self.verbosity is True:
                        print(
                            'processing node {} of graph {}...'.format(
                                j + 1, i + 1))
                        print('this node has {} edgs.'.format(
                            nrow[1]))

                if nattrs:
                    g.ndata['attr'] = np.stack(nattrs)
                    self.nattrs_flag = True

                g.ndata['label'] = np.array(nlabels)
                if len(self.nlabel_dict) > 1:
                    self.nlabels_flag = True

                assert len(g) == n_nodes

                # update statistics of graphs
                self.n += n_nodes
                self.m += m_edges

                self.graphs.append(g)

        # if no attr
        if not self.nattrs_flag:
            print('there are no node features in this dataset!')
            label2idx = {}
            # generate node attr by node degree
            if self.degree_as_nlabel:
                print('generate node features by node degree...')
                nlabel_set = set([])
                for g in self.graphs:
                    # actually this label shouldn't be updated
                    # in case users want to keep it
                    # but usually no features means no labels, fine.
                    g.ndata['label'] = g.in_degrees()
                    # extracting unique node labels
                    nlabel_set = nlabel_set.union(set(g.ndata['label'].numpy()))

                nlabel_set = list(nlabel_set)

                # in case the labels/degrees are not continuous number
                self.ndegree_dict = {
                    label: idx for idx, label in enumerate(nlabel_set)
                }
                label2idx = self.ndegree_dict
            # generate node attr by node label
            else:
                print('generate node features by node label...')
                label2idx = self.nlabel_dict

            for g in self.graphs:
                # Build the one-hot matrix locally and assign it once, so
                # the result does not depend on the ndata getter handing
                # back a writable view of the stored feature.
                num_nodes = g.number_of_nodes()
                onehot = np.zeros((num_nodes, len(label2idx)))
                columns = [label2idx[nl.item()] for nl in g.ndata['label']]
                onehot[range(num_nodes), columns] = 1
                g.ndata['attr'] = onehot

        # after load, get the #classes and #dim
        self.gclasses = len(self.glabel_dict)
        self.nclasses = len(self.nlabel_dict)
        self.eclasses = len(self.elabel_dict)
        self.dim_nfeats = len(self.graphs[0].ndata['attr'][0])

        print('Done.')
        print(
            """
            -------- Data Statistics --------'
            #Graphs: %d
            #Graph Classes: %d
            #Nodes: %d
            #Node Classes: %d
            #Node Features Dim: %d
            #Edges: %d
            #Edge Classes: %d
            Avg. of #Nodes: %.2f
            Avg. of #Edges: %.2f
            Graph Relabeled: %s
            Node Relabeled: %s
            Degree Relabeled(If degree_as_nlabel=True): %s \n """ % (
                self.N, self.gclasses, self.n, self.nclasses,
                self.dim_nfeats, self.m, self.eclasses,
                self.n / self.N, self.m / self.N, self.glabel_dict,
                self.nlabel_dict, self.ndegree_dict))
Exemplo n.º 3
0
def test_nx_conversion():
    # check conversion between networkx and DGLGraph

    def _check_nx_feature(nxg, nf, ef):
        """Assert that ``nxg`` carries the node features ``nf`` and edge features ``ef``.

        ``nf`` and ``ef`` map feature name -> expected tensor; they are
        compared against the per-node / per-edge attributes found on the
        networkx graph ``nxg``.
        """
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            # collect, per feature name, one unsqueezed row per node in
            # nxg.nodes() iteration order
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                # every node must expose exactly as many attributes as expected
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(attr[k].unsqueeze(0))
            for k in node_feat:
                feat = th.cat(node_feat[k], dim=0)
                assert U.allclose(feat, nf[k])
        else:
            # an empty graph must not be expected to carry node features
            assert len(nf) == 0
        if num_edges > 0:
            # one placeholder slot per edge; each edge's 'id' attribute
            # selects the slot its feature row is written to
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = attr[k].unsqueeze(0)
            for k in edge_feat:
                feat = th.cat(edge_feat[k], dim=0)
                assert U.allclose(feat, ef[k])
        else:
            # a graph without edges must not be expected to carry edge features
            assert len(ef) == 0

    # random node features (5 nodes) and edge features (4 edges)
    n1 = th.randn(5, 3)
    n2 = th.randn(5, 10)
    n3 = th.randn(5, 4)
    e1 = th.randn(4, 5)
    e2 = th.randn(4, 7)
    g = DGLGraph(multigraph=True)
    g.add_nodes(5)
    g.add_edges([0, 1, 3, 4], [2, 4, 0, 3])
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    # only 'n1' and 'n3' are exported; 'n2' is deliberately left out
    nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph
    # use id feature to test non-tensor copy
    g.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    assert U.allclose(g.get_n_repr()['n1'], n1)
    assert U.allclose(g.get_e_repr()['e1'], e1)
    assert th.equal(g.get_e_repr()['id'], th.arange(4))

    # drop the imported 'id' edge feature before adding edges below,
    # which only supply 'e1'
    g.pop_e_repr('id')

    # test modifying DGLGraph
    new_n = th.randn(2, 3)
    new_e = th.randn(3, 5)
    g.add_nodes(2, data={'n1': new_n})
    # add three edges, one is a multi-edge
    g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    # extend the expected features to match the enlarged graph
    n1 = th.cat((n1, new_n), dim=0)
    e1 = th.cat((e1, new_e), dim=0)
    # convert to networkx again
    nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    assert len(nxg) == 7
    assert nxg.size() == 7
    _check_nx_feature(nxg, {'n1': n1}, {'e1': e1})