Exemplo n.º 1
0
    def __init__(self,
                 num_nodes,
                 edges,
                 node_types=None,
                 node_feat=None,
                 edge_feat=None):
        """Create the wrapper around a newly constructed ``hpg.HeterGraph``.

        Every argument is forwarded positionally, in the order listed here,
        to ``hpg.HeterGraph``; this method does no validation of its own.

        Args:
            num_nodes: Forwarded to ``hpg.HeterGraph`` unchanged.
            edges: Forwarded to ``hpg.HeterGraph`` unchanged.
            node_types: Forwarded unchanged; defaults to ``None``.
            node_feat: Forwarded unchanged; defaults to ``None``.
            edge_feat: Forwarded unchanged; defaults to ``None``.
        """
        backing_graph = hpg.HeterGraph(num_nodes, edges, node_types,
                                       node_feat, edge_feat)
        self._g = backing_graph
        # Keep a direct reference to the backing graph's ``_multi_graph``
        # attribute so it can be reached without going through ``self._g``.
        self._multi_graph = backing_graph._multi_graph
Exemplo n.º 2
0
    def build_graph(self):
        """Build pgl heterogeneous graph.

        Reads three id files from ``self.config['data_path']`` through
        ``self.remapping_id`` (venues, then authors, then papers). Each call
        is given a ``start_index`` equal to the number of ids loaded by the
        previous calls. The per-kind node type lists are concatenated in the
        same order and their total length is used as the node count.

        Paper-author and paper-venue edges are read with ``self.load_edges``
        and stored under ``'p2a'`` / ``'p2c'``; the swapped pairs are stored
        under ``'a2p'`` / ``'c2p'``.

        Side effects:
            Sets ``self.conf_id2index``, ``self.conf_name2index``,
            ``self.author_id2index``, ``self.author_name2index``,
            ``self.paper_id2index``, ``self.paper_name2index`` and
            ``self.graph``.
        """
        data_path = self.config['data_path']

        self.conf_id2index, self.conf_name2index, conf_node_type = self.remapping_id(
            data_path + 'id_conf.txt',
            start_index=0,
            node_type='conf')
        logging.info('%d venues have been loaded.', len(self.conf_id2index))

        self.author_id2index, self.author_name2index, author_node_type = self.remapping_id(
            data_path + 'id_author.txt',
            start_index=len(self.conf_id2index),
            node_type='author')
        logging.info('%d authors have been loaded.',
                     len(self.author_id2index))

        # Unlike the other two id files, the paper file is tab separated.
        self.paper_id2index, self.paper_name2index, paper_node_type = self.remapping_id(
            data_path + 'paper.txt',
            start_index=(len(self.conf_id2index) + len(self.author_id2index)),
            node_type='paper',
            separator='\t')
        logging.info('%d papers have been loaded.', len(self.paper_id2index))

        node_types = conf_node_type + author_node_type + paper_node_type
        num_nodes = len(node_types)

        paper_author_edges = self.load_edges(data_path + 'paper_author.txt',
                                             self.paper_id2index,
                                             self.author_id2index)
        paper_conf_edges = self.load_edges(data_path + 'paper_conf.txt',
                                           self.paper_id2index,
                                           self.conf_id2index)

        # Each relation is stored in both directions under its own edge type.
        edges_by_types = {
            'p2c': paper_conf_edges,
            'c2p': [(dst, src) for src, dst in paper_conf_edges],
            'p2a': paper_author_edges,
            'a2p': [(dst, src) for src, dst in paper_author_edges],
        }

        # Column vector [0, 1, ..., num_nodes - 1] of int64, shape (num_nodes, 1).
        node_features = {
            'index': np.arange(num_nodes, dtype=np.int64).reshape(-1, 1)
        }

        self.graph = heter_graph.HeterGraph(num_nodes=num_nodes,
                                            edges=edges_by_types,
                                            node_types=node_types,
                                            node_feat=node_features)
Exemplo n.º 3
0
    def build_graph(self):
        """Build pgl heterogeneous graph.

        ``self.edge_file_list`` is iterated as a sequence of entries whose
        item 0 is an edge type name and whose item 1 is a file path. If
        ``<path>.npy`` exists for the FIRST entry, every entry is loaded with
        ``np.load(<path>.npy)``; otherwise every entry is loaded with
        ``self.load_edges(<path>)``.

        When ``self.symmetry`` is truthy, a reversed edge set is added for
        each loaded edge type: the key is split on ``'2'`` and its first two
        parts are swapped, and the two edge columns are swapped.

        Node types come from ``self.load_node_types(self.node_types_file)``
        and must have exactly ``self.num_nodes`` entries.

        Side effects:
            Sets ``self.graph``.

        Raises:
            AssertionError: If the number of loaded node types differs from
                ``self.num_nodes`` (not checked when Python runs with ``-O``).
        """
        edges_by_types = {}
        # NOTE(review): only the first entry's .npy file is checked; the other
        # entries are assumed to be cached in the same form.
        npy = self.edge_file_list[0][1] + ".npy"
        if os.path.exists(npy):
            log.info("load data from numpy file")
            for pair in self.edge_file_list:
                edges_by_types[pair[0]] = np.load(pair[1] + ".npy")
        else:
            log.info("load data from txt file")
            for pair in self.edge_file_list:
                edges_by_types[pair[0]] = self.load_edges(pair[1])

        for e_type, edges in edges_by_types.items():
            log.info(["number of %s edges: " % e_type, len(edges)])

        if self.symmetry:
            # Collect reversed edges in a separate dict so edges_by_types is
            # not modified while it is being iterated.
            reversed_edges = {}
            for key, edges in edges_by_types.items():
                n_list = key.split('2')
                re_key = n_list[1] + '2' + n_list[0]
                # Column swap; requires edges to support 2-D fancy indexing
                # (assumes load_edges returns a numpy array -- TODO confirm).
                reversed_edges[re_key] = edges[:, [1, 0]]
            edges_by_types.update(reversed_edges)

        # NOTE(review): this message is logged even when self.symmetry is falsy.
        log.info(["finished loadding symmetry edges."])

        node_types = self.load_node_types(self.node_types_file)

        assert len(node_types) == self.num_nodes, \
                "num_nodes should be equal to the length of node_types"
        log.info(["number of nodes: ", len(node_types)])

        # Column vector [0, 1, ..., num_nodes - 1] of int64, shape (num_nodes, 1).
        node_features = {
            'index': np.arange(self.num_nodes, dtype=np.int64).reshape(-1, 1)
        }

        self.graph = heter_graph.HeterGraph(num_nodes=self.num_nodes,
                                            edges=edges_by_types,
                                            node_types=node_types,
                                            node_feat=node_features)
Exemplo n.º 4
0
def test_dump():
    """Build a small 15-node heterogeneous graph and dump it to disk.

    Nodes 0-3 are typed 'c', nodes 4-9 'p' and nodes 10-14 'a'. The graph is
    written to ``./hetergraph_mmap`` with ``outdegree=True``.
    """
    np.random.seed(1)

    # for test no successor: node 5 never appears as a source in 'p2a'
    conf_to_paper = [(1, 4), (0, 5), (1, 9), (1, 8), (2, 8), (2, 5), (3, 6),
                     (3, 7), (3, 4), (3, 8)]
    paper_to_author = [(4, 10), (4, 11), (4, 12), (4, 14), (4, 13), (6, 12),
                       (6, 11), (6, 14), (7, 12), (7, 11), (8, 14), (9, 10)]
    edges = {
        'c2p': conf_to_paper,
        'p2c': [(dst, src) for src, dst in conf_to_paper],
        'p2a': paper_to_author,
        'a2p': [(dst, src) for src, dst in paper_to_author],
    }

    # (node id, type label) pairs, with ids assigned in list order.
    type_labels = ['c'] * 4 + ['p'] * 6 + ['a'] * 5
    node_types = list(enumerate(type_labels))

    graph = heter_graph.HeterGraph(num_nodes=len(node_types),
                                   edges=edges,
                                   node_types=node_types)

    graph.dump("./hetergraph_mmap", outdegree=True)
Exemplo n.º 5
0
    def build_graph(self):
        """Build pgl heterogeneous graph.

        Loads the training edges from ``self.train_edges_file`` through
        ``self.load_training_data``, passing the ``'slf_loop'`` and
        ``'symmetry_edge'`` settings from ``self.config``. The node count is
        the length of the returned node collection, and the graph is built
        without node types (``node_types=None``).

        Side effects:
            Sets ``self.graph`` and ``self.edge_types`` (the sorted result of
            ``self.graph.edge_types_info()``).
        """
        # The second return value (all edges) is not needed here.
        edge_data_by_type, _, all_nodes = self.load_training_data(
            self.train_edges_file,
            slf_loop=self.config['slf_loop'],
            symmetry_edge=self.config['symmetry_edge'])

        num_nodes = len(all_nodes)
        # Column vector [0, 1, ..., num_nodes - 1] of int64, shape (num_nodes, 1).
        node_features = {
            'index': np.arange(num_nodes, dtype=np.int64).reshape(-1, 1)
        }

        self.graph = heter_graph.HeterGraph(num_nodes=num_nodes,
                                            edges=edge_data_by_type,
                                            node_types=None,
                                            node_feat=node_features)

        self.edge_types = sorted(self.graph.edge_types_info())
        logging.info('total %d nodes are loaded', self.graph.num_nodes)
Exemplo n.º 6
0
    def setUpClass(cls):
        """Create the shared 15-node heterogeneous graph fixture.

        Nodes 0-3 are typed 'c', nodes 4-9 'p' and nodes 10-14 'a'. The
        resulting graph is stored on the class as ``cls.graph``.
        """
        np.random.seed(1)

        # for test no successor: node 5 never appears as a source in 'p2a'
        conf_to_paper = [(1, 4), (0, 5), (1, 9), (1, 8), (2, 8), (2, 5),
                         (3, 6), (3, 7), (3, 4), (3, 8)]
        paper_to_author = [(4, 10), (4, 11), (4, 12), (4, 14), (4, 13),
                           (6, 12), (6, 11), (6, 14), (7, 12), (7, 11),
                           (8, 14), (9, 10)]
        edges = {
            'c2p': conf_to_paper,
            'p2c': [(dst, src) for src, dst in conf_to_paper],
            'p2a': paper_to_author,
            'a2p': [(dst, src) for src, dst in paper_to_author],
        }

        # Alternative fixture, kept for reference.
        # for test speed
        #  edges['c2p'] = [(0, 4), (0, 5), (1, 9), (1,8), (2,8), (2,5), (3,6), (3,7), (3,4), (3,8)]
        #  edges['p2c'] = [(v,u) for u, v in edges['c2p']]
        #  edges['p2a'] = [(4,10), (4,11), (4,12), (4,14), (5,13), (6,13), (6,11), (6,14), (7,12), (7,11), (8,14), (9,13)]
        #  edges['a2p'] = [(v,u) for u, v in edges['p2a']]

        # (node id, type label) pairs, with ids assigned in list order.
        type_labels = ['c'] * 4 + ['p'] * 6 + ['a'] * 5
        node_types = list(enumerate(type_labels))

        cls.graph = heter_graph.HeterGraph(
            num_nodes=len(node_types), edges=edges, node_types=node_types)