Exemplo n.º 1
0
def test_hetero_partition():
    """Run ``check_partition`` on a random heterograph for each method/flag pair.

    The graph comes from ``create_random_graph(10000)`` and is converted with
    ``dgl.as_heterograph``. Both the 'metis' and 'random' methods are checked
    with the boolean third argument set to True and then False.
    """
    hetero_graph = dgl.as_heterograph(create_random_graph(10000))
    for method in ('metis', 'random'):
        for flag in (True, False):
            check_partition(hetero_graph, method, flag)
Exemplo n.º 2
0
def test_cast():
    """Check conversion DGLGraph -> heterograph -> immutable graph.

    Builds a 4-node, 2-edge read-only graph from a COO matrix, converts it
    with ``dgl.as_heterograph`` and back with ``dgl.as_immutable_graph``, and
    verifies that node/edge counts and the edge list (ordered by edge id)
    survive each conversion.
    """
    m = spsp.coo_matrix(([1, 1], ([0, 1], [1, 2])), (4, 4))
    g = dgl.DGLGraph(m, readonly=True)
    gsrc, gdst = g.edges(order='eid')
    ndata = F.randn((4, 5))
    edata = F.randn((2, 4))
    g.ndata['x'] = ndata
    g.edata['y'] = edata

    # Homogeneous graph -> heterograph with a single node type and edge type.
    hg = dgl.as_heterograph(g, 'A', 'AA')
    assert hg.ntypes == ['A']
    assert hg.etypes == ['AA']
    assert hg.canonical_etypes == [('A', 'AA', 'A')]
    assert hg.number_of_nodes() == 4
    assert hg.number_of_edges() == 2
    hgsrc, hgdst = hg.edges(order='eid')
    assert F.array_equal(gsrc, hgsrc)
    assert F.array_equal(gdst, hgdst)

    # Heterograph -> immutable graph. The edges must be read from g2 itself;
    # reading them from hg would only repeat the check above and leave the
    # second conversion untested.
    g2 = dgl.as_immutable_graph(hg)
    assert g2.number_of_nodes() == 4
    assert g2.number_of_edges() == 2
    g2src, g2dst = g2.edges(order='eid')
    assert F.array_equal(g2src, gsrc)
    assert F.array_equal(g2dst, gdst)
Exemplo n.º 3
0
def test_hetero_metis_partition():
    """Run the Metis partition checks on a heterograph built from a large index.

    ``check_metis_partition`` is invoked with second argument 0, 1 and 2 in
    turn, followed by ``check_metis_partition_with_constraint``.
    """
    # TODO(zhengda) Metis fails to partition a small graph.
    base_graph = dgl.DGLGraph(create_large_graph_index(1000), readonly=True)
    hetero_graph = dgl.as_heterograph(base_graph)
    for arg in (0, 1, 2):
        check_metis_partition(hetero_graph, arg)
    check_metis_partition_with_constraint(hetero_graph)
Exemplo n.º 4
0
    def load(cls, filepath, device=None, faiss_gpu=None):
        """Rebuild an instance of this class from a directory on disk.

        Reads the pickled constructor arguments, the DGL graph, the node id
        table, the embedding layer, the network weights and the final
        embeddings from files under ``filepath``.

        Args
        ----
        filepath : str
            directory to load from
        device : str
            if given, replaces the pytorch device stored in the saved arguments
        faiss_gpu : str
            if given, replaces the stored faiss gpu setting

        Returns
        -------
        The restored instance.
        """

        # NOTE(review): unpickling can execute arbitrary code; only call this
        # on directories written by a trusted source.
        with open(f'{filepath}/initargs.pkl','rb') as args_file:
            init_args = pickle.load(args_file)

        (embedding_dim,
         feature_dim,
         hidden_dim,
         hidden_layers,
         dropout,
         agg_type,
         distance,
         saved_device,
         saved_faiss_gpu,
         inference_batch_size,
         p_train,
         train_faiss_index) = init_args

        # Explicit arguments take precedence over the values saved on disk.
        torch_device = saved_device if device is None else device
        use_faiss_gpu = saved_faiss_gpu if faiss_gpu is None else faiss_gpu

        instance = cls(embedding_dim,
                       feature_dim,
                       hidden_dim,
                       hidden_layers,
                       dropout,
                       agg_type,
                       distance,
                       torch_device,
                       use_faiss_gpu,
                       inference_batch_size,
                       p_train,
                       train_faiss_index)

        # load_graphs returns (list of graphs, labels); only the first graph is used.
        graphs, _ = dgl.data.utils.load_graphs(f'{filepath}/dgl.bin')
        graph = graphs[0]
        graph.readonly()
        instance.G = dgl.as_heterograph(graph)

        instance.node_ids = pd.read_csv(f'{filepath}/node_ids.csv')

        target = th.device(torch_device)
        instance.embed = th.load(f'{filepath}/embed.torch',map_location=target)
        weights = th.load(f'{filepath}/model_weights.torch',map_location=target)
        instance.net.load_state_dict(weights)
        instance._embeddings = np.load(f'{filepath}/final_embed.npy',allow_pickle=False)

        return instance
Exemplo n.º 5
0
def check_rpc_in_subgraph(tmpdir, num_server):
    """Compare a distributed in-subgraph sample with a local ``dgl.in_subgraph``.

    Writes ``rpc_ip_config.txt`` with one line per server, partitions the
    cora graph into ``num_server`` parts under ``tmpdir``, starts one server
    process per part, samples the in-subgraph of a fixed node list through
    the client, and asserts that the sampled edges and edge ids match those
    computed locally.

    Args
    ----
    tmpdir : path-like
        directory where the graph partitions are written
    num_server : int
        number of server processes (also the number of partitions)
    """
    # Context manager so the file is closed even if a write raises.
    with open("rpc_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{} 1\n'.format(get_local_usable_addr()))

    g = CitationGraphDataset("cora")[0]
    g.readonly()
    num_parts = num_server

    partition_graph(g,
                    'test_in_subgraph',
                    num_parts,
                    tmpdir,
                    num_hops=1,
                    part_method='metis',
                    reshuffle=False)

    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_in_subgraph'))
        p.start()
        # Stagger server start-up by one second each.
        time.sleep(1)
        pserver_list.append(p)

    nodes = [0, 10, 99, 66, 1024, 2008]
    # Pause before the client connects (presumably to let servers finish
    # initialising -- confirm).
    time.sleep(3)
    sampled_graph = start_in_subgraph_client(0, tmpdir, num_server > 1, nodes)
    for p in pserver_list:
        p.join()

    src, dst = sampled_graph.edges()
    g = dgl.as_heterograph(g)
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()
    # Reference result computed locally on the full graph.
    subg1 = dgl.in_subgraph(g, nodes)
    src1, dst1 = subg1.edges()
    # Edge order may differ between the two results, so compare sorted endpoints.
    assert np.all(np.sort(F.asnumpy(src)) == np.sort(F.asnumpy(src1)))
    assert np.all(np.sort(F.asnumpy(dst)) == np.sort(F.asnumpy(dst1)))
    eids = g.edge_ids(src, dst)
    assert np.array_equal(F.asnumpy(sampled_graph.edata[dgl.EID]),
                          F.asnumpy(eids))
Exemplo n.º 6
0
    def set_masks(self):
        """Set the train, test, relevance and entity masks.

        Needs to be called once after data has been added to the graph.
        self.train and self.evaluate automatically check if this needs to be
        called and will call it, but it can also be called manually. Calling
        it a second time rerolls the random generation of the train and test
        sets.

        Reads ``self.node_ids`` (columns ``intID``, ``classid``,
        ``feature_flag``), ``self.p_train``, ``self.G``, ``self.feature_dim``
        and ``self.device``. Writes ``self.labels``, the four mask arrays,
        ``self.features`` and ``self._masks_set``; on the first call for a
        mutable graph it also creates ``self.embed`` and converts ``self.G``
        to a read-only heterograph.
        """

        self.node_ids = self.node_ids.sort_values('intID')
        self.labels = self.node_ids.classid.to_numpy()

        #is relevant mask indicates the nodes which we know the class of
        self.is_relevant_mask = np.logical_not(pd.isna(self.node_ids.classid).to_numpy())

        #entity_mask indicates the nodes which we want to include in the faiss index
        #(the builtin bool is used because the np.bool alias was removed in NumPy 1.24)
        self.entity_mask = np.logical_not(self.node_ids.feature_flag.to_numpy().astype(bool))

        self.train_mask = np.random.choice(
            a=[False,True],size=(len(self.node_ids)),p=[1-self.p_train,self.p_train])

        #test set is all nodes other than the train set unless train set is all
        #nodes and then test set is the same as train set.
        if self.p_train != 1:
            self.test_mask = np.logical_not(self.train_mask)
        else:
            self.test_mask = self.train_mask

        #do not include any node without a classid, or any non-entity node, in either set
        self.train_mask = np.logical_and(self.train_mask,self.is_relevant_mask)
        self.train_mask = np.logical_and(self.train_mask,self.entity_mask)
        self.test_mask = np.logical_and(self.test_mask,self.is_relevant_mask)
        self.test_mask = np.logical_and(self.test_mask,self.entity_mask)

        if not self.G.is_readonly:
            self.embed = nn.Embedding(len(self.node_ids),self.feature_dim)
            self.G.readonly()
            self.G = dgl.as_heterograph(self.G)
            self.G.ndata['features'] = self.embed.weight

        #Module.to moves the parameters in place, so the weight picked up
        #afterwards is already on the target device. Tensor.to is not in-place,
        #so the original's discarded `self.features.to(...)` call had no effect.
        self.embed.to(self.device)
        self.features = self.embed.weight

        self._masks_set = True
Exemplo n.º 7
0
    def load_graph_data(self,filepath):
        """Restore graph data from disk, but not network parameters
        or trained embeddings. Useful for changing network parameters
        if you don't want to reconstruct the graph.

        Loads the first graph stored in ``{filepath}/dgl.bin``, marks it
        read-only, converts it to a heterograph and stores it in ``self.G``.
        Reloads ``self.node_ids`` from ``{filepath}/node_ids.csv``. The cached
        masks flag, embeddings and index are reset.

        Args
        ----
        filepath : str
            path to where you previously saved the GraphRecommender
        """

        # load_graphs returns (list of graphs, labels); only the first graph
        # is used. The original referenced an undefined `restored_self` here,
        # which raised NameError.
        graphs, _ = dgl.data.utils.load_graphs(f'{filepath}/dgl.bin')
        graph = graphs[0]
        graph.readonly()
        self.G = dgl.as_heterograph(graph)

        self.node_ids = pd.read_csv(f'{filepath}/node_ids.csv')

        # Invalidate everything derived from the previous graph.
        self._masks_set = False
        self._embeddings = None
        self._index = None
Exemplo n.º 8
0
    # Fragment of a script entry point: finishes argument parsing, loads the
    # reddit graph and packs the data handed to `run`.
    argparser.add_argument(
        '--num-workers',
        type=int,
        default=0,
        help="Number of sampling processes. Use 0 for no extra process.")
    argparser.add_argument('--inductive',
                           action='store_true',
                           help="Inductive learning setting")
    args = argparser.parse_args()

    # --gpu is a comma-separated string; each entry is parsed as an integer id.
    devices = list(map(int, args.gpu.split(',')))
    n_gpus = len(devices)

    g, n_classes = load_reddit()
    # Construct graph
    g = dgl.as_heterograph(g)
    # Second dimension of the node 'features' data.
    in_feats = g.ndata['features'].shape[1]

    if args.inductive:
        train_g, val_g, test_g = inductive_split(g)
    else:
        # Without --inductive, all three names refer to the same graph object.
        train_g = val_g = test_g = g

    # NOTE(review): prepare_mp is defined elsewhere; presumably it readies
    # each graph for use from multiple processes -- confirm.
    prepare_mp(train_g)
    prepare_mp(val_g)
    prepare_mp(test_g)
    # Pack data
    data = in_feats, n_classes, train_g, val_g, test_g

    # With exactly one device, `run` is called directly with process index 0.
    if n_gpus == 1:
        run(0, n_gpus, args, devices, data)
Exemplo n.º 9
0
Arquivo: main.py Projeto: youhe12/dgl
    # Fragment of a script entry point: normalises self-loops on `graph`,
    # converts it to a heterograph and runs the experiment 10 times.
    # Keep only the first column of the labels.
    labels = labels[:, 0]

    # Make the graph mutable so edges can be removed and added below.
    graph.readonly(False)
    # Ids of the existing edges that go from a node to itself.
    _, _, self_e = graph.edge_ids(th.arange(graph.number_of_nodes()),
                                  th.arange(graph.number_of_nodes()),
                                  return_uv=True)
    print('Total edges before adding self-loop {}'.format(
        graph.number_of_edges()))
    # clean partial self-loop edges
    graph.remove_edges(self_e)
    # Add all self-loop edges
    graph.add_edges(th.arange(graph.number_of_nodes()),
                    th.arange(graph.number_of_nodes()))
    print('Total edges after adding self-loop {}'.format(
        graph.number_of_edges()))
    # Switch back to read-only before converting to a heterograph.
    graph.readonly(True)
    graph = dgl.as_heterograph(graph)

    # Second dimension of the node 'feat' data.
    in_feats = graph.ndata['feat'].shape[1]
    # Largest label value plus one.
    n_classes = (labels.max() + 1).item()
    prepare_mp(graph)
    # Pack data
    data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, args.head

    # Run 10 times
    test_accs = []
    for i in range(10):
        test_accs.append(run(args, device, data))
        # Printed inside the loop: mean and std over the runs completed so far.
        print('Average test accuracy:', np.mean(test_accs), '±',
              np.std(test_accs))