Пример #1
0
def test_hetero_partition():
    """Run ``check_partition`` on a random graph cast to a heterograph.

    A graph from ``create_random_graph(10000)`` is converted with
    ``dgl.as_heterograph`` and checked with the 'metis' and 'random'
    methods, each with the boolean third argument set to True, then False.
    """
    hetero_graph = dgl.as_heterograph(create_random_graph(10000))
    for part_method in ('metis', 'random'):
        for flag in (True, False):
            check_partition(hetero_graph, part_method, flag)
Пример #2
0
def test_cast():
    """Check that casting DGLGraph -> heterograph -> immutable graph keeps structure.

    Builds a 4-node, 2-edge read-only graph, casts it to a heterograph with
    node type 'A' and edge type 'AA', then casts that back with
    ``dgl.as_immutable_graph``. After each cast the node count, edge count
    and the (src, dst) pairs in edge-ID order must match the original graph.
    """
    m = spsp.coo_matrix(([1, 1], ([0, 1], [1, 2])), (4, 4))
    g = dgl.DGLGraph(m, readonly=True)
    gsrc, gdst = g.edges(order='eid')
    g.ndata['x'] = F.randn((4, 5))
    g.edata['y'] = F.randn((2, 4))

    hg = dgl.as_heterograph(g, 'A', 'AA')
    assert hg.ntypes == ['A']
    assert hg.etypes == ['AA']
    assert hg.canonical_etypes == [('A', 'AA', 'A')]
    assert hg.number_of_nodes() == 4
    assert hg.number_of_edges() == 2
    hgsrc, hgdst = hg.edges(order='eid')
    assert F.array_equal(gsrc, hgsrc)
    assert F.array_equal(gdst, hgdst)

    g2 = dgl.as_immutable_graph(hg)
    assert g2.number_of_nodes() == 4
    assert g2.number_of_edges() == 2
    # Read the edges from g2 (the round-tripped graph), not from hg, so the
    # as_immutable_graph result is what is actually being verified.
    g2src, g2dst = g2.edges(order='eid')
    assert F.array_equal(g2src, gsrc)
    assert F.array_equal(g2dst, gdst)
Пример #3
0
def test_hetero_metis_partition():
    """Run the metis partition checks on a large graph cast to a heterograph.

    ``check_metis_partition`` is called with second argument 0, 1 and 2 in
    turn, followed by ``check_metis_partition_with_constraint``.
    """
    # TODO(zhengda) Metis fails to partition a small graph.
    graph_index = create_large_graph_index(1000)
    hetero_graph = dgl.as_heterograph(dgl.DGLGraph(graph_index, readonly=True))
    for second_arg in (0, 1, 2):
        check_metis_partition(hetero_graph, second_arg)
    check_metis_partition_with_constraint(hetero_graph)
Пример #4
0
    def load(cls, filepath, device=None, faiss_gpu=None):
        """Rebuild an instance of this class from files stored under ``filepath``.

        Reads the constructor arguments from ``initargs.pkl``, builds a new
        instance with them, and then attaches the graph (``dgl.bin``), node id
        table (``node_ids.csv``), embedding (``embed.torch``), network weights
        (``model_weights.torch``) and final embeddings (``final_embed.npy``).

        Args
        ----
        filepath : str
            directory on disk to load from
        device : str
            when not None, used instead of the saved pytorch device
        faiss_gpu : str
            when not None, used instead of the saved faiss gpu setting

        Returns
        -------
        The restored instance."""

        # NOTE(review): pickle.load can execute arbitrary code; only load
        # directories that come from a trusted source.
        with open(f'{filepath}/initargs.pkl','rb') as pklf:
            (embedding_dim,
             feature_dim,
             hidden_dim,
             hidden_layers,
             dropout,
             agg_type,
             distance,
             torch_device,
             faiss_gpu_loaded,
             inference_batch_size,
             p_train,
             train_faiss_index) = pickle.load(pklf)

        # Caller-supplied overrides win over the pickled values.
        torch_device = torch_device if device is None else device
        faiss_gpu_loaded = faiss_gpu_loaded if faiss_gpu is None else faiss_gpu

        restored_self = cls(
            embedding_dim, feature_dim, hidden_dim, hidden_layers,
            dropout, agg_type, distance, torch_device,
            faiss_gpu_loaded, inference_batch_size, p_train,
            train_faiss_index)

        # load_graphs returns a pair; only the first graph of the first
        # element is used here.
        graph_list, _ = dgl.data.utils.load_graphs(f'{filepath}/dgl.bin')
        graph = graph_list[0]
        graph.readonly()
        restored_self.G = dgl.as_heterograph(graph)

        restored_self.node_ids = pd.read_csv(f'{filepath}/node_ids.csv')

        # Both torch files are mapped onto the (possibly overridden) device.
        restored_self.embed = th.load(
            f'{filepath}/embed.torch',
            map_location=th.device(torch_device))
        state_dict = th.load(
            f'{filepath}/model_weights.torch',
            map_location=th.device(torch_device))
        restored_self.net.load_state_dict(state_dict)

        restored_self._embeddings = np.load(
            f'{filepath}/final_embed.npy', allow_pickle=False)

        return restored_self
Пример #5
0
def check_rpc_in_subgraph(tmpdir, num_server):
    """Compare an in-subgraph obtained through the RPC servers with a local one.

    Writes ``rpc_ip_config.txt`` with one line per server, partitions the
    cora citation graph into ``num_server`` parts under ``tmpdir``, starts one
    server process per part, and requests the in-subgraph of a fixed node
    list through ``start_in_subgraph_client``. The result is then checked
    against ``dgl.in_subgraph`` on the full graph: same node count, same
    sorted source and destination arrays, and matching edge IDs.

    Parameters
    ----------
    tmpdir
        Directory passed to ``partition_graph`` and to the server/client helpers.
    num_server : int
        Number of server processes to start; also used as the partition count.
    """
    # Use a context manager so the config file is closed (and its contents
    # flushed) even if resolving the local address raises part-way through.
    with open("rpc_ip_config.txt", "w") as ip_config:
        for _ in range(num_server):
            ip_config.write('{} 1\n'.format(get_local_usable_addr()))

    g = CitationGraphDataset("cora")[0]
    g.readonly()
    num_parts = num_server

    partition_graph(g,
                    'test_in_subgraph',
                    num_parts,
                    tmpdir,
                    num_hops=1,
                    part_method='metis',
                    reshuffle=False)

    pserver_list = []
    ctx = mp.get_context('spawn')
    for i in range(num_server):
        p = ctx.Process(target=start_server,
                        args=(i, tmpdir, num_server > 1, 'test_in_subgraph'))
        p.start()
        # Stagger server start-up by one second per process.
        time.sleep(1)
        pserver_list.append(p)

    nodes = [0, 10, 99, 66, 1024, 2008]
    # Pause before the client issues its request.
    time.sleep(3)
    sampled_graph = start_in_subgraph_client(0, tmpdir, num_server > 1, nodes)
    for p in pserver_list:
        p.join()

    src, dst = sampled_graph.edges()
    g = dgl.as_heterograph(g)
    assert sampled_graph.number_of_nodes() == g.number_of_nodes()
    subg1 = dgl.in_subgraph(g, nodes)
    src1, dst1 = subg1.edges()
    # Edge order may differ between the two results, so compare sorted arrays.
    assert np.all(np.sort(F.asnumpy(src)) == np.sort(F.asnumpy(src1)))
    assert np.all(np.sort(F.asnumpy(dst)) == np.sort(F.asnumpy(dst1)))
    eids = g.edge_ids(src, dst)
    assert np.array_equal(F.asnumpy(sampled_graph.edata[dgl.EID]),
                          F.asnumpy(eids))
Пример #6
0
    def set_masks(self):
        """Sets train, test, and relevance masks. Needs to be called once after data has been added to graph.
        self.train and self.evaluate automatically check if this needs to be called and will call it, but
        it can also be called manually. Can be called a second time manually to reroll the random generation
        of the train and test sets.

        Sets ``labels``, ``is_relevant_mask``, ``entity_mask``, ``train_mask``,
        ``test_mask``, ``features`` and ``_masks_set`` on the instance, and
        re-sorts ``node_ids`` by ``intID``. If the graph is not yet read-only,
        a new embedding is created, the graph is made read-only and converted
        to a heterograph, and the embedding weight is stored as node data
        under 'features'."""

        self.node_ids = self.node_ids.sort_values('intID')
        self.labels = self.node_ids.classid.to_numpy()

        #is relevant mask indicates the nodes which we know the class of
        self.is_relevant_mask = np.logical_not(pd.isna(self.node_ids.classid).to_numpy())

        #entity_mask indicates the nodes which we want to include in the faiss index
        #(builtin bool is used because the np.bool alias was removed in NumPy 1.24)
        self.entity_mask = np.logical_not(self.node_ids.feature_flag.to_numpy().astype(bool))

        #each node lands in the train set independently with probability p_train
        self.train_mask = np.random.choice(
            a=[False,True],size=(len(self.node_ids)),p=[1-self.p_train,self.p_train])

        #test set is all nodes other than the train set unless train set is all
        #nodes and then test set is the same as train set.
        if self.p_train != 1:
            self.test_mask = np.logical_not(self.train_mask)
        else:
            self.test_mask = self.train_mask

        #do not include any node without a classid in either set,
        #and keep only nodes selected by entity_mask
        self.train_mask = np.logical_and(self.train_mask,self.is_relevant_mask)
        self.train_mask = np.logical_and(self.train_mask,self.entity_mask)
        self.test_mask = np.logical_and(self.test_mask,self.is_relevant_mask)
        self.test_mask = np.logical_and(self.test_mask,self.entity_mask)

        if not self.G.is_readonly:
            self.embed = nn.Embedding(len(self.node_ids),self.feature_dim)
            self.G.readonly()
            self.G = dgl.as_heterograph(self.G)
            self.G.ndata['features'] = self.embed.weight

        self.features = self.embed.weight
        #Tensor.to returns a new tensor instead of moving the tensor in place, so
        #the previous bare `self.features.to(self.device)` call had no effect and
        #is omitted. Module.to moves the embedding's parameters in place, and
        #self.features refers to that same parameter object.
        self.embed.to(self.device)

        self._masks_set = True
Пример #7
0
    def load_graph_data(self,filepath):
        """Restore graph data from disk, but not network parameters
        or trained embeddings. Useful for changing network parameters
        if you don't want to reconstruct the graph.

        Loads the graph from ``dgl.bin`` and the node id table from
        ``node_ids.csv``, then resets ``_masks_set``, ``_embeddings`` and
        ``_index`` so they are recomputed for the restored graph.

        Args
        ----
        filepath : str
            path to the directory where the GraphRecommender was previously saved
        """

        # load_graphs returns a pair; only the first graph of the first
        # element is used. The original code referenced an undefined
        # `restored_self` here, which raised NameError.
        graph_list, _ = dgl.data.utils.load_graphs(f'{filepath}/dgl.bin')
        graph = graph_list[0]
        graph.readonly()
        self.G = dgl.as_heterograph(graph)

        self.node_ids = pd.read_csv(f'{filepath}/node_ids.csv')

        self._masks_set = False
        self._embeddings = None
        self._index = None
Пример #8
0
    # Fragment of a larger script body: `argparser` and the `--gpu` option
    # read below are defined before the visible lines.
    argparser.add_argument(
        '--num-workers',
        type=int,
        default=0,
        help="Number of sampling processes. Use 0 for no extra process.")
    argparser.add_argument('--inductive',
                           action='store_true',
                           help="Inductive learning setting")
    args = argparser.parse_args()

    # args.gpu is a comma-separated string of integers; one entry per device.
    devices = list(map(int, args.gpu.split(',')))
    n_gpus = len(devices)

    g, n_classes = load_reddit()
    # Convert the loaded graph to a heterograph
    g = dgl.as_heterograph(g)
    # Size of the second dimension of the 'features' node data
    in_feats = g.ndata['features'].shape[1]

    # With --inductive the graph is split into three graphs; otherwise the
    # same graph object is used for training, validation and testing.
    if args.inductive:
        train_g, val_g, test_g = inductive_split(g)
    else:
        train_g = val_g = test_g = g

    # NOTE(review): prepare_mp is defined elsewhere; presumably it prepares
    # each graph for use across processes -- confirm against its definition.
    prepare_mp(train_g)
    prepare_mp(val_g)
    prepare_mp(test_g)
    # Pack everything run() needs into a single tuple
    data = in_feats, n_classes, train_g, val_g, test_g

    # Single-device case: call run directly with index 0.
    if n_gpus == 1:
        run(0, n_gpus, args, devices, data)
Пример #9
0
    # Fragment of a larger script body: `labels`, `graph`, the index splits,
    # `args` and `device` are defined before the visible lines.
    # Keep only the first column of labels.
    labels = labels[:, 0]

    # Make the graph mutable so edges can be removed and added.
    graph.readonly(False)
    # Look up edges whose source and destination are the same node id;
    # self_e holds the IDs of those existing self-loop edges.
    _, _, self_e = graph.edge_ids(th.arange(graph.number_of_nodes()),
                                  th.arange(graph.number_of_nodes()),
                                  return_uv=True)
    print('Total edges before adding self-loop {}'.format(
        graph.number_of_edges()))
    # clean partial self-loop edges
    graph.remove_edges(self_e)
    # Add all self-loop edges
    graph.add_edges(th.arange(graph.number_of_nodes()),
                    th.arange(graph.number_of_nodes()))
    print('Total edges after adding self-loop {}'.format(
        graph.number_of_edges()))
    # Freeze the graph again before converting it to a heterograph.
    graph.readonly(True)
    graph = dgl.as_heterograph(graph)

    # Size of the second dimension of the 'feat' node data
    in_feats = graph.ndata['feat'].shape[1]
    # Number of classes is taken as the largest label value plus one.
    n_classes = (labels.max() + 1).item()
    # NOTE(review): prepare_mp is defined elsewhere; presumably it prepares
    # the graph for use across processes -- confirm against its definition.
    prepare_mp(graph)
    # Pack data
    data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, args.head

    # Run 10 times
    test_accs = []
    for i in range(10):
        test_accs.append(run(args, device, data))
        # NOTE(review): this print sits inside the loop, so it reports the
        # running mean/std after every run -- confirm that is intended.
        print('Average test accuracy:', np.mean(test_accs), '±',
              np.std(test_accs))