Exemplo n.º 1
0
def test_cast():
    """Round-trip a small read-only DGLGraph through the heterograph API.

    Builds a 4-node, 2-edge graph, converts it with ``dgl.as_heterograph``
    and then back with ``dgl.as_immutable_graph``, checking after each
    conversion that the node/edge counts and the edge endpoints (in edge-id
    order) match the original graph.
    """
    # Sparse adjacency with nonzeros at (0, 1) and (1, 2) on a 4x4 matrix.
    m = spsp.coo_matrix(([1, 1], ([0, 1], [1, 2])), (4, 4))
    g = dgl.DGLGraph(m, readonly=True)
    gsrc, gdst = g.edges(order='eid')
    # Attach node and edge features so the conversions run on a graph that
    # carries data.
    ndata = F.randn((4, 5))
    edata = F.randn((2, 4))
    g.ndata['x'] = ndata
    g.edata['y'] = edata

    # DGLGraph -> heterograph with a single node type and a single edge type.
    hg = dgl.as_heterograph(g, 'A', 'AA')
    assert hg.ntypes == ['A']
    assert hg.etypes == ['AA']
    assert hg.canonical_etypes == [('A', 'AA', 'A')]
    assert hg.number_of_nodes() == 4
    assert hg.number_of_edges() == 2
    hgsrc, hgdst = hg.edges(order='eid')
    assert F.array_equal(gsrc, hgsrc)
    assert F.array_equal(gdst, hgdst)

    # heterograph -> immutable graph.
    g2 = dgl.as_immutable_graph(hg)
    assert g2.number_of_nodes() == 4
    assert g2.number_of_edges() == 2
    # Read the edges from the converted graph itself (not from ``hg``) so the
    # second conversion is what is actually being verified.
    g2src, g2dst = g2.edges(order='eid')
    assert F.array_equal(g2src, gsrc)
    assert F.array_equal(g2dst, gdst)
Exemplo n.º 2
0
    def add_nodes(self, nodearray, skip_duplicates=False):
        """Define nodes by passing an array (or array like object). Nodes
        can be identified by any data type (even mixed data types), but each
        node must be unique. Each new node is mapped to a unique integer id
        (``intID``) that continues from the largest one already assigned, in
        the order the nodes are given.

        The new rows are appended to ``self.node_ids``, the same number of
        nodes is added to ``self.G``, and the cached masks, embeddings and
        index are invalidated.

        Args
        ----
        nodearray : numpy array (or array-like object)
            array containing the identifiers of each node to be added
        skip_duplicates : bool
            if True, ignore nodes which have already been added. If False, raise error.

        Raises
        ------
        ValueError
            if ``nodearray`` itself contains repeated identifiers, or if some
            identifiers were already added and ``skip_duplicates`` is False.
        """
        nodedf = pd.DataFrame(nodearray, columns=['id'])

        if nodedf.duplicated().any():
            raise ValueError('Provided nodeids are not unique. Please pass an array of unique identifiers.')

        # Boolean mask of the provided identifiers that are already registered.
        already_added = nodedf['id'].isin(self.node_ids['id'])
        if already_added.any():
            if not skip_duplicates:
                raise ValueError(
                'Some provided nodes have already been added to the graph. See node_ids.ids.')
            # Keep only the identifiers that are new to the graph.
            nodedf = nodedf.loc[~already_added, ['id']].copy()

        # max() of an empty column is NaN, meaning no node has been added yet.
        # The int() cast keeps range() working even if the column is float.
        current_maximum_id = self.node_ids['intID'].max()
        if pd.isna(current_maximum_id):
            start = 0
        else:
            start = int(current_maximum_id) + 1
        num_new_nodes = len(nodedf)

        nodedf['intID'] = range(start, start + num_new_nodes)
        nodedf['classid'] = None
        nodedf['feature_flag'] = False

        self.node_ids = pd.concat([self.node_ids, nodedf])

        # A read-only graph is converted and switched to mutable before nodes
        # are added to it.
        if self.G.is_readonly:
            self.G = dgl.as_immutable_graph(self.G)
            self.G.readonly(False)
        self.G.add_nodes(num_new_nodes)

        # The graph changed, so anything derived from it is now stale.
        self._masks_set = False
        self._embeddings = None
        self._index = None
Exemplo n.º 3
0
    def add_edges(self, n1, n2):
        """Adds edges to the DGL graph. Nodes must be previously defined by
        add_nodes or an exception is raised. Edges are directed. To define
        an undirected graph, include both n1->n2 and n2->n1 in the graph.

        The input is split into chunks based on ``MAX_ADD_EDGES``; chunks are
        added to the graph in input order, and a progress bar is shown when
        there is more than one chunk. The cached masks, embeddings and index
        are invalidated afterwards.

        Args
        ----
        n1 : numpy array (or array-like object)
            first node in the edge (n1->n2)
        n2 : numpy array (or array-like object)
            second node in the edge (n1->n2)

        Raises
        ------
        ValueError
            if an endpoint does not match any identifier in ``self.node_ids``.
            Validation happens chunk by chunk, so edges from earlier chunks
            have already been added to the graph when this is raised.
        """
        edgedf_all = pd.DataFrame(n1,columns=['n1'])
        edgedf_all['n2'] = n2

        chunks = int(max(len(edgedf_all)//MAX_ADD_EDGES,1))
        edge_chunks = np.array_split(edgedf_all, chunks)
        # pop() takes from the end of the list; reversing first means chunks
        # are consumed in input order while each one is still released as soon
        # as it has been processed.
        edge_chunks.reverse()

        # The context manager closes the bar even if a chunk fails validation;
        # the bar is disabled entirely for a single chunk.
        with tqdm.tqdm(total=chunks, disable=chunks <= 1) as pbar:
            for _ in range(chunks):
                edgedf = edge_chunks.pop()
                # Translate both endpoints from user identifiers to integer ids.
                edgedf = edgedf.merge(self.node_ids,left_on='n1',right_on='id',how='left')
                edgedf = edgedf.merge(self.node_ids,left_on='n2',right_on='id',how='left',suffixes=('','2'))
                edgedf = edgedf[['intID','intID2']]

                # A left merge leaves NaN where an endpoint matched no node.
                if edgedf.isna().values.any():
                    raise ValueError('Some edges do not correspond to any known node. Please add with add_nodes method first.')

                # A read-only graph is converted and switched to mutable
                # before edges are added to it.
                if self.G.is_readonly:
                    self.G = dgl.as_immutable_graph(self.G)
                    self.G.readonly(False)

                self.G.add_edges(edgedf.intID,edgedf.intID2)
                pbar.update(1)

        # The graph changed, so anything derived from it is now stale.
        self._masks_set = False
        self._embeddings = None
        self._index = None
Exemplo n.º 4
0
    def save(self, filepath):
        """Save all information necessary to recover the current state of this
        instance to a folder. Initialization args, graph data, node ids, the
        current trained embedding, and the current torch parameters are all saved.

        The folder is created if it does not exist yet. Files written:
        ``dgl.bin``, ``node_ids.csv``, ``embed.torch``, ``model_weights.torch``,
        ``final_embed.npy`` and ``initargs.pkl``.

        Args
        ----
        filepath : str
            path on disk to the folder in which the files are saved"""
        import os

        # Every write below targets a file inside this folder, so make sure it
        # exists before the first one.
        os.makedirs(filepath, exist_ok=True)

        outg = dgl.as_immutable_graph(self.G)
        dgl.data.utils.save_graphs(f'{filepath}/dgl.bin',outg)

        self.node_ids.to_csv(f'{filepath}/node_ids.csv',index=False)

        th.save(self.embed,f'{filepath}/embed.torch')
        th.save(self.net.state_dict(),f'{filepath}/model_weights.torch')
        embeddings = self.embeddings
        # allow_pickle=False makes np.save refuse object arrays rather than
        # pickling them.
        np.save(f'{filepath}/final_embed.npy',embeddings,allow_pickle=False)

        with open(f'{filepath}/initargs.pkl','wb') as pklf:
            pickle.dump(self.initargs,pklf)