示例#1
0
文件: gf.py 项目: vitamins/GEM
 def learn_embedding(self,
                     graph=None,
                     edge_f=None,
                     is_weighted=False,
                     no_python=False):
     """Learn a graph-factorization embedding and return it with the elapsed time.

     Either ``graph`` or ``edge_f`` must be supplied. When ``no_python`` is
     true the compiled ``graphFac_ext`` extension is tried first; if it cannot
     be imported, a message is printed and the pure-Python SGD loop below is
     used instead.

     Args:
         graph: Graph object exposing ``number_of_nodes()`` and
             ``edges(data='weight', default=1)``. Node ids are used directly
             as row indices into the embedding matrix, so they presumably
             must be integers in ``[0, number_of_nodes)`` -- confirm against
             callers.
         edge_f: Path of an edge-list file, loaded through
             ``graph_util.loadGraphFromEdgeListTxt``. On the extension path
             it takes precedence over ``graph`` when both are given.
         is_weighted: Kept for interface compatibility. The extension path
             overwrites it with ``True``; the Python path never reads it and
             takes each edge's ``'weight'`` attribute (default 1).
         no_python: Try the compiled extension before the Python loop.

     Returns:
         Tuple ``(X, seconds)``: the embedding matrix stored in ``self._X``
         and the wall-clock duration of the learning step.

     Raises:
         Exception: If neither ``graph`` nor ``edge_f`` is provided.
     """
     # Test for None explicitly: graph objects that define __len__ (e.g.
     # networkx graphs) are falsy when empty, and an empty graph must not be
     # mistaken for a missing argument.
     if graph is None and not edge_f:
         raise Exception('graph/edge_f needed')

     if no_python:
         try:
             from c_ext import graphFac_ext
         except ImportError:
             print(
                 'Could not import C++ module for Graph Factorization. Reverting to python implementation. Please recompile graphFac_ext from graphFac.cpp using bjam'
             )
         else:
             if edge_f:
                 graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
             # The extension reads its input from disk, so the graph is
             # always re-serialised to a temporary edge list first.
             graph_util.saveGraphToEdgeListTxt(graph, 'tempGraph.graph')
             is_weighted = True
             edge_f = 'tempGraph.graph'
             t1 = time()
             graphFac_ext.learn_embedding(edge_f, "tempGraphGF.emb", True,
                                          is_weighted, self._d, self._eta,
                                          self._regu, self._max_iter)
             self._X = graph_util.loadEmbedding('tempGraphGF.emb')
             t2 = time()
             return self._X, (t2 - t1)

     # Pure-Python path (also the fallback when the extension is missing).
     if graph is None:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     t1 = time()
     self._node_num = graph.number_of_nodes()
     self._X = 0.01 * np.random.randn(self._node_num, self._d)
     for iter_id in range(self._max_iter):
         # Report the objective every ``_print_step`` iterations.
         if not iter_id % self._print_step:
             [f1, f2, f] = self._get_f_value(graph)
             print('\t\tIter id: %d, Objective: %g, f1: %g, f2: %g' %
                   (iter_id, f, f1, f2))
         for i, j, w in graph.edges(data='weight', default=1):
             # Only edges with i < j contribute, and only row i is updated.
             if j <= i:
                 continue
             x_i = self._X[i, :]
             x_j = self._X[j, :]
             # Gradient of the squared reconstruction error plus the
             # regularisation term, taken with respect to row i.
             grad = -(w - np.dot(x_i, x_j)) * x_j + self._regu * x_i
             self._X[i, :] -= self._eta * grad
     t2 = time()
     return self._X, (t2 - t1)
示例#2
0
 def learn_embedding(self, graph=None, edge_f=None,
                     is_weighted=False, no_python=True):
     """Learn a graph-factorization embedding and return it with the elapsed time.

     Either ``graph`` or ``edge_f`` must be supplied. When ``no_python`` is
     true the external ``gf`` executable is invoked on an edge list written
     under ``gem/intermediate``; if launching it raises, a message is printed
     and the pure-Python SGD loop below is used instead.

     Args:
         graph: Graph object exposing ``nodes`` and
             ``edges(data='weight', default=1)``. Node ids are used directly
             as row indices into the embedding matrix, so they presumably
             must be integers in ``[0, len(graph.nodes))`` -- confirm against
             callers.
         edge_f: Path of an edge-list file, loaded through
             ``graph_util.loadGraphFromEdgeListTxt``. On the executable path
             it takes precedence over ``graph`` when both are given.
         is_weighted: Kept for interface compatibility; this method never
             reads it. The executable is always passed "1" for the weighted
             flag, and the Python path takes each edge's ``'weight'``
             attribute (default 1).
         no_python: Try the external executable before the Python loop.

     Returns:
         Tuple ``(X, seconds)``: the embedding matrix stored in ``self._X``
         and the wall-clock duration of the learning step.

     Raises:
         Exception: If neither ``graph`` nor ``edge_f`` is provided.
     """
     # Test for None explicitly: graph objects that define __len__ (e.g.
     # networkx graphs) are falsy when empty, and an empty graph must not be
     # mistaken for a missing argument.
     if graph is None and not edge_f:
         raise Exception('graph/edge_f needed')

     if no_python:
         import os  # local import: only the cleanup step below needs it

         if sys.platform[0] == "w":
             args = ["gem/c_exe/gf.exe"]
         else:
             args = ["gem/c_exe/gf"]
         if edge_f:
             graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
         graphFileName = 'gem/intermediate/%s_gf.graph' % self._data_set
         embFileName = 'gem/intermediate/%s_%d_gf.emb' % (self._data_set, self._d)
         # The graph file is rewritten on every call rather than reused.
         graph_util.saveGraphToEdgeListTxt(graph, graphFileName)
         args.append(graphFileName)
         args.append(embFileName)
         args.append("1")  # Verbose
         args.append("1")  # Weighted
         args.append("%d" % self._d)
         args.append("%f" % self._eta)
         args.append("%f" % self._regu)
         args.append("%d" % self._max_iter)
         args.append("%d" % self._print_step)
         t1 = time()
         try:
             call(args)
         except Exception as e:
             print(str(e))
             # NOTE(review): the message mentions node2vec, which looks like
             # a copy-paste slip; left unchanged pending confirmation.
             print('./gf not found. Reverting to Python implementation. Please compile gf, place node2vec in the path and grant executable permission')
         else:
             try:
                 self._X = graph_util.loadEmbedding(embFileName)
             except FileNotFoundError:
                 # No embedding file was produced: fall back to a random
                 # embedding instead of failing.
                 self._X = np.random.randn(len(graph.nodes), self._d)
             t2 = time()
             # Best-effort cleanup. os.remove works on every platform,
             # unlike shelling out to ``rm``.
             try:
                 os.remove(embFileName)
             except OSError:
                 pass
             return self._X, (t2 - t1)

     # Pure-Python path (also the fallback when the executable cannot run).
     if graph is None:
         graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
     t1 = time()
     self._node_num = len(graph.nodes)
     self._X = 0.01 * np.random.randn(self._node_num, self._d)
     for iter_id in range(self._max_iter):
         # Report the objective every ``_print_step`` iterations.
         if not iter_id % self._print_step:
             [f1, f2, f] = self._get_f_value(graph)
             print('\t\tIter id: %d, Objective: %g, f1: %g, f2: %g' % (
                 iter_id,
                 f,
                 f1,
                 f2
             ))
         for i, j, w in graph.edges(data='weight', default=1):
             # Only edges with i < j contribute, and only row i is updated.
             if j <= i:
                 continue
             x_i = self._X[i, :]
             x_j = self._X[j, :]
             # Gradient of the squared reconstruction error plus the
             # regularisation term, taken with respect to row i.
             grad = -(w - np.dot(x_i, x_j)) * x_j + self._regu * x_i
             self._X[i, :] -= self._eta * grad
     t2 = time()
     return self._X, (t2 - t1)