def binary_community_graph(N, k, maxk, mu):
    """Returns a binary community graph generated by the LFR benchmark."""
    if sys.platform[0] == "w":  # Windows
        args = ["gemben/c_exe/benchm.exe"]
        fcall = "gemben/c_exe/benchm.exe"
    else:
        args = ["gemben/c_exe/benchm"]
        fcall = "gemben/c_exe/benchm"
    args.append("-N %d" % N)
    args.append("-k %d" % k)
    args.append("-maxk %d" % maxk)
    args.append("-mu %f" % mu)
    t1 = time()
    print(args)
    try:
        os.system("%s -N %d -k %d -maxk %d -mu %f" % (fcall, N, k, maxk, mu))
        # call(args)
    except Exception as e:
        print('ERROR: %s' % str(e))
        print('gemben/c_exe/benchm not found. Please compile benchm, '
              'place it in the path and grant executable permission')
    t2 = time()
    print('\tTime taken to generate random graph: %f sec' % (t2 - t1))
    try:
        graph = graph_util.loadGraphFromEdgeListTxt('gemben/c_exe/network.dat')
        node_labels = np.loadtxt('gemben/c_exe/community.dat')
    except IOError:
        graph = graph_util.loadGraphFromEdgeListTxt('network.dat')
        node_labels = np.loadtxt('community.dat')
    node_labels = node_labels[:, -1].reshape(-1, 1)
    enc = OneHotEncoder()
    return graph, enc.fit_transform(node_labels)

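# Usage sketch (illustrative; assumes the compiled LFR `benchm` binary sits in
# gemben/c_exe and the module-level imports used above are in scope): generate
# a 1000-node benchmark graph with average degree 10, maximum degree 50 and
# mixing parameter 0.1, then inspect the one-hot community labels.
if __name__ == '__main__':
    G, Y = binary_community_graph(N=1000, k=10, maxk=50, mu=0.1)
    print('nodes: %d, label matrix shape: %s' % (G.number_of_nodes(), str(Y.shape)))
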
def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    t1 = time()
    # A = nx.to_scipy_sparse_matrix(graph)
    # I = sp.eye(graph.number_of_nodes())
    # M_g = I - self._beta*A
    # M_l = self._beta*A
    A = nx.to_numpy_matrix(graph)
    if self._sim_fn == "katz":
        M_g = np.eye(graph.number_of_nodes()) - self._beta * A
        M_l = self._beta * A
    elif self._sim_fn == "pagerank":
        # np.matrix can't broadcast row-wise division; convert to ndarray
        A = np.array(A)
        # add a small constant in case a row sum is 0
        row_sums = A.sum(axis=1) + 1e-8
        P = A / row_sums[:, np.newaxis]
        M_g = np.eye(graph.number_of_nodes()) - self._beta * P
        M_l = (1 - self._beta) * np.eye(graph.number_of_nodes())
    elif self._sim_fn == "cn":
        M_g = np.eye(graph.number_of_nodes())
        M_l = np.dot(A, A)
    elif self._sim_fn == "aa":
        # degree vector (out-degree + in-degree), flattened so the
        # diagonal matrix below has the right shape
        D = np.asarray(A.sum(axis=1)).flatten() + np.asarray(A.sum(axis=0)).flatten()
        D = np.diag(np.reciprocal(D.astype('float')))
        M_g = np.eye(graph.number_of_nodes())
        M_l = np.dot(np.dot(A, D), A)
    else:
        M_g = np.eye(graph.number_of_nodes()) - self._beta * A
        M_l = self._beta * A
    try:
        S = np.dot(np.linalg.inv(M_g), M_l)
        u, s, vt = lg.svds(S, k=self._d // 2)
        X1 = np.dot(u, np.diag(np.sqrt(s)))
        X2 = np.dot(vt.T, np.diag(np.sqrt(s)))
        t2 = time()
        self._X = np.concatenate((X1, X2), axis=1)
        p_d_p_t = np.dot(u, np.dot(np.diag(s), vt))
        eig_err = np.linalg.norm(p_d_p_t - S)
        print('SVD error (low rank): %f' % eig_err)
        return self._X, (t2 - t1)
    except Exception:
        print('Singular matrix or SVD did not converge. '
              'Assigning random embedding')
        X1 = np.random.randn(A.shape[0], self._d // 2)
        X2 = np.random.randn(A.shape[0], self._d // 2)
        t2 = time()
        self._X = np.concatenate((X1, X2), axis=1)
        return self._X, (t2 - t1)

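# Minimal standalone sketch of the "katz" branch above (illustrative values
# for `d` and `beta`; assumes only numpy, scipy and networkx): the Katz
# proximity S = (I - beta*A)^-1 (beta*A) is factorised by a rank-(d/2)
# truncated SVD into source and target embeddings, as in HOPE.
import networkx as nx
import numpy as np
from scipy.sparse.linalg import svds

def katz_embedding_sketch(G, d=4, beta=0.01):
    A = nx.to_numpy_array(G)
    n = A.shape[0]
    S = np.linalg.inv(np.eye(n) - beta * A).dot(beta * A)
    u, s, vt = svds(S, k=d // 2)
    X1 = u.dot(np.diag(np.sqrt(s)))     # source embeddings
    X2 = vt.T.dot(np.diag(np.sqrt(s)))  # target embeddings
    return np.concatenate((X1, X2), axis=1)

# Example: katz_embedding_sketch(nx.karate_club_graph()).shape -> (34, 4)
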
def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    t1 = time()
    # Random baseline: a single Gaussian coordinate per node
    self._X = np.random.randn(graph.number_of_nodes(), 1)
    t2 = time()
    return self._X, (t2 - t1)

def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    args = ["gem/c_exe/node2vec"]
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if edge_f:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graphFileName = 'gem/intermediate/%s_n2v.graph' % self._data_set
    embFileName = 'gem/intermediate/%s_%d_n2v.emb' % (self._data_set, self._d)
    try:
        f = open(graphFileName, 'r')
        f.close()
    except IOError:
        graph_util.saveGraphToEdgeListTxtn2v(graph, graphFileName)
    args.append("-i:%s" % graphFileName)
    args.append("-o:%s" % embFileName)
    args.append("-d:%d" % self._d)
    args.append("-l:%d" % self._walk_len)
    args.append("-r:%d" % self._num_walks)
    args.append("-k:%d" % self._con_size)
    args.append("-e:%d" % self._max_iter)
    args.append("-p:%f" % self._ret_p)
    args.append("-q:%f" % self._inout_p)
    args.append("-v")
    args.append("-dr")
    args.append("-w")
    t1 = time()
    try:
        call(args)
    except Exception as e:
        print(str(e))
        raise Exception(
            './node2vec not found. Please compile snap, place node2vec '
            'in the path and grant executable permission')
    self._X = graph_util.loadEmbedding(embFileName)
    t2 = time()
    call(["rm", embFileName])
    return self._X, (t2 - t1)

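# For reference, the argument list assembled above corresponds to a shell
# invocation of SNAP's node2vec binary of roughly this form (parameter values
# illustrative; file names follow the pattern built above):
#   gem/c_exe/node2vec -i:gem/intermediate/karate_n2v.graph \
#       -o:gem/intermediate/karate_128_n2v.emb \
#       -d:128 -l:80 -r:10 -k:10 -e:1 -p:1.0 -q:1.0 -v -dr -w
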
def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    t1 = time()
    A = nx.to_scipy_sparse_matrix(graph)
    normalize(A, norm='l1', axis=1, copy=False)
    I_n = sp.eye(graph.number_of_nodes())
    I_min_A = I_n - A
    try:
        u, s, vt = lg.svds(I_min_A, k=self._d + 1, which='SM')
    except Exception:
        # Fall back to random factors if the SVD fails to converge
        u = np.random.randn(A.shape[0], self._d + 1)
        s = np.random.randn(self._d + 1, self._d + 1)
        vt = np.random.randn(self._d + 1, A.shape[0])
    t2 = time()
    self._X = vt.T
    # Drop the trivial smallest singular vector
    self._X = self._X[:, 1:]
    return self._X, (t2 - t1)

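# Standalone sketch mirroring the LLE-style step above (illustrative; assumes
# numpy, scipy and networkx): row-normalise the adjacency matrix into a
# transition matrix P, take the d+1 smallest singular vectors of I - P and
# drop the trivial first one, as the method above does.
import networkx as nx
import numpy as np
from scipy.sparse.linalg import svds

def lle_embedding_sketch(G, d=2):
    A = nx.to_numpy_array(G)
    P = A / (A.sum(axis=1, keepdims=True) + 1e-8)  # row-stochastic
    M = np.eye(A.shape[0]) - P
    u, s, vt = svds(M, k=d + 1, which='SM')
    return vt.T[:, 1:]  # drop the trivial smallest singular vector
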
def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    graph = graph.to_undirected()
    t1 = time()
    L_sym = nx.normalized_laplacian_matrix(graph)
    try:
        w, v = lg.eigs(L_sym, k=self._d + 1, which='SM')
        t2 = time()
        # Discard the trivial eigenvector of the smallest eigenvalue
        self._X = v[:, 1:]
        p_d_p_t = np.dot(v, np.dot(np.diag(w), v.T))
        eig_err = np.linalg.norm(p_d_p_t - L_sym)
        print('Laplacian matrix recon. error (low rank): %f' % eig_err)
        return self._X, (t2 - t1)
    except Exception:
        print('Eigendecomposition did not converge. Assigning random embedding')
        self._X = np.random.randn(L_sym.shape[0], self._d)
        t2 = time()
        return self._X, (t2 - t1)

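# Standalone sketch of the Laplacian Eigenmaps step above (illustrative;
# assumes numpy, scipy and networkx). `eigsh` is used here instead of the
# method's `eigs` because the normalised Laplacian is symmetric; the
# eigenvector of the smallest eigenvalue is trivial and dropped, as above.
import networkx as nx
from scipy.sparse.linalg import eigsh

def lap_eigenmaps_sketch(G, d=2):
    L_sym = nx.normalized_laplacian_matrix(G)
    vals, vecs = eigsh(L_sym, k=d + 1, which='SM')
    return vecs[:, 1:]
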
def get_reconstructed_adj(self, X=None, node_l=None):
    if X is not None:
        node_num = X.shape[0]
        self._X = X
    else:
        node_num = self._node_num
    adj_mtx_r = np.zeros((node_num, node_num))
    for v_i in range(node_num):
        for v_j in range(node_num):
            if v_i == v_j:
                continue
            adj_mtx_r[v_i, v_j] = self.get_edge_weight(v_i, v_j)
    return adj_mtx_r


if __name__ == '__main__':
    # load Zachary's Karate graph
    edge_f = 'data/karate.edgelist'
    G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=False)
    G = G.to_directed()
    res_pre = 'results/testKarate'
    graph_util.print_graph_stats(G)
    t1 = time()
    embedding = HOPE(4, 0.01)
    embedding.learn_embedding(graph=G, edge_f=None,
                              is_weighted=True, no_python=True)
    print('HOPE:\n\tTraining time: %f' % (time() - t1))
    viz.plot_embedding2D(embedding.get_embedding()[:, :2],
                         di_graph=G, node_colors=None)
    plt.show()

def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    S = nx.to_scipy_sparse_matrix(graph)
    t1 = time()
    S = (S + S.T) / 2
    self._node_num = graph.number_of_nodes()

    # Generate encoder, decoder and autoencoder
    self._num_iter = self._n_iter
    # If cannot use previous step information, initialize new models
    self._encoder = get_encoder(self._node_num, self._d, self._n_units,
                                self._nu1, self._nu2, self._actfn)
    self._decoder = get_decoder(self._node_num, self._d, self._n_units,
                                self._nu1, self._nu2, self._actfn)
    self._autoencoder = get_autoencoder(self._encoder, self._decoder)

    # Initialize self._model
    # Input
    x_in = Input(shape=(2 * self._node_num, ), name='x_in')
    x1 = Lambda(lambda x: x[:, 0:self._node_num],
                output_shape=(self._node_num, ))(x_in)
    x2 = Lambda(lambda x: x[:, self._node_num:2 * self._node_num],
                output_shape=(self._node_num, ))(x_in)
    # Process inputs
    [x_hat1, y1] = self._autoencoder(x1)
    [x_hat2, y2] = self._autoencoder(x2)
    # Outputs
    x_diff1 = merge([x_hat1, x1],
                    mode=lambda ab: ab[0] - ab[1],
                    output_shape=lambda L: L[1])
    x_diff2 = merge([x_hat2, x2],
                    mode=lambda ab: ab[0] - ab[1],
                    output_shape=lambda L: L[1])
    y_diff = merge([y2, y1],
                   mode=lambda ab: ab[0] - ab[1],
                   output_shape=lambda L: L[1])

    # Objectives
    def weighted_mse_x(y_true, y_pred):
        '''Hack: This fn doesn't accept additional arguments.
           We use y_true to pass them.
        y_pred: Contains x_hat - x
        y_true: Contains [b, deg]
        '''
        return KBack.sum(
            KBack.square(y_pred * y_true[:, 0:self._node_num]),
            axis=-1) / y_true[:, self._node_num]

    def weighted_mse_y(y_true, y_pred):
        '''Hack: This fn doesn't accept additional arguments.
           We use y_true to pass them.
        y_pred: Contains y2 - y1
        y_true: Contains s12
        '''
        min_batch_size = KBack.shape(y_true)[0]
        return KBack.reshape(
            KBack.sum(KBack.square(y_pred), axis=-1),
            [min_batch_size, 1]) * y_true

    # Model
    self._model = Model(input=x_in, output=[x_diff1, x_diff2, y_diff])
    sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
    # adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    self._model.compile(
        optimizer=sgd,
        loss=[weighted_mse_x, weighted_mse_x, weighted_mse_y],
        loss_weights=[1, 1, self._alpha])

    history = self._model.fit_generator(
        generator=batch_generator_sdne(S, self._beta, self._n_batch, True),
        nb_epoch=self._num_iter,
        samples_per_epoch=S.nonzero()[0].shape[0] // self._n_batch,
        verbose=1,
        callbacks=[callbacks.TerminateOnNaN()])
    loss = history.history['loss']
    # Get embedding for all points
    if loss[-1] == np.inf or np.isnan(loss[-1]):
        print('Model diverged. Assigning random embeddings')
        self._Y = np.random.randn(self._node_num, self._d)
    else:
        self._Y = model_batch_predictor(self._autoencoder, S, self._n_batch)
    t2 = time()
    # Save the autoencoder and its weights
    if self._weightfile is not None:
        saveweights(self._encoder, self._weightfile[0])
        saveweights(self._decoder, self._weightfile[1])
    if self._modelfile is not None:
        savemodel(self._encoder, self._modelfile[0])
        savemodel(self._decoder, self._modelfile[1])
    if self._savefilesuffix is not None:
        saveweights(self._encoder,
                    'encoder_weights_' + self._savefilesuffix + '.hdf5')
        saveweights(self._decoder,
                    'decoder_weights_' + self._savefilesuffix + '.hdf5')
        savemodel(self._encoder,
                  'encoder_model_' + self._savefilesuffix + '.json')
        savemodel(self._decoder,
                  'decoder_model_' + self._savefilesuffix + '.json')
        # Save the embedding
        np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
    return self._Y, (t2 - t1)

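# Illustrative sketch of the penalty vector that `weighted_mse_x` above reads
# from y_true (an assumption about what batch_generator_sdne emits, based on
# the SDNE formulation): entries corresponding to observed edges are weighted
# by beta > 1, zeros by 1, so reconstructing existing edges matters more.
import numpy as np

def sdne_penalty_row(s_row, beta):
    b = np.ones_like(s_row, dtype=float)
    b[s_row != 0] = beta
    return b

# sdne_penalty_row(np.array([0., 1., 0., 2.]), beta=5.)  # -> [1., 5., 1., 5.]
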
def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    S = nx.to_scipy_sparse_matrix(graph)
    self._node_num = graph.number_of_nodes()
    t1 = time()

    # Generate encoder, decoder and autoencoder
    self._num_iter = self._n_iter
    self._encoder = get_encoder(self._node_num, self._d, self._n_units,
                                self._nu1, self._nu2, self._actfn)
    self._decoder = get_decoder(self._node_num, self._d, self._n_units,
                                self._nu1, self._nu2, self._actfn)
    self._autoencoder = get_autoencoder(self._encoder, self._decoder)

    # Initialize self._model
    # Input
    x_in = Input(shape=(self._node_num, ), name='x_in')
    # Process inputs
    [x_hat, y] = self._autoencoder(x_in)
    # Outputs
    x_diff = Subtract()([x_hat, x_in])
    # x_diff = merge([x_hat, x_in],
    #                mode=lambda (a, b): a - b,
    #                output_shape=lambda L: L[1])

    # Objectives
    def weighted_mse_x(y_true, y_pred):
        '''Hack: This fn doesn't accept additional arguments.
           We use y_true to pass them.
        y_pred: Contains x_hat - x
        y_true: Contains b
        '''
        return KBack.sum(KBack.square(y_true * y_pred), axis=-1)

    # Model
    self._model = Model(input=x_in, output=x_diff)
    # sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
    adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    self._model.compile(optimizer=adam, loss=weighted_mse_x)

    history = self._model.fit_generator(
        generator=batch_generator_ae(S, self._beta, self._n_batch, True),
        nb_epoch=self._num_iter,
        samples_per_epoch=S.shape[0] // self._n_batch,
        verbose=1,
        callbacks=[callbacks.TerminateOnNaN()])
    loss = history.history['loss']
    # Get embedding for all points; check the final epoch's loss for divergence
    if loss[-1] == np.inf or np.isnan(loss[-1]):
        print('Model diverged. Assigning random embeddings')
        self._Y = np.random.randn(self._node_num, self._d)
    else:
        self._Y = model_batch_predictor(self._autoencoder, S, self._n_batch)
    t2 = time()
    # Save the autoencoder and its weights
    if self._weightfile is not None:
        saveweights(self._encoder, self._weightfile[0])
        saveweights(self._decoder, self._weightfile[1])
    if self._modelfile is not None:
        savemodel(self._encoder, self._modelfile[0])
        savemodel(self._decoder, self._modelfile[1])
    if self._savefilesuffix is not None:
        saveweights(self._encoder,
                    'encoder_weights_' + self._savefilesuffix + '.hdf5')
        saveweights(self._decoder,
                    'decoder_weights_' + self._savefilesuffix + '.hdf5')
        savemodel(self._encoder,
                  'encoder_model_' + self._savefilesuffix + '.json')
        savemodel(self._decoder,
                  'decoder_model_' + self._savefilesuffix + '.json')
        # Save the embedding
        np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
    return self._Y, (t2 - t1)

def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=True):
    c_flag = True
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if no_python:
        if sys.platform[0] == "w":  # Windows
            args = ["gem/c_exe/gf.exe"]
        else:
            args = ["gem/c_exe/gf"]
        if not graph and not edge_f:
            raise Exception('graph/edge_f needed')
        if edge_f:
            graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
        graphFileName = 'gem/intermediate/%s_gf.graph' % self._data_set
        embFileName = 'gem/intermediate/%s_%d_gf.emb' % (self._data_set, self._d)
        # try:
        #     f = open(graphFileName, 'r')
        #     f.close()
        # except IOError:
        graph_util.saveGraphToEdgeListTxt(graph, graphFileName)
        args.append(graphFileName)
        args.append(embFileName)
        args.append("1")  # Verbose
        args.append("1")  # Weighted
        args.append("%d" % self._d)
        args.append("%f" % self._eta)
        args.append("%f" % self._regu)
        args.append("%d" % self._max_iter)
        args.append("%d" % self._print_step)
        t1 = time()
        try:
            call(args)
        except Exception as e:
            print(str(e))
            c_flag = False
            print('./gf not found. Reverting to Python implementation. '
                  'Please compile gf, place it in the path and grant '
                  'executable permission')
        if c_flag:
            try:
                self._X = graph_util.loadEmbedding(embFileName)
            except FileNotFoundError:
                self._X = np.random.randn(graph.number_of_nodes(), self._d)
            t2 = time()
            try:
                call(["rm", embFileName])
            except Exception:
                pass
            return self._X, (t2 - t1)
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    t1 = time()
    self._node_num = graph.number_of_nodes()
    self._X = 0.01 * np.random.randn(self._node_num, self._d)
    for iter_id in range(self._max_iter):
        if not iter_id % self._print_step:
            [f1, f2, f] = self._get_f_value(graph)
            print('\t\tIter id: %d, Objective: %g, f1: %g, f2: %g' %
                  (iter_id, f, f1, f2))
        for i, j, w in graph.edges(data='weight', default=1):
            if j <= i:
                continue
            term1 = -(w - np.dot(self._X[i, :], self._X[j, :])) * self._X[j, :]
            term2 = self._regu * self._X[i, :]
            delPhi = term1 + term2
            self._X[i, :] -= self._eta * delPhi
    t2 = time()
    return self._X, (t2 - t1)

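# Illustrative reconstruction of the objective the gradient loop above
# descends (the real `_get_f_value` helper is not shown in this snippet, so
# this is an assumption matching term1 and term2): squared edge-reconstruction
# error plus an L2 regulariser. Assumes integer node labels 0..n-1.
import numpy as np

def gf_objective(graph, X, regu):
    f1 = sum((w - np.dot(X[i], X[j])) ** 2
             for i, j, w in graph.edges(data='weight', default=1))
    f2 = regu * (np.linalg.norm(X) ** 2)
    return f1, f2, f1 + f2
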
def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    S = nx.to_scipy_sparse_matrix(graph)
    self._node_num = graph.number_of_nodes()
    t1 = time()

    # Generate encoder, decoder and autoencoder
    self._num_iter = self._n_iter
    self._encoder = get_variational_encoder(self._node_num, self._d,
                                            self._n_units, self._nu1,
                                            self._nu2, self._actfn)
    self._decoder = get_decoder(self._node_num, self._d, self._n_units,
                                self._nu1, self._nu2, self._actfn)
    self._autoencoder = get_variational_autoencoder(self._encoder,
                                                    self._decoder)

    # Initialize self._model
    # Input
    x_in = Input(shape=(self._node_num, ), name='x_in')
    # Process inputs
    # [x_hat, y] = self._autoencoder(x_in)
    [x_hat, y_mean, y_std, y2] = self._autoencoder(x_in)
    # Outputs
    x_diff = Subtract()([x_hat, x_in])
    # x_diff = merge([x_hat, x_in],
    #                mode=lambda (a, b): a - b,
    #                output_shape=lambda L: L[1])
    y_log_var = KBack.log(KBack.square(y_std))
    # KL divergence between the approximate posterior and a unit Gaussian
    vae_loss = merge(
        [y_mean, y_std],
        mode=lambda x: -0.5 * KBack.sum(
            1 + KBack.log(KBack.square(x[1]))
            - KBack.square(x[0]) - KBack.square(x[1]),
            axis=-1),
        output_shape=lambda L: (L[1][0], 1))

    # Objectives
    def weighted_mse_x(y_true, y_pred):
        '''Hack: This fn doesn't accept additional arguments.
           We use y_true to pass them.
        y_pred: Contains x_hat - x
        y_true: Contains b
        '''
        return KBack.sum(
            KBack.square(y_pred * y_true[:, 0:self._node_num]),
            axis=-1)

    def weighted_mse_vae(y_true, y_pred):
        '''Hack: This fn doesn't accept additional arguments.
           We use y_true to pass them.
        y_pred: Contains KL-divergence
        y_true: Contains np.zeros(mini_batch)
        '''
        min_batch_size = KBack.shape(y_true)[0]
        return KBack.mean(
            # KBack.abs(y_pred),
            KBack.abs(KBack.reshape(y_pred, [min_batch_size, 1])),
            axis=-1)

    # Model
    self._model = Model(input=x_in, output=[x_diff, vae_loss])
    # sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
    adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    self._model.compile(optimizer=adam,
                        loss=[weighted_mse_x, weighted_mse_vae],
                        loss_weights=[1, self._beta_vae])

    history = self._model.fit_generator(
        generator=batch_generator_vae(S, self._beta, self._n_batch, True),
        nb_epoch=self._num_iter,
        samples_per_epoch=S.shape[0] // self._n_batch,
        verbose=1,
        callbacks=[callbacks.TerminateOnNaN()])
    loss = history.history['loss']
    # Get embedding for all points; check the final epoch's loss for divergence
    if loss[-1] == np.inf or np.isnan(loss[-1]):
        print('Model diverged. Assigning random embeddings')
        self._Y = np.random.randn(self._node_num, self._d)
    else:
        self._Y = model_batch_predictor(self._autoencoder, S,
                                        self._n_batch, meth='vae')
        # Report statistics of the learned log standard deviations
        submodel_gen = batch_generator_vae(S, self._beta, self._n_batch, True)
        x = np.concatenate([next(submodel_gen)[0] for _ in range(100)], axis=0)
        vae_submodel = Model(x_in, self._autoencoder(x_in))
        _, _, log_std, _ = vae_submodel.predict(x)
        mean = np.mean(log_std)
        std = np.std(log_std)
        print('log std mean: %f, std: %f' % (mean, std))
    t2 = time()
    # Save the autoencoder and its weights
    if self._weightfile is not None:
        saveweights(self._encoder, self._weightfile[0])
        saveweights(self._decoder, self._weightfile[1])
    if self._modelfile is not None:
        savemodel(self._encoder, self._modelfile[0])
        savemodel(self._decoder, self._modelfile[1])
    if self._savefilesuffix is not None:
        saveweights(self._encoder,
                    'encoder_weights_' + self._savefilesuffix + '.hdf5')
        saveweights(self._decoder,
                    'decoder_weights_' + self._savefilesuffix + '.hdf5')
        savemodel(self._encoder,
                  'encoder_model_' + self._savefilesuffix + '.json')
        savemodel(self._decoder,
                  'decoder_model_' + self._savefilesuffix + '.json')
        # Save the embedding
        np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
    return self._Y, (t2 - t1)

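# Numpy sketch of the KL term the `vae_loss` merge above computes
# (illustrative): the closed-form KL divergence between N(mean, std^2)
# and a unit Gaussian, which is 0 when mean = 0 and std = 1.
import numpy as np

def kl_unit_gaussian(mean, std):
    return -0.5 * np.sum(1 + np.log(std ** 2) - mean ** 2 - std ** 2, axis=-1)

# kl_unit_gaussian(np.zeros(4), np.ones(4))  # -> 0.0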