def learn_embeddings(self):
    # Fit a Graph Factorization model on the stored graph and cache the result.
    model = GraphFactorization(d=self.dim, max_iter=10000, eta=1 * 10**-4,
                               regu=1.0, data_set='ds')
    model.learn_embedding(self.graph)
    self.embeddings = model.get_embedding()
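The method above assumes a host object that provides self.dim and self.graph and stores the result in self.embeddings. A minimal sketch of such a container, with a hypothetical class name, just to show that contract:

import networkx as nx

class GFEmbedder:
    # Hypothetical host class: name and defaults are illustrative only.
    def __init__(self, graph, dim=2):
        self.graph = graph          # networkx graph to embed
        self.dim = dim              # embedding dimensionality
        self.embeddings = None      # populated by learn_embeddings()

    learn_embeddings = learn_embeddings  # attach the method defined above

embedder = GFEmbedder(nx.karate_club_graph(), dim=2)
embedder.learn_embeddings()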
import pandas

from gem.embedding.gf import GraphFactorization
from gem.utils import graph_util


def main(data_set_name):
    dimensions = 4
    input_file = './graph/' + data_set_name + '.tsv'
    output_file = './emb/' + data_set_name + '.emb'

    # Instantiate the embedding method with hyperparameters (d, max_iter, eta, regu)
    graph_factorization = GraphFactorization(dimensions, 1000, 1 * 10**-4, 1.0)

    # Load graph
    graph = graph_util.loadGraphFromEdgeListTxt(input_file)

    # Learn embedding - accepts a networkx graph or file with edge list
    embeddings_array, t = graph_factorization.learn_embedding(
        graph, edge_f=None, is_weighted=True, no_python=True)
    embeddings = pandas.DataFrame(embeddings_array)
    embeddings.to_csv(output_file, sep=' ', na_rep=0.1)
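A minimal way to exercise main above, assuming an edge list has been written to ./graph/karate.tsv in "source target [weight]" format; the data set name is only an example:

if __name__ == '__main__':
    # Expects ./graph/karate.tsv to exist and writes ./emb/karate.emb.
    main('karate')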
def graphFactorization(netData, **kwargs):
    # Read hyperparameters from kwargs, falling back to sensible defaults.
    d = kwargs.get('d', 2)
    max_iter = kwargs.get('max_iter', 100000)
    eta = kwargs.get('eta', 1 * 10**-4)
    regu = kwargs.get('regu', 1.0)

    from gem.embedding.gf import GraphFactorization
    emb = GraphFactorization(d=d, max_iter=max_iter, eta=eta, regu=regu)
    return attMethods.GEMexport(netData, emb)
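How the wrapper might be invoked; netData and attMethods.GEMexport belong to the surrounding codebase, so this call is only a sketch:

# Override just the hyperparameters that matter; the rest fall back to defaults.
embedding = graphFactorization(netData, d=8, max_iter=50000)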
import ast

import numpy as np

from gem.embedding.gf import GraphFactorization
from gem.embedding.hope import HOPE
from gem.embedding.lap import LaplacianEigenmaps
from gem.embedding.lle import LocallyLinearEmbedding
from gem.embedding.sdne import SDNE
from gem.utils import graph_util


def main(args):
    # Load edgelist
    G = graph_util.loadGraphFromEdgeListTxt(args.input, directed=args.directed)
    G = G.to_directed()

    # Preprocess the graph
    # G, _ = prep_graph(G)

    if args.method == 'gf':
        # GF takes embedding dimension (d), maximum iterations (max_iter),
        # learning rate (eta) and regularization coefficient (regu) as inputs
        model = GraphFactorization(d=args.dimension, max_iter=args.max_iter,
                                   eta=args.eta, regu=args.regu)
    elif args.method == 'hope':
        # HOPE takes embedding dimension (d) and decay factor (beta) as inputs
        model = HOPE(d=args.dimension, beta=args.beta)
    elif args.method == 'lap':
        # LE takes embedding dimension (d) as input
        model = LaplacianEigenmaps(d=args.dimension)
    elif args.method == 'lle':
        # LLE takes embedding dimension (d) as input
        model = LocallyLinearEmbedding(d=args.dimension)
    elif args.method == 'sdne':
        encoder_layer_list = ast.literal_eval(args.encoder_list)
        # SDNE takes embedding dimension (d), seen edge reconstruction weight (beta),
        # first order proximity weight (alpha), lasso regularization coefficient (nu1),
        # ridge regression coefficient (nu2), number of hidden layers (K), size of
        # each layer (n_units), number of iterations (n_iter), learning rate (xeta),
        # size of batch (n_batch), and the locations where the model and weights are
        # saved (modelfile and weightfile) as inputs
        model = SDNE(d=args.dimension, beta=args.beta, alpha=args.alpha,
                     nu1=args.nu1, nu2=args.nu2, K=len(encoder_layer_list),
                     n_units=encoder_layer_list, n_iter=args.max_iter,
                     xeta=args.learning_rate, n_batch=args.bs)
        # , modelfile=['enc_model.json', 'dec_model.json'],
        # weightfile=['enc_weights.hdf5', 'dec_weights.hdf5'])
    else:
        raise ValueError('The requested method does not exist!')

    # Learn the node embeddings
    Y, t = model.learn_embedding(graph=G, edge_f=None,
                                 is_weighted=args.weighted, no_python=True)
    Z = np.real_if_close(Y, tol=1000)

    # Save the node embeddings to a file
    np.savetxt(args.output, Z, delimiter=',', fmt='%f')
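A minimal argument parser that would drive main above. The flag names mirror the attributes the function reads, but the exact CLI of the original script is an assumption:

import argparse

parser = argparse.ArgumentParser(description='Learn node embeddings with GEM.')
parser.add_argument('--input', required=True)
parser.add_argument('--output', required=True)
parser.add_argument('--method', default='gf',
                    choices=['gf', 'hope', 'lap', 'lle', 'sdne'])
parser.add_argument('--directed', action='store_true')
parser.add_argument('--weighted', action='store_true')
parser.add_argument('--dimension', type=int, default=128)
parser.add_argument('--max_iter', type=int, default=50000)
parser.add_argument('--eta', type=float, default=1e-4)
parser.add_argument('--regu', type=float, default=1.0)
parser.add_argument('--beta', type=float, default=0.01)
parser.add_argument('--alpha', type=float, default=1e-5)
parser.add_argument('--nu1', type=float, default=1e-6)
parser.add_argument('--nu2', type=float, default=1e-6)
parser.add_argument('--learning_rate', type=float, default=0.01)
parser.add_argument('--bs', type=int, default=500)
parser.add_argument('--encoder_list', default='[500, 300]')

main(parser.parse_args())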
def train_GF_model(graph, dim=2, reg=0.0, max_iter=50000, eta=1 * 10**-4):
    """Load and train the Graph Factorization model."""
    embedding = GraphFactorization(d=dim, max_iter=max_iter, eta=eta, regu=reg)
    Y, t = embedding.learn_embedding(graph=graph, edge_f=None,
                                     is_weighted=False, no_python=False)
    return (embedding, Y)
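A quick smoke test for train_GF_model, assuming only networkx is available; Zachary's karate club stands in for a real data set and max_iter is lowered to keep the run short:

import networkx as nx

# Build a small graph and embed it in two dimensions.
G = nx.karate_club_graph()
model, Y = train_GF_model(G, dim=2, reg=1.0, max_iter=1000)
print(Y.shape)  # one 2-d vector per node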
from time import time

from gem.utils import graph_util
from gem.embedding.gf import GraphFactorization
from gem.embedding.hope import HOPE
from gem.embedding.lap import LaplacianEigenmaps
from gem.embedding.lle import LocallyLinearEmbedding
# from gem.embedding.sdne import SDNE

# File that contains the edges. Format: source target
# Optionally, you can add weights as third column: source target weight
edge_f = 'data/karate.edgelist'

# Specify whether the edges are directed
isDirected = True

# Load graph
G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=isDirected)
G = G.to_directed()

models = []
# You can comment out the methods you don't want to run
models.append(GraphFactorization(d=2, max_iter=100000, eta=1 * 10**-4, regu=1.0))
models.append(HOPE(d=4, beta=0.01))
models.append(LaplacianEigenmaps(d=2))
models.append(LocallyLinearEmbedding(d=2))
# models.append(node2vec(d=2, max_iter=1, walk_len=80, num_walks=10,
#                        con_size=10, ret_p=1, inout_p=1))
# models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
#                    n_units=[50, 15, ], rho=0.3, n_iter=50, xeta=0.01, n_batch=500,
#                    modelfile=['./intermediate/enc_model.json', './intermediate/dec_model.json'],
#                    weightfile=['./intermediate/enc_weights.hdf5', './intermediate/dec_weights.hdf5']))

for embedding in models:
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G, edge_f=None,
                                     is_weighted=True, no_python=True)
# Optionally, you can add weights as third column: source target weight
edge_f = 'examples/data/karate.edgelist'

# Specify whether the edges are directed
isDirected = True

# Load graph
G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=isDirected)
G = G.to_directed()
print(G.nodes)

models = []
# Load the models you want to run
models.append(GraphFactorization(d=2, max_iter=50000, eta=1 * 10**-4,
                                 regu=1.0, data_set='karate'))
models.append(HOPE(d=4, beta=0.01))
models.append(LaplacianEigenmaps(d=2))
models.append(LocallyLinearEmbedding(d=2))
if run_n2v:
    models.append(node2vec(d=2, max_iter=1, walk_len=80, num_walks=10,
                           con_size=10, ret_p=1, inout_p=1))
models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
                   n_units=[50, 15, ], rho=0.3, n_iter=50, xeta=0.01, n_batch=100,
                   modelfile=['enc_model.json', 'dec_model.json'],
                   weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

# For each model, learn the embedding and evaluate on graph reconstruction and visualization
for embedding in models:
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
from gem.utils import graph_util
from gem.embedding.gf import GraphFactorization
from gem.embedding.hope import HOPE
from gem.embedding.lap import LaplacianEigenmaps
from gem.embedding.lle import LocallyLinearEmbedding
from gem.embedding.node2vec import node2vec
from gem.embedding.sdne import SDNE

# File that contains the edges. Format: source target
# Optionally, you can add weights as third column: source target weight
edge_f = 'data/karate.edgelist'

# Specify whether the edges are directed
isDirected = True

# Load graph
G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=isDirected)
G = G.to_directed()

models = []
# Load the models you want to run
models.append(GraphFactorization(d=2, max_iter=50000, eta=1 * 10**-4, regu=1.0))
models.append(HOPE(d=4, beta=0.01))
models.append(LaplacianEigenmaps(d=2))
models.append(LocallyLinearEmbedding(d=2))
models.append(node2vec(d=2, max_iter=1, walk_len=80, num_walks=10,
                       con_size=10, ret_p=1, inout_p=1))
# Arguments past alpha mirror the other karate examples; the source was cut off here.
models.append(SDNE(d=2, beta=5, alpha=1e-5,
                   nu1=1e-6, nu2=1e-6, K=3, n_units=[50, 15, ],
                   rho=0.3, n_iter=50, xeta=0.01, n_batch=500,
                   modelfile=['enc_model.json', 'dec_model.json'],
                   weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))
from time import time

from gem.utils import graph_util
from gem.embedding.gf import GraphFactorization
from gem.embedding.hope import HOPE
from gem.embedding.lap import LaplacianEigenmaps
from gem.embedding.lle import LocallyLinearEmbedding
from gem.embedding.node2vec import node2vec
from gem.embedding.sdne import SDNE

# File that contains the edges. Format: source target
# Optionally, you can add weights as third column: source target weight
edge_f = './gem/data/karate.edgelist'

# Specify whether the edges are directed
isDirected = True

# Load graph
G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=isDirected)
G = G.to_directed()

models = []
# You can comment out the methods you don't want to run
# (keyword arguments used throughout; GEM expects d, max_iter, eta, regu for GF)
models.append(GraphFactorization(d=2, max_iter=50000, eta=1 * 10**-4, regu=1.0))
models.append(HOPE(d=4, beta=0.01))
models.append(LaplacianEigenmaps(d=2))
models.append(LocallyLinearEmbedding(d=2))
# models.append(node2vec(d=2, max_iter=1, walk_len=80, num_walks=10,
#                        con_size=10, ret_p=1, inout_p=1))
# models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
#                    n_units=[50, 15, ], rho=0.3, n_iter=50, xeta=0.01, n_batch=500,
#                    modelfile=['./intermediate/enc_model.json', './intermediate/dec_model.json'],
#                    weightfile=['./intermediate/enc_weights.hdf5', './intermediate/dec_weights.hdf5']))

for embedding in models:
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G, edge_f=None,
                                     is_weighted=True, no_python=True)
G = nx.read_gpickle(file_prefix)
try:
    node_colors = pickle.load(open('data/sbm_node_labels.pickle', 'rb'))
except UnicodeDecodeError:
    node_colors = pickle.load(open('data/sbm_node_labels.pickle', 'rb'),
                              encoding='latin1')
node_colors_arr = [None] * node_colors.shape[0]
for idx in range(node_colors.shape[0]):
    node_colors_arr[idx] = np.where(node_colors[idx, :].toarray() == 1)[1][0]

models = []
# Load the models you want to run
models.append(GraphFactorization(d=128, max_iter=1000, eta=1 * 10**-4,
                                 regu=1.0, data_set='sbm'))
models.append(HOPE(d=256, beta=0.01))
models.append(LaplacianEigenmaps(d=128))
models.append(LocallyLinearEmbedding(d=128))
models.append(node2vec(d=182, max_iter=1, walk_len=80, num_walks=10,
                       con_size=10, ret_p=1, inout_p=1, data_set='sbm'))
models.append(SDNE(d=128, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
                   n_units=[500, 300, ], rho=0.3, n_iter=30, xeta=0.001, n_batch=500,
                   modelfile=['enc_model.json', 'dec_model.json'],
                   weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

# For each model, learn the embedding and evaluate on graph reconstruction and visualization
for embedding in models:
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G, edge_f=None,
                                     is_weighted=True, no_python=True)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    # Evaluate on graph reconstruction
elif model == 'manela':
    gr = graph.from_networkx(G, undirected=True)
    emb = ds.Distributed(gr)
    emb.setArgs(numUpdates=90, outputPath='temp_emb.embeddings',
                representSize=128, window=10, numNegSampling=5,
                ratio=args.ratio)
    emb.process()
    emb_matrix = emb.getEmbeddings()
elif model == 'gf':
    md = GraphFactorization(d=128, max_iter=1000, eta=1 * 10**-4,
                            regu=1.0, data_set='sbm')
    md.learn_embedding(G)
    emb_matrix = md.get_embedding()
elif model == 'node2vec':
    gra = node2vec.Graph(G, is_directed=isDirected, p=1, q=1)
    gra.preprocess_transition_probs()
    walks = gra.simulate_walks(num_walks=10, walk_length=80)
    walks = [list(map(str, walk)) for walk in walks]
    md = Word2Vec(walks, size=128, window=10, min_count=0, sg=1,
                  workers=4)  # 'workers=4' is an assumed value; any further arguments are unspecified in this snippet
def get_embeddings(graph, embedding_algorithm_enum, dimension_count, hyperparameter,
                   lower=None, higher=None):
    """Generate embeddings."""
    if embedding_algorithm_enum is EmbeddingType.LocallyLinearEmbedding:
        embedding_alg = LocallyLinearEmbedding(d=dimension_count)
    elif embedding_algorithm_enum is EmbeddingType.Hope:
        embedding_alg = HOPE(d=dimension_count, beta=0.01)
    elif embedding_algorithm_enum is EmbeddingType.GF:
        embedding_alg = GraphFactorization(d=dimension_count, max_iter=100000,
                                           eta=1 * 10**-4, regu=1.0)
    elif embedding_algorithm_enum is EmbeddingType.LaplacianEigenmaps:
        embedding_alg = LaplacianEigenmaps(d=dimension_count)
    elif embedding_algorithm_enum is EmbeddingType.DegreeNeigDistributionWithout:
        # Histogram of neighbour degrees, one row per node.
        A = np.array([
            np.histogram([graph.degree(neig) for neig in graph.neighbors(i)],
                         bins=dimension_count, density=True,
                         range=(lower, higher))[0]
            for i in graph.nodes()
        ])
        A = (A - A.mean(axis=0)) / A.std(axis=0)
        return A
    elif embedding_algorithm_enum is EmbeddingType.DegreeNeigDistribution:
        # Scaled node degree followed by the neighbour-degree histogram.
        A = np.array([
            np.concatenate([
                np.array([graph.degree(i) / (higher * dimension_count)]),
                np.histogram([graph.degree(neig) for neig in graph.neighbors(i)],
                             bins=dimension_count - 1, density=True,
                             range=(lower, higher))[0]
            ], axis=0)
            for i in graph.nodes()
        ])
        A = (A - A.mean(axis=0)) / A.std(axis=0)
        return A
    elif embedding_algorithm_enum is EmbeddingType.DegreeNeigNeigDistribution:
        # Scaled node degree, neighbour-degree histogram and
        # second-neighbour-degree histogram, half the dimensions each.
        bin_length = int(dimension_count / 2)
        A = np.array([
            np.concatenate([
                np.array([graph.degree(i) / (higher)]),
                np.histogram([graph.degree(neig) for neig in graph.neighbors(i)],
                             bins=bin_length, density=True,
                             range=(lower, higher))[0],
                np.histogram([graph.degree(neigneig)
                              for neig in graph.neighbors(i)
                              for neigneig in graph.neighbors(neig)],
                             bins=bin_length, density=True,
                             range=(lower, higher))[0]
            ], axis=0)
            for i in graph.nodes()
        ])
        A = (A - A.mean(axis=0)) / A.std(axis=0)
        # Only the second-neighbour block is rescaled by the hyperparameter.
        A[:, 2 + bin_length:] = A[:, 2 + bin_length:] * hyperparameter
        A = np.nan_to_num(A)
        return A
    else:
        raise NotImplementedError

    A, t = embedding_alg.learn_embedding(graph=graph, no_python=True)
    # Fix the sign of each dimension, then standardize column-wise.
    A = np.dot(A, np.diag(np.sign(np.mean(A, axis=0))))
    A = (A - A.mean(axis=0)) / A.std(axis=0)
    return A
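A sketch of calling get_embeddings for one of the GEM-backed options, assuming EmbeddingType is the enum defined alongside this function:

import networkx as nx

G = nx.karate_club_graph()
# hyperparameter, lower and higher only affect the histogram-based options.
X = get_embeddings(G, EmbeddingType.GF, dimension_count=8, hyperparameter=1.0)
print(X.shape)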
gfiles = [
    gfile for gfile in os.listdir(dirpath)
    if os.path.splitext(gfile)[1] == '.csv'
]

results = {}
for gfile in gfiles:
    if gfile != 'soc-pokec-relationships-4096.csv':
        continue

    models = []
    # Load the models you want to run
    models.append(GraphFactorization(d=64, max_iter=50000, eta=1 * 10**-4,
                                     regu=1.0, data_set=gfile))
    models.append(HOPE(d=64, beta=0.01))
    models.append(LaplacianEigenmaps(d=64))
    models.append(LocallyLinearEmbedding(d=64))
    models.append(node2vec(d=64, max_iter=100, walk_len=80, num_walks=10,
                           con_size=10, ret_p=1, inout_p=1, data_set=gfile))
    # SDNE hyperparameters below are assumed from the sibling examples;
    # the source snippet breaks off at this append.
    models.append(SDNE(d=64, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
                       n_units=[500, 300, ], rho=0.3, n_iter=30, xeta=0.001,
                       n_batch=500,
                       modelfile=['enc_model.json', 'dec_model.json'],
                       weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))
from timeit import Timer

import networkx as nx

from gem.embedding.gf import GraphFactorization

sizes = [200, 500, 1000, 2000, 3000, 5000, 10000, 15000, 20000, 35000, 50000]
_DENS = 1e-3  # edge probability for the random graphs

for s in sizes:
    # Time GF training on a G(n, p) random digraph of each size.
    G = nx.gnp_random_graph(s, _DENS, directed=True)
    gf = GraphFactorization(d=128, eta=1e-3, regu=1, max_iter=1000)
    t = Timer('gf.learn_embedding(G)', setup='from __main__ import gf, G')
    n_runs = 3 if s <= 5000 else 1  # fewer repetitions for the large graphs
    exec_times = t.repeat(n_runs, 1)
    print(f'{s}: {exec_times}')
    with open('gf_times.txt', 'a') as f:
        f.write(f'{s}: {exec_times}\n')
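To inspect the results afterwards, a small parser for the gf_times.txt lines written above (each line has the form "size: [t1, t2, ...]"); ast.literal_eval reads the timing list back:

import ast

timings = {}
with open('gf_times.txt') as f:
    for line in f:
        size, times = line.split(':', 1)
        timings[int(size)] = ast.literal_eval(times.strip())

# Report the best (minimum) run per graph size.
for size in sorted(timings):
    print(size, min(timings[size]))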