Example #1
from time import time

from gem.utils import graph_util
from gem.embedding.lap import LaplacianEigenmaps

# np_save and writable are project-local helpers not shown in this snippet
# (np_save presumably wraps numpy.save; writable presumably resolves a
# writable output path).


def main(opts):
    dataset = opts.dataset
    embed_dim = int(opts.dimension)
    # File that contains the edges. Format: source target
    # Optionally, you can add weights as third column: source target weight
    edge_f = 'Data/%s.edgelist' % dataset

    # Specify whether the edges are directed
    # isDirected = True

    print "Loading Dataset"
    # Load graph
    G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=False)
    #G = G.to_directed()

    embedding = LaplacianEigenmaps(d=embed_dim)

    print('Num nodes: %d, num edges: %d' %
          (G.number_of_nodes(), G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    print "Starting Embedding"
    Y, t = embedding.learn_embedding(graph=G,
                                     edge_f=None,
                                     is_weighted=True,
                                     no_python=True)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    np_save(writable("Embedding_Results", "jac_" + dataset + str(embed_dim)),
            Y)
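
As the comment above notes, learn_embedding also accepts a networkx graph directly, so a quick in-memory test needs no edge-list file. A minimal sketch using networkx's built-in karate club graph; the d=8 choice is arbitrary:

import networkx as nx
from gem.embedding.lap import LaplacianEigenmaps

# Quick in-memory check: embed the 34-node karate club graph.
G = nx.karate_club_graph()
embedding = LaplacianEigenmaps(d=8)
Y, t = embedding.learn_embedding(graph=G, edge_f=None,
                                 is_weighted=True, no_python=True)
print(Y.shape)  # one 8-dimensional vector per node: (34, 8)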
Example #2
import pandas

from gem.utils import graph_util
from gem.embedding.lap import LaplacianEigenmaps


def main(data_set_name):
    dimensions = 4
    input_file = './graph/' + data_set_name + '.tsv'
    output_file = './emb/' + data_set_name + '.emb'
    # Instantiate the embedding method with its hyperparameters
    laplacian_eigenmaps = LaplacianEigenmaps(d=dimensions)

    # Load graph
    graph = graph_util.loadGraphFromEdgeListTxt(input_file)

    # Learn embedding - accepts a networkx graph or a file with an edge list
    embeddings_array, t = laplacian_eigenmaps.learn_embedding(
        graph, edge_f=None, is_weighted=True, no_python=True)
    embeddings = pandas.DataFrame(embeddings_array)
    # na_rep=0.1 writes any missing values as the literal '0.1'
    embeddings.to_csv(output_file, sep=' ', na_rep=0.1)
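
The file written above can be read back with pandas. A sketch assuming the space-separated layout produced by to_csv in this snippet; './emb/example.emb' is a hypothetical path:

import pandas

# Read back the space-separated embeddings; column 0 is the node index
# written by DataFrame.to_csv above.
embeddings = pandas.read_csv('./emb/example.emb', sep=' ', index_col=0)
print(embeddings.shape)  # (num_nodes, dimensions)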
Example #3
		# Fragment of a larger script: G, fig_name, grp, x and dimensions are
		# defined earlier, and imports such as os, pickle, LaplacianEigenmaps
		# and node2vec are assumed.
		G = G.to_directed()
		
		if not os.path.exists('SAVER_SUP/'+fig_name[grp]+str(x+1)):
			os.makedirs('SAVER_SUP/'+fig_name[grp]+str(x+1))
		
		# Split the graph 60-20-20: 60% for computing the edge features,
		# 20% for training the classifier, 20% for evaluating the model.

		train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(
			G, train_ratio=0.6, is_undirected=True)
		train_digraph1, test_digraph = evaluation_util.splitDiGraphToTrainTest(
			test_digraph, train_ratio=0.5, is_undirected=True)

		# embeddings without relearning

		print ("saving for LE")
		for dim in dimensions:
			embedding=LaplacianEigenmaps(d=dim)
			X, _ = embedding.learn_embedding(graph=train_digraph, no_python=False)
			file_name='SAVER_SUP/'+fig_name[grp]+str(x+1)+'/LE1_'+str(dim)
			parameter_file=open(file_name, 'wb')
			pickle.dump(X,parameter_file)
			parameter_file.close()

		print ("saving for DEEPWALK")
		for dim in dimensions:
			embedding=node2vec(d=dim, max_iter=1, walk_len=80, num_walks=10, con_size=10, ret_p=1, inout_p=1)
			X, _ = embedding.learn_embedding(graph=train_digraph, no_python=False)
			file_name='SAVER_SUP/'+fig_name[grp]+str(x+1)+'/DEEPWALK1_'+str(dim)
			parameter_file=open(file_name, 'wb')
			pickle.dump(X,parameter_file)
			parameter_file.close()
		
		print ("saving for n2vA")
Example #4

    # Fragment of a larger evaluation script: the feature and target arrays,
    # the graph g and the *_EMB_FILE_PATH constants are defined earlier.
    print('Score without neighbours data')
    create_and_evaluate_classifier(no_neighbour_train_features, train_targets,
                                   no_neighbour_test_features, test_targets)
    print('Score with neighbours data')
    # The same *_targets arrays work for both feature sets: the with- and
    # without-neighbour features are extracted from the same 'train_data' and
    # 'test_data' and follow the same node order.
    create_and_evaluate_classifier(neighbour_train_features, train_targets,
                                   neighbour_test_features, test_targets)

    ###############################################################

    if not os.path.exists(LAPLACIAN_EMB_FILE_PATH):
        laplacian = LaplacianEigenmaps(d=50)
        # learn_embedding returns (embedding, time); embs[0] below is the matrix
        embs = laplacian.learn_embedding(g,
                                         edge_f=None,
                                         is_weighted=False,
                                         no_python=True)
        save_embeddings(LAPLACIAN_EMB_FILE_PATH, embs[0], list(g.nodes))

    laplacian_embs = read_embeddings(LAPLACIAN_EMB_FILE_PATH)
    node2vec_embs = read_embeddings(
        NODE2VEC_EMB_FILE_PATH)  # we already have the embeddings
    sdne_embs = read_embeddings(
        SDNE_EMB_FILE_PATH)  # we already have the embeddings

    print('LAPLACIAN')
    create_and_evaluate_classifier(
        *extract_data_from_embs(laplacian_embs, train_data, test_data))
    print('NODE2VEC')
    create_and_evaluate_classifier(
        *extract_data_from_embs(node2vec_embs, train_data, test_data))
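
save_embeddings and read_embeddings are this script's own helpers and are not shown. A hypothetical sketch of one plausible layout they could implement, one 'node_id v1 ... vd' line per node; this is an illustration, not necessarily the script's actual format:

import numpy as np

def save_embeddings(path, embs, nodes):
    # Hypothetical layout: one "node_id v1 ... vd" line per node.
    with open(path, 'w') as f:
        for node, vec in zip(nodes, embs):
            f.write('%s %s\n' % (node, ' '.join('%f' % v for v in vec)))

def read_embeddings(path):
    # Parse the layout above back into a {node_id: vector} dict.
    embs = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            embs[parts[0]] = np.array(parts[1:], dtype=float)
    return embs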