示例#1
0
def main(args):
    """Load a graph, train a DNGR embedding, save it and optionally evaluate it.

    Reads from ``args``: ``graph_format``, ``input``, ``weighted``,
    ``directed``, ``method``, ``kstep``, ``representation_size``, ``output``,
    ``label_file`` and ``clf_ratio``.

    Only ``method == 'dngr'`` builds a model here.  For ``method == 'gcn'``
    both the save step and the classification step are skipped.
    """
    started = time.time()
    graph = Graph()
    print("Reading...")

    # Pick the reader that matches the declared input format; any other
    # format leaves the graph as constructed.
    if args.graph_format == 'adjlist':
        graph.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        graph.read_edgelist(
            filename=args.input,
            weighted=args.weighted,
            directed=args.directed,
        )

    if args.method == 'dngr':
        model = DNGR(graph=graph, Kstep=args.kstep, dim=args.representation_size)
        model.show()

    # Elapsed wall-clock seconds for loading plus model construction.
    finished = time.time()
    print(finished - started)

    # 'gcn' neither saves embeddings nor runs the downstream classifier.
    if args.method == 'gcn':
        return

    print("Saving embeddings...")
    model.save_embeddings(args.output)

    if not args.label_file:
        return

    embeddings = model.vectors
    nodes, labels = read_node_label(args.label_file)
    percent = args.clf_ratio * 100
    print("Training classifier using {:.2f}% nodes...".format(percent))
    classifier = Classifier(vectors=embeddings, clf=LogisticRegression())
    classifier.split_train_evaluate(nodes, labels, args.clf_ratio)
示例#2
0
def main(args):
    """Load a graph, train the embedding model named by ``args.method``,
    save the embeddings and optionally evaluate them with a classifier.

    Supported ``args.method`` values in this function: ``'node2vec'``,
    ``'line'``, ``'deepWalk'``, ``'tadw'``, ``'gcn'`` and ``'grarep'``.

    For ``'tadw'`` the ids in ``g.sgl_node_list`` are additionally written,
    one per line, to ``singluar_nodes.txt`` in the current directory.

    For ``'gcn'`` neither the save step nor the classifier step runs.

    Raises:
        AssertionError: if ``feature_file`` (tadw, gcn) or ``label_file``
            (gcn) is an empty string.
    """
    t1 = time.time()
    g = Graph()
    # The on-disk name keeps its historical spelling; it is a runtime path.
    singular_node_file = "singluar_nodes.txt"

    print("Reading...")

    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input, weighted=args.weighted,
                        directed=args.directed)

    if args.method == 'node2vec':
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers, p=args.p, q=args.q,
                                  window=args.window_size)
    elif args.method == 'line':
        # Labels are handed to LINE only when a label file is given and
        # auto-save has not been disabled.
        if args.label_file and not args.no_auto_save:
            model = line.LINE(g, epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order,
                              label_file=args.label_file,
                              clf_ratio=args.clf_ratio)
        else:
            model = line.LINE(g, epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order)
    elif args.method == 'deepWalk':
        # DeepWalk reuses the Node2vec implementation with dw=True.
        model = node2vec.Node2vec(graph=g, path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers,
                                  window=args.window_size, dw=True)
    elif args.method == 'tadw':
        # Only node features are required here; node labels are not loaded
        # for TADW in this variant.
        assert args.feature_file != ''
        g.read_node_features(args.feature_file)

        # Context manager guarantees the handle is closed even if a write
        # fails part-way through.
        with open(singular_node_file, "w+") as fout:
            for node_idx in g.sgl_node_list:
                fout.write("{}\n".format(node_idx))

        model = tadw.TADW(graph=g, dim=args.representation_size,
                          lamb=args.lamb)
    elif args.method == 'gcn':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = gcnAPI.GCN(graph=g, dropout=args.dropout,
                           weight_decay=args.weight_decay,
                           hidden1=args.hidden,
                           epochs=args.epochs, clf_ratio=args.clf_ratio)
    elif args.method == 'grarep':
        model = GraRep(graph=g, Kstep=args.kstep,
                       dim=args.representation_size)

    t2 = time.time()
    print("time: ", t2-t1)

    if args.method != 'gcn':
        print("Saving embeddings...")
        model.save_embeddings(args.output)

    if args.label_file and args.method != 'gcn':
        vectors = model.vectors
        X, Y = read_node_label(args.label_file)
        print("Training classifier using {:.2f}% nodes...".format(args.clf_ratio*100))
        clf = Classifier(vectors=vectors, clf=LogisticRegression())
        clf.split_train_evaluate(X, Y, args.clf_ratio)
示例#3
0
def main(args):
    """Split labelled nodes, train a (supervised) DNGR variant and evaluate it.

    The labelled nodes from ``args.label_file`` are shuffled with
    ``np.random.permutation``; the first ``int(clf_ratio * N)`` become the
    training split, which is passed to the model as ``XY``.  The remainder
    is the test split.  After training, the embeddings are saved to
    ``<method>_<output>`` and a logistic-regression classifier is evaluated
    on the pre-computed split.

    Methods handled here: ``'dngr'``, ``'vaedngr'``, ``'sdngr'`` and
    ``'vaesdngr'``.
    """
    started = time.time()
    graph = Graph()
    print("Reading...")

    nodes, labels = read_node_label(args.label_file)
    n_total = len(nodes)
    n_train = int(args.clf_ratio * n_total)
    order = np.random.permutation(np.arange(n_total))

    # Resolve the shuffled positions first, then gather nodes and labels
    # through the same position lists so both stay aligned.
    train_pos = [order[k] for k in range(n_train)]
    test_pos = [order[k] for k in range(n_train, n_total)]
    X_train = [nodes[pos] for pos in train_pos]
    Y_train = [labels[pos] for pos in train_pos]
    X_test = [nodes[pos] for pos in test_pos]
    Y_test = [labels[pos] for pos in test_pos]

    if args.graph_format == 'adjlist':
        graph.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        graph.read_edgelist(
            filename=args.input,
            weighted=args.weighted,
            directed=args.directed,
        )

    # The four method names are mutually exclusive, so a single chain is
    # equivalent to testing each one separately.
    method = args.method
    if method == 'dngr':
        model = DNGR(graph=graph, Kstep=args.kstep,
                     dim=args.representation_size, XY=[X_train, Y_train])
        model.show()
    elif method == 'vaedngr':
        model = VAEDNGR(graph=graph, Kstep=args.kstep,
                        dim=args.representation_size, XY=[X_train, Y_train])
        model.show()
    elif method == 'sdngr':
        # Unlike the other variants, this one does not call show().
        model = SDNGR(graph=graph, Kstep=args.kstep,
                      dim=args.representation_size, XY=[X_train, Y_train])
    elif method == 'vaesdngr':
        model = VAESDNGR(graph=graph, Kstep=args.kstep,
                         dim=args.representation_size, XY=[X_train, Y_train])
        model.show()

    # Elapsed wall-clock seconds up to the end of model construction.
    finished = time.time()
    print(finished - started)

    if args.method != 'gcn':
        print("Saving embeddings...")
        out_path = args.method + '_' + args.output
        model.save_embeddings(out_path)

    # Evaluation always runs here, on the split computed above.
    embeddings = model.vectors
    percent = args.clf_ratio * 100
    print("Training classifier using {:.2f}% nodes...".format(percent))
    classifier = Classifier(vectors=embeddings, clf=LogisticRegression())
    classifier.my_evaluate(X_train, Y_train, X_test, Y_test)
示例#4
0
import numpy as np
import random
from sklearn.linear_model import LogisticRegression
from libnrl.graph import *
from libnrl import node2vec
from libnrl.classify import Classifier, read_node_label
import matplotlib as plt

# Build the graph from a weighted, directed edge list.
g = Graph()
g.read_edgelist(filename='../data/load_rename.csv',
                weighted=True,
                directed=True)
# Hyper-parameter tuning
X, Y = read_node_label('../data/load_label.csv')
# Candidate values per hyper-parameter. Only the 'p' and 'q' grids are
# iterated below; 'path_length', 'num_paths' and 'dim' are declared but the
# visible loop uses fixed values for them instead.
tuned_parameters = {
    'path_length': [20, 100],
    'num_paths': [10, 20, 50],
    'dim': [30, 80, 200],
    'p': [0.25, 0.5, 1, 2, 4],
    'q': [0.25, 0.5, 1, 2, 4]
}
# Initialised empty; nothing in the visible code stores results in it.
# NOTE(review): presumably meant to hold a score per (p, q) pair — confirm.
test_scores = {}
# Grid over every (p, q) combination; one Node2vec model is built per pair
# and `model` is rebound on each iteration.
for p in tuned_parameters['p']:
    for q in tuned_parameters['q']:
        model = node2vec.Node2vec(graph=g,
                                  path_length=80,
                                  num_paths=10,
                                  dim=30,
                                  p=p,
                                  q=q,
                                  window=20)
示例#5
0
文件: main.py 项目: yyr93520/sne
def main(args):
    """Load a graph, train the embedding model named by ``args.method``,
    save the embeddings and optionally evaluate them with a classifier.

    Supported ``args.method`` values in this function: ``'node2vec'``,
    ``'line'``, ``'deepWalk'``, ``'tadw'``, ``'gcn'`` and ``'grarep'``.
    For ``'gcn'`` neither the save step nor the classifier step runs.

    All output uses single-argument ``print(...)`` calls, which produce the
    same text under Python 2 (the dialect this code was written in) and
    also parse under Python 3.

    Raises:
        AssertionError: if ``label_file`` or ``feature_file`` is an empty
            string for the ``'tadw'`` or ``'gcn'`` methods.
    """
    t1 = time.time()
    g = Graph()
    print("Reading...")
    if args.graph_format == 'adjlist':
        g.read_adjlist(filename=args.input)
    elif args.graph_format == 'edgelist':
        g.read_edgelist(filename=args.input,
                        weighted=args.weighted,
                        directed=args.directed)
    if args.method == 'node2vec':
        model = node2vec.Node2vec(graph=g,
                                  path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers,
                                  p=args.p,
                                  q=args.q,
                                  window=args.window_size)
    elif args.method == 'line':
        if args.label_file:
            # Fixed: this used to read ``args.no - auto_stop``, a subtraction
            # involving an undefined name that raised NameError.
            # NOTE(review): assumes the option is exposed on ``args`` as
            # ``no_auto_stop`` (e.g. from a ``--no-auto_stop`` flag) —
            # confirm against the argument parser.
            model = line.LINE(g,
                              lr=args.lr,
                              batch_size=args.batch_size,
                              epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order,
                              label_file=args.label_file,
                              clf_ratio=args.clf_ratio,
                              auto_stop=args.no_auto_stop)
        else:
            model = line.LINE(g,
                              lr=args.lr,
                              batch_size=args.batch_size,
                              epoch=args.epochs,
                              rep_size=args.representation_size,
                              order=args.order)
    elif args.method == 'deepWalk':
        # DeepWalk reuses the Node2vec implementation with dw=True.
        model = node2vec.Node2vec(graph=g,
                                  path_length=args.walk_length,
                                  num_paths=args.number_walks,
                                  dim=args.representation_size,
                                  workers=args.workers,
                                  window=args.window_size,
                                  dw=True)
    elif args.method == 'tadw':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = tadw.TADW(graph=g,
                          dim=args.representation_size,
                          lamb=args.lamb)
    elif args.method == 'gcn':
        assert args.label_file != ''
        assert args.feature_file != ''
        g.read_node_label(args.label_file)
        g.read_node_features(args.feature_file)
        model = gcnAPI.GCN(graph=g,
                           dropout=args.dropout,
                           weight_decay=args.weight_decay,
                           hidden1=args.hidden,
                           epochs=args.epochs,
                           clf_ratio=args.clf_ratio)
    elif args.method == 'grarep':
        model = GraRep(graph=g, Kstep=args.kstep, dim=args.representation_size)
    t2 = time.time()
    # Elapsed wall-clock seconds for loading plus model construction.
    print(t2 - t1)
    if args.method != 'gcn':
        print("Saving embeddings...")
        model.save_embeddings(args.output)
    if args.label_file and args.method != 'gcn':
        vectors = model.vectors
        X, Y = read_node_label(args.label_file)
        print("Training classifier using {:.2f}% nodes...".format(
            args.clf_ratio * 100))
        clf = Classifier(vectors=vectors, clf=LogisticRegression())
        clf.split_train_evaluate(X, Y, args.clf_ratio)