Example #1
def build_tadw(g, path, configs):
    # TADW OpenNE
    print("TADW processing...")
    model_tadw = TADW(g, dim=embedding_size, lamb=lamb)
    model_tadw.save_embeddings("{}/TADW.nv".format(path))
    print("TADW finished\n")
    embedding = load_embedding("{}/TADW.nv".format(path))
    return embedding
Example #2
def build_sdne(g, path, configs):
    # SDNE OpenNE
    print("SDNE processing...")
    model_sdne = SDNE(g, encoder_layer_list=encoder_list, epoch=epochs)
    model_sdne.save_embeddings("{}/SDNE.nv".format(path))
    print("SDNE finished\n")
    embedding = load_embedding("{}/SDNE.nv".format(path))
    return embedding
Example #3
def build_line(g, path, configs):
    # LINE OpenNE
    print("LINE processing...")
    model_line = LINE(g, epoch=epochs, rep_size=embedding_size)
    model_line.save_embeddings("{}/LINE.nv".format(path))
    print("LINE finished\n")
    embedding = load_embedding("{}/LINE.nv".format(path))
    return embedding
Example #4
def build_hope(g, path, configs):
    # HOPE OpenNE
    print("HOPE processing...")
    model_hope = HOPE(graph=g, d=embedding_size)
    model_hope.save_embeddings("{}/HOPE.nv".format(path))
    print("HOPE finished\n")
    embedding = load_embedding("{}/HOPE.nv".format(path))
    return embedding
Example #5
def build_grarep(g, path, configs):
    # GraRep OpenNE
    print("GraRep processing...")
    model_grarep = GraRep(graph=g, Kstep=kstep, dim=embedding_size)
    model_grarep.save_embeddings("{}/GraRep.nv".format(path))
    print("GraRep finished\n")
    embedding = load_embedding("{}/GraRep.nv".format(path))
    return embedding
Example #6
def build_gf(g, path, configs):
    # GF OpenNE
    print("GF processing...")
    model_gf = GraphFactorization(graph=g, rep_size=embedding_size)
    model_gf.save_embeddings("{}/GF.nv".format(path))
    print("GF finished\n")
    embedding = load_embedding("{}/GF.nv".format(path))
    return embedding
Example #7
def build_lle(g, path, configs):
    # LLE OpenNE
    print("LLE processing...")
    model_lle = LLE(graph=g, d=embedding_size)
    model_lle.save_embeddings("{}/LLE.nv".format(path))
    print("LLE finished\n")
    embedding = load_embedding("{}/LLE.nv".format(path))
    return embedding
Example #8
def build_le(g, path, configs):
    # Laplacian Eigenmaps OpenNE
    print("Lapacian Eigenmaps processing...")
    model_lap = LaplacianEigenmaps(g, rep_size=embedding_size)
    model_lap.save_embeddings("{}/Lap.nv".format(path))
    print("Laplacian Eigenmaps finished\n")
    embedding = load_embedding("{}/Lap.nv".format(path))
    return embedding
Example #9
def build_n2v(g, path, configs):
    # node2vec OpenNE
    print("Node2vec processing...")
    model_n2v = Node2vec(graph=g, path_length=walk_length, num_paths=number_walks, dim=embedding_size,
                        workers=workers, p=p, q=q, window=window_size)
    model_n2v.save_embeddings("{}/Node2vec.nv".format(path))
    print("Node2vec finished\n")
    embedding = load_embedding("{}/Node2vec.nv".format(path))
    return embedding
Example #10
def build_dw(g, path, configs):
    # DeepWalk OpenNE
    print("DeepWalk processing...")
    model_deepwalk = Node2vec(graph=g, path_length=walk_length, num_paths=number_walks, 
                    dim=embedding_size, window=window_size, workers=workers, dw=True)
    model_deepwalk.save_embeddings("{}/DeepWalk.nv".format(path))
    print("DeepWalk finished\n")
    embedding = load_embedding("{}/DeepWalk.nv".format(path))
    return embedding
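The builders above rely on module-level hyperparameters (embedding_size, epochs, walk_length, number_walks, window_size, workers, p, q, lamb, kstep, encoder_list) that are defined elsewhere in the project. A minimal, purely illustrative sketch of how they might be populated from the same configs object every builder receives; the attribute names below are assumptions, not the project's actual configuration keys:

def set_hyperparameters(configs):
    # Hypothetical helper: push configuration values into the module-level
    # globals used by the build_* functions above. Every configs attribute
    # name here is assumed for illustration only.
    global embedding_size, epochs, walk_length, number_walks, window_size
    global workers, p, q, lamb, kstep, encoder_list
    embedding_size = configs.embedding_size  # dimensionality of every embedding
    epochs = configs.epochs                  # training epochs (SDNE, LINE, GCAE, VGAE)
    walk_length = configs.walk_length        # random-walk length (node2vec, DeepWalk)
    number_walks = configs.number_walks      # walks started per node
    window_size = configs.window_size        # skip-gram context window
    workers = configs.workers                # parallel worker processes
    p, q = configs.p, configs.q              # node2vec return / in-out parameters
    lamb = configs.lamb                      # TADW regularisation weight
    kstep = configs.kstep                    # GraRep transition-matrix steps
    encoder_list = configs.encoder_list      # SDNE encoder layer sizes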
Example #11
def build_gcae(g, path, configs):
    gcae = GCAE(g, path, configs)
    t_total = time.time()
    print("GCAE Start Training")
    for epoch in range(epochs):
        gcae.train(epoch)
    print("GCAE Optimization Finished!")
    print("Total time elapsed: {:.4f}s\n".format(time.time() - t_total))
    embedding = load_embedding("{}/GCAE.nv".format(path))

    return embedding
Example #12
def main(configs):
    timespells = configs.timespells
    Path(configs.RESULT_PATH).mkdir(parents=True, exist_ok=True)

    # calculate for each model
    for model in configs.models:
        # load embeddings across time spells
        embs = []
        dist_ts = []
        cos_ts = []
        for ts in range(1, int(timespells) + 1):
            embs.append(
                load_embedding(f"{configs.EMBEDDING_PATH}/TS{ts}/{model}.nv"))
        for i in range(1, int(timespells)):
            e_prev = embs[i - 1]
            e_new = embs[i]
            e_new_rotated = get_rotated_embedding(
                e_prev, e_new, [j for j in range(e_prev.shape[0])])
            dist = get_embedding_distance(e_prev, e_new_rotated)
            cos = get_embedding_cosine(e_prev, e_new_rotated)
            dist_ts.append(dist)
            cos_ts.append(cos)

        # export to csv
        f = open(f"{configs.RESULT_PATH}/{model}.csv", "w")
        header = "NodeID, " + ", ".join(["Shift" + str(i) for i in range(1, timespells)]) + \
            ", " + ", ".join(["Sim" + str(i) for i in range(1, timespells)]) + "\n"
        f.write(header)
        n_nodes = max([len(l) for l in dist_ts])
        for i in range(n_nodes):
            dists = []
            coss = []
            for j in range(len(dist_ts)):
                if i < len(dist_ts[j]):
                    dists.append(str(dist_ts[j][i]))
                    coss.append(str(cos_ts[j][i]))
                else:
                    dists.append(" ")
                    coss.append(" ")
            row = "{}, ".format(
                str(i)) + ", ".join(dists) + ", " + ", ".join(coss) + "\n"
            f.write(row)
        f.close()
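Example #12 calls three helpers that are not part of this listing. A minimal sketch of one plausible implementation, assuming get_rotated_embedding performs orthogonal Procrustes alignment of the new embedding onto the previous one over the given anchor nodes, and that the distance and cosine helpers return one value per node (the names and exact behaviour are inferred from the call sites, not taken from the source):

import numpy as np
from scipy.linalg import orthogonal_procrustes

def get_rotated_embedding(e_prev, e_new, anchor_nodes):
    # Fit an orthogonal matrix R so that e_new[anchors] @ R ~= e_prev[anchors],
    # then rotate the whole new embedding into the previous embedding's space.
    R, _ = orthogonal_procrustes(e_new[anchor_nodes], e_prev[anchor_nodes])
    return e_new @ R

def get_embedding_distance(e_prev, e_new_rotated):
    # Per-node Euclidean shift between two consecutive (aligned) embeddings.
    return np.linalg.norm(e_prev - e_new_rotated, axis=1)

def get_embedding_cosine(e_prev, e_new_rotated):
    # Per-node cosine similarity between two consecutive (aligned) embeddings.
    num = (e_prev * e_new_rotated).sum(axis=1)
    den = np.linalg.norm(e_prev, axis=1) * np.linalg.norm(e_new_rotated, axis=1)
    return num / den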
Example #13
def build_gate(g, embedding_path, configs):
    print("GATE processing...")
    G, X = gate_utils.load_data(configs)
    feature_dim = X.shape[1]
    gate_args['hidden_dims'] = [feature_dim] + gate_args['hidden_dims']

    G_tf, S, R = gate_utils.prepare_graph_data(G, configs)

    trainer = Trainer(gate_args)
    trainer(G_tf, X, S, R)
    embeddings, attentions = trainer.infer(G_tf, X, S, R)
    f = open("{}/GATE.nv".format(embedding_path), "w")
    f.write(" ".join([str(x) for x in embeddings.shape]))
    f.write("\n")
    for i in range(embeddings.shape[0]):
        d = " ".join([str(x) for x in embeddings[i]])
        f.write("{} {}\n".format(str(i), d))
    f.close()
    print("GATE finished\n")
    embedding = load_embedding("{}/GATE.nv".format(embedding_path))

    return embedding
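Every example reads its result back with load_embedding. The GATE example above shows the .nv layout it expects: a first line with the matrix shape, then one line per node holding the node id followed by its vector. A minimal reader sketch for that layout, assuming integer node ids as written above (OpenNE's own writers may emit string node labels, which would need an extra mapping); this is not the project's actual implementation:

import numpy as np

def load_embedding(filename):
    # Read an .nv file: "<num_nodes> <dim>" on the first line, then
    # "<node_id> <v1> ... <v_dim>" on every following line.
    with open(filename) as f:
        n_nodes, dim = (int(x) for x in f.readline().split())
        emb = np.zeros((n_nodes, dim))
        for line in f:
            parts = line.split()
            if not parts:
                continue
            emb[int(parts[0])] = [float(x) for x in parts[1:]]
    return emb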
Example #14
def build_vgae(g, path, configs):
    vgae = VGAE(g, path, configs)
    vgae.train(epochs)
    embedding = load_embedding("{}/VGAE.nv".format(path))
    return embedding
Example #15
def main(configs, LOAD_TRAINED_EMBEDDING, n_cluster):
    process_node_index(configs.edgelist_filename, configs.node_index_filename,
                       configs.embedding_mapping)
    temp = open(configs.node_index_filename, 'rb')
    node_index = pickle.load(temp)
    temp.close()

    # load dataset
    print("====================\nLoading edgelist")
    t1 = time.time()
    # load graph from edgelist and feature file
    graph = Graph_Int()
    graph.read_edgelist(filename=configs.edgelist_filename,
                        node_index=node_index,
                        weighted=configs.weighted_graph,
                        directed=False)
    graph_str = Graph_Str()
    graph_str.read_edgelist(filename=configs.edgelist_filename,
                            node_index=node_index,
                            weighted=configs.weighted_graph,
                            directed=False)
    if configs.have_features:
        graph.read_node_features(node_index=node_index,
                                 filename=configs.current_feature_file)
    print("Data Loaded. Time elapsed: {:.3f}\n====================\n".format(
        time.time() - t1))

    graph_embeddings = {}
    if LOAD_TRAINED_EMBEDDING:
        # load graph embeddings
        print("====================\nLoading Graph Embeddings\n")
        for model in configs.models:
            embedding_file = (f"{configs.current_embedding_path}/{model}.nv")
            graph_embeddings[model] = load_embedding(embedding_file)
        print("Embeddings Loaded.\n====================")
    else:
        # build graph embedding
        print("====================\nBuilding Graph Embeddings\n")
        t2 = time.time()
        for model in configs.models:
            graph_embeddings[model] = build_embedding(
                graph, graph_str, model, configs.current_embedding_path,
                configs)
        print(
            "Embeddings Constructed. Total time elapsed: {:.3f}\n===================="
            .format(time.time() - t2))

    # GEM graph reconstruction evaluation
    print("====================\nEvaluating Graph Embeddings")
    t3 = time.time()
    reconstruction_performance = {}
    for model in configs.models:
        reconstruction_performance[model] = evaluate_embedding(
            graph.G, graph_embeddings[model])
    print(
        "Embeddings Evaluated. Total time elapsed: {:.3f}\n===================="
        .format(time.time() - t3))

    # clustering evaluation
    print("====================\nEvaluating Node Clusters")
    t4 = time.time()
    kmeans_performance = {}
    dbscan_performance = {}

    # KMeans
    if KMEANS_EVAL:
        kmeans_prediction = {}
        tsne_kmeans = {}
        for model in configs.models:
            print("[KMeans] Clustering {} Embedding".format(model))
            temp_t = time.time()
            kmeans = KMeans(n_clusters=n_cluster).fit(graph_embeddings[model])
            kmeans_prediction[model] = kmeans.labels_
            kmeans_performance[model] = evaluate_clustering_performance(
                graph_embeddings[model], kmeans_prediction[model])
            print(
                "[KMeans] Clustering Finished for {} Embedding. Time elapsed: {:.3f}"
                .format(model,
                        time.time() - temp_t))

    # DBSCAN
    if DBSCAN_EVAL:
        dbscan_prediction = {}
        tsne_dbscan = {}
        for model in configs.models:
            print("[DBSCAN] Clustering {} Embedding".format(model))
            temp_t = time.time()
            dbscan = DBSCAN(eps=eps).fit(graph_embeddings[model])
            dbscan_prediction[model] = dbscan.labels_
            dbscan_performance[model] = evaluate_clustering_performance(
                graph_embeddings[model], dbscan_prediction[model])
            print(
                "[DBSCAN] Clustering Finished for {} Embedding. Time elapsed: {:.3f}"
                .format(model,
                        time.time() - temp_t))

    tsne_result = {}
    tsne_time = {}
    for model in configs.models:
        tsne = TSNE(n_components=2, init='pca', random_state=0)
        temp_t = time.time()
        tsne_result[model] = tsne.fit_transform(graph_embeddings[model])
        t_model = time.time() - temp_t
        print("t-SNE for {} embedding finished ({}s)".format(model, t_model))
        tsne_time[model] = t_model

    print(
        "Clustering Results Evaluated. Total time elapsed: {:.3f}\n===================="
        .format(time.time() - t4))

    # Generate Report
    f = open(
        "{}results-{}.tsv".format(configs.current_report_path, str(n_cluster)),
        "w")
    for model in configs.models:
        f.write("{}\t".format(model))
        MAP, prec_curv = reconstruction_performance[model]
        f.write("{:.3f}\t".format(MAP))
        if KMEANS_EVAL:
            k_s, k_c, k_d = kmeans_performance[model]
            f.write("{:.3f}\t{:.3f}\t{:.3f}\t".format(k_s, k_c, k_d))
        if DBSCAN_EVAL:
            d_s, d_c, d_d = dbscan_performance[model]
            f.write("{:.3f}\t{:.3f}\t{:.3f}\t".format(d_s, d_c, d_d))
        f.write("{}\n".format("\t".join(
            ["{:.3f}".format(x) for x in prec_curv[:10]])))
    f.close()

    # dump data to cache
    f = open(
        "{}experiment-{}.cache".format(configs.current_report_path,
                                       str(n_cluster)), "wb")
    data_cache = [
        graph_embeddings, reconstruction_performance, tsne_result, tsne_time
    ]
    if KMEANS_EVAL:
        data_cache.append(kmeans_prediction)
        data_cache.append(kmeans_performance)
    if DBSCAN_EVAL:
        data_cache.append(dbscan_prediction)
        data_cache.append(dbscan_performance)
    pickle.dump(data_cache, f)
    f.close()
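Example #15 dispatches to the individual builders through build_embedding(graph, graph_str, model, path, configs), which is not included in this listing. A rough, hypothetical sketch of such a dispatcher, with model keys guessed from the .nv file names used above (the real project may route graph vs. graph_str differently per model):

def build_embedding(graph, graph_str, model, path, configs):
    # Hypothetical dispatcher from a model name to the matching build_* helper.
    builders = {
        "TADW": build_tadw, "SDNE": build_sdne, "LINE": build_line,
        "HOPE": build_hope, "GraRep": build_grarep, "GF": build_gf,
        "LLE": build_lle, "Lap": build_le, "Node2vec": build_n2v,
        "DeepWalk": build_dw, "GCAE": build_gcae, "VGAE": build_vgae,
        "GATE": build_gate,
    }
    # graph_str (string node ids) is ignored in this sketch; some builders in
    # the real project may expect it instead of the integer-id graph.
    return builders[model](graph, path, configs)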
Example #16
    # tail of the evaluate_embedding helper (its body is truncated in this excerpt)
    return (MAP, prec_curv)

# Change the name of the dataset
dataset = "pastebin"
json_path = f"./data/{dataset}/config.json"
configs = load_json(json_path)
configs = dict2dotdict(configs)

timespells = configs.timespells
graph_embeddings = {}
reconstruction_performance = {}
reconstruction_performance_curve = {}
for ts in range(1, int(timespells)+1):
    f = open(f"{configs.DATA_PATH}/TS{str(ts)}/generated/graphs.pkl", 'rb')
    graph, graph_str = pickle.load(f)
    f.close()    
    for model in configs.models:
        if model not in graph_embeddings.keys():
            graph_embeddings[model] = {}
            reconstruction_performance[model] = {}
            reconstruction_performance_curve[model] = {}
        graph_embeddings[model][ts] = load_embedding(f"{configs.EMBEDDING_PATH}/TS{ts}/{model}.nv")
        reconstruction_performance[model][ts], reconstruction_performance_curve[model][ts] = evaluate_embedding(graph.G, graph_embeddings[model][ts])

f = open(f"{configs.RESULT_PATH}/MAP.csv", "w")
header = "Model, " + ", ".join(["TS" + str(i) for i in range(1, timespells + 1)]) + "\n"
f.write(header)
for model in configs.models:
    row = model + ", " + ", ".join([str(reconstruction_performance[model][i]) for i in range(1, timespells + 1)]) + "\n"
    f.write(row)
f.close()