Example #1
def evaluate_unsupervised_embedding(di_graph,
                                    graph_embedding,
                                    is_undirected=True):

    train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.8, is_undirected=True)

    X, _ = graph_embedding.learn_embedding(graph=train_digraph,
                                           no_python=False)

    sample_edges = sample_edge_new(train_digraph, test_digraph, 0.5)
    filtered_edge_list = getscore4(train_digraph, graph_embedding,
                                   sample_edges)
    AP, ROC = scores.computeAP_ROC(filtered_edge_list, test_digraph)

    test_digraph1, node_l = graph_util.sample_graph(test_digraph, 1024)
    X = X[node_l]
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=True)
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not (train_digraph.has_edge(node_l[e[0]], node_l[e[1]]))
    ]
    MAP = scores.computeMAP(filtered_edge_list, test_digraph1)

    print(AP, ROC, MAP)

    return AP, ROC, MAP
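A minimal driver for the routine above, as a sketch only: the module paths and the LaplacianEigenmaps model follow the GEM library layout that these snippets appear to build on (see Example #5); the edge-list path and dimension are placeholders.

from gem.utils import graph_util
from gem.embedding.lap import LaplacianEigenmaps

# Load an edge list as a networkx graph and make it directed, as in Example #5.
G = graph_util.loadGraphFromEdgeListTxt('data/example.edgelist', directed=False)
G = G.to_directed()

# 128-dimensional Laplacian Eigenmaps embedding; any GEM-style model exposing
# learn_embedding / get_reconstructed_adj should plug in here.
embedding = LaplacianEigenmaps(d=128)
AP, ROC, MAP = evaluate_unsupervised_embedding(G, embedding, is_undirected=True)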
Example #2
def evaluate_unsupervised_all(di_graph, is_undirected=True):

    train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.8, is_undirected=True)
    sample_edges = sample_edge_new(train_digraph, test_digraph)
    test_digraph1, node_l = graph_util.sample_graph(test_digraph, 1024)
    AP = []
    ROC = []
    MAP = []
    heuristics = [cn, jc, pa, aa]

    for x in heuristics:

        estimated_adj = getscore1(train_digraph, node_l, x)
        predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
            estimated_adj, is_undirected=True)
        filtered_edge_list = [
            e for e in predicted_edge_list
            if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
        ]
        MAP1 = scores.computeMAP(filtered_edge_list, test_digraph1)
        MAP.append(MAP1)

        filtered_edge_list = getscore3(train_digraph, sample_edges, x)
        AP1, ROC1 = scores.computeAP_ROC(filtered_edge_list, test_digraph)
        AP.append(AP1)
        ROC.append(ROC1)

        print(AP1, ROC1, MAP1)

    return AP, ROC, MAP
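The scorers cn, jc, pa and aa are defined elsewhere in the project; judging from the cn(G11, st, ed) call in the commented-out block of Example #6, each takes an undirected graph and a node pair. A plausible sketch of the four heuristics (common neighbours, Jaccard, preferential attachment, Adamic-Adar), assuming integer node ids:

import math
import networkx as nx

def cn(G, u, v):
    # Common neighbours.
    return len(list(nx.common_neighbors(G, u, v)))

def jc(G, u, v):
    # Jaccard coefficient.
    union = len(set(G[u]) | set(G[v]))
    return cn(G, u, v) / union if union else 0.0

def pa(G, u, v):
    # Preferential attachment.
    return G.degree(u) * G.degree(v)

def aa(G, u, v):
    # Adamic-Adar index; skip common neighbours of degree 1 to avoid log(1) = 0.
    return sum(1.0 / math.log(G.degree(w))
               for w in nx.common_neighbors(G, u, v) if G.degree(w) > 1)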
Example #3
def evaluate_supervised(di_graph, graph_embedding, is_undirected=True):

    train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.6, is_undirected=True)
    train_digraph1, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        test_digraph, train_ratio=0.5, is_undirected=is_undirected)

    X, _ = graph_embedding.learn_embedding(graph=train_digraph,
                                           no_python=False)

    trp, trn = create_edge_dataset(train_digraph, train_digraph1)
    trd, trl = create_vector_dataset(trp, trn, hadamard2, X)
    mean = np.mean(trd, axis=0)
    std = np.std(trd, axis=0)
    trd = (trd - mean) / std

    clasifier = train_classifier(trd, trl)

    for (st, ed) in train_digraph1.edges():
        train_digraph.add_edge(st, ed)

    sample_edges = sample_edge_new(train_digraph, test_digraph, 0.5)

    X, _ = graph_embedding.learn_embedding(graph=train_digraph,
                                           no_python=False)

    filtered_edge_list = getscore5(train_digraph, sample_edges, clasifier,
                                   hadamard2, X, mean, std)
    AP, ROC = scores.computeAP_ROC(filtered_edge_list, test_digraph)

    test_digraph, node_l = graph_util.sample_graph(test_digraph, 1024)
    X = X[node_l]
    estimated_adj = getscore2(train_digraph, node_l, clasifier, hadamard2, X,
                              mean, std)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=True)
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
    ]
    MAP = scores.computeMAP(filtered_edge_list, test_digraph)

    print(MAP)

    return AP, ROC, MAP
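hadamard2 and create_vector_dataset are project helpers that are not shown in these excerpts. A sketch of what they plausibly do, assuming hadamard2 is the standard element-wise (Hadamard) edge operator over the two endpoint embeddings and that node ids index rows of X:

import numpy as np

def hadamard2(X, u, v):
    # Hypothetical edge-feature operator: element-wise product of the
    # endpoint embedding vectors.
    return np.multiply(X[u], X[v])

def create_vector_dataset(pos_edges, neg_edges, operator, X):
    # Hypothetical helper matching the call in Example #3: stack edge features
    # for positive and negative samples and build the matching 1/0 labels.
    data = [operator(X, u, v) for (u, v) in pos_edges] + \
           [operator(X, u, v) for (u, v) in neg_edges]
    labels = [1] * len(pos_edges) + [0] * len(neg_edges)
    return np.asarray(data), np.asarray(labels)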
Example #4
def evaluate_unsupervised_all(di_graph, is_undirected=True):

    train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.8, is_undirected=True)
    # train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(di_graph, train_ratio = 0.75, is_undirected=True)
    # train_digraph1, test_digraph = evaluation_util.splitDiGraphToTrainTest(
    #     test_digraph,
    #     train_ratio=0.2,
    #     is_undirected=is_undirected
    # )

    # train_digraph_temp=train_digraph.copy()
    # for (st,ed) in train_digraph1.edges():
    #     train_digraph_temp.add_edge(st,ed)

    # sample_edges = sample_edge_new(train_digraph_temp,test_digraph, -1)

    sample_edges = sample_edge_new(train_digraph, test_digraph, -1)

    filtered_edge_list = getscore3(train_digraph, sample_edges, aa)
    AP1, ROC1 = scores.computeAP_ROC(filtered_edge_list, test_digraph)
    print(AP1, ROC1)
    return AP1, ROC1
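scores.computeAP_ROC is also project code; the commented-out block in Example #6 labels each (st, ed, w) triple by membership in the test graph and feeds the scores to sklearn's average_precision_score, so it plausibly reduces to something like this sketch (not the project's actual implementation):

from sklearn.metrics import average_precision_score, roc_auc_score

def computeAP_ROC(scored_edge_list, test_digraph):
    # Each entry is assumed to be (u, v, score); an edge counts as a positive
    # example if it appears in the held-out test graph.
    labels = [1 if test_digraph.has_edge(u, v) else 0
              for (u, v, _) in scored_edge_list]
    preds = [s for (_, _, s) in scored_edge_list]
    return average_precision_score(labels, preds), roc_auc_score(labels, preds)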
Example #5
print ("saving for supervised")

for grp in range(len(list_graphs)):
	for x in range(num_samples):

		# load the graph as a networkx graph

		G = graph_util.loadGraphFromEdgeListTxt(list_graphs[grp], directed=list_directed[grp])
		G = G.to_directed()
		
		if not os.path.exists('SAVER_SUP/'+fig_name[grp]+str(x+1)):
			os.makedirs('SAVER_SUP/'+fig_name[grp]+str(x+1))
		
		# split the graph into 60-20-20 ratio, 60% for calculating the edge features, 20% for training the classifier, 20% for evaluating the model.

		train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(G, train_ratio = 0.6, is_undirected=True)
		train_digraph1, test_digraph = evaluation_util.splitDiGraphToTrainTest(test_digraph, train_ratio=0.5, is_undirected=True)

		# embeddings without relearning

		print ("saving for LE")
		for dim in dimensions:
			embedding=LaplacianEigenmaps(d=dim)
			X, _ = embedding.learn_embedding(graph=train_digraph, no_python=False)
			file_name='SAVER_SUP/'+fig_name[grp]+str(x+1)+'/LE1_'+str(dim)
			parameter_file=open(file_name, 'wb')
			pickle.dump(X,parameter_file)
			parameter_file.close()

		print ("saving for DEEPWALK")
		for dim in dimensions:
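The snippet is cut off after the DEEPWALK header, but the saving pattern is clear from the LE loop above. A matching reload sketch, assuming the same file-name layout:

import pickle

# Hypothetical reload of one of the pickled embedding matrices saved above;
# fig_name, grp, x and dim follow the naming scheme of the LE loop.
file_name = 'SAVER_SUP/' + fig_name[grp] + str(x + 1) + '/LE1_' + str(dim)
with open(file_name, 'rb') as parameter_file:
    X = pickle.load(parameter_file)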
Example #6
def evaluate_supervised_new(train_digraph,
                            embeddings,
                            hads,
                            is_undirected=True):

    train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(
        train_digraph, train_ratio=0.6, is_undirected=True)
    for (st, ed) in train_digraph.edges():
        if (test_digraph.has_edge(st, ed)):
            test_digraph.remove_edge(st, ed)

    train_digraph1, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        test_digraph, train_ratio=0.5, is_undirected=is_undirected)

    l_emb = []
    combine = []
    for emb in embeddings:
        X, _ = emb.learn_embedding(graph=train_digraph, no_python=False)
        l_emb.append(X)

    for had in hads:
        if (had == 1):
            combine.append(hadamard1)
        elif (had == 0):
            combine.append(hadamard2)

    # combine.append(dotp1)

    print("embeddings learned")

    trp, trn = create_edge_dataset(train_digraph, train_digraph1)
    trd, trl = create_mix_dataset(trp, trn, train_digraph, l_emb, combine)
    mean = np.mean(trd, axis=0)
    std = np.std(trd, axis=0)
    trd = (trd - mean) / std
    clasifier = train_classifier(trd, trl)
    # print (clasifier.coef_)
    # print (clasifier.intercept_)

    train_digraph_temp = train_digraph.copy()
    for (st, ed) in train_digraph1.edges():
        train_digraph_temp.add_edge(st, ed)

    sample_edges = sample_edge_new(train_digraph_temp,
                                   test_digraph,
                                   -1,
                                   num_edges=500000)

    # co=0
    # for (st,ed) in sample_edges:
    #     for (st1,ed1) in trn:
    #         if(st==st1 and ed==ed1):
    #             if(test_digraph.has_edge(st,ed)):
    #                 print ("1")

    #     for (st1,ed1) in trp:
    #         if(st==st1 and ed==ed1):
    #             if(test_digraph.has_edge(st,ed)):
    #                 print ("2")
    #             else:
    #                 print ("3")

    # l_emb1 = []
    # for emb in embeddings:
    #     X, _ = emb.learn_embedding(graph=train_digraph_temp, no_python=False)
    #     l_emb1.append(X)
    #     break
    # l_emb1.append(l_emb[1])

    print("embeddings learned")

    # filtered_edge_list = getscore9(train_digraph, sample_edges, clasifier, l_emb1, combine, mean, std)
    # AP, ROC = scores.computeAP_ROC(filtered_edge_list, test_digraph)
    # print (AP,ROC)

    filtered_edge_list = getscore9(train_digraph_temp, sample_edges, clasifier,
                                   l_emb, combine, mean, std)
    AP, ROC = scores.computeAP_ROC(filtered_edge_list, test_digraph)
    print(AP, ROC)

    trd, trl = create_score_dataset(trp, trn, allh, train_digraph)
    mean = np.mean(trd, axis=0)
    std = np.std(trd, axis=0)
    trd = (trd - mean) / std
    clasifier = train_classifier(trd, trl)
    filtered_edge_list = getscore7(train_digraph_temp, sample_edges, clasifier,
                                   allh, mean, std)
    AP2, ROC2 = scores.computeAP_ROC(filtered_edge_list, test_digraph)
    print(AP2, ROC2)

    # G11 = train_digraph.to_undirected()
    # f1=[]
    # f2=[]
    # for (st,ed,w) in filtered_edge_list:
    #     f1.append(w)
    #     f2.append(cn(G11,st,ed))

    # f1=np.array(f1)
    # f2=np.array(f2)
    # ind1 = np.argsort(-1*f1)
    # ind2 = np.argsort(-1*f2)
    # print (ind1[:1000])
    # print (ind2[:1000])
    # print (f1[ind1[:1000]])
    # print (f2[ind1[:1000]])

    # filtered_edge_list = getscore3(train_digraph_temp, sample_edges, aa)
    # AP, ROC = scores.computeAP_ROC(filtered_edge_list, test_digraph)
    # print (AP,ROC)

    return AP, ROC
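    # NOTE: the commented-out experiments and the MAP block below are never
    # reached once the function returns here.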

    # labels=[]
    # score=[]
    # dist=[]
    # G=train_digraph.to_undirected()
    # print (len(filtered_edge_list))
    # for (st,ed,w) in filtered_edge_list:
    #     # if not(nx.shortest_path_length(G,source=st,target=ed)==2):
    #     #     continue
    #     if(test_digraph.has_edge(st,ed)):
    #         labels.append(1)
    #     else:
    #         labels.append(0)
    #     score.append(w)
    # ap = average_precision_score(labels, score)
    # print (ap)

    # ind = np.argsort(-1*np.asarray(score))
    # labels = np.array(labels)
    # print (labels[ind[:1000]])

    # labels=[]
    # score=[]
    # dist=[]
    # G=train_digraph.to_undirected()
    # for (st,ed,w) in filtered_edge_list:
    # if (nx.shortest_path_length(G,source=st,target=ed)==2):
    #     continue
    #     if(test_digraph.has_edge(st,ed)):
    #         labels.append(1)
    #     else:
    #         labels.append(0)
    #     score.append(w)
    # ap = average_precision_score(labels, score)
    # print (ap)

    # ind = np.argsort(-1*np.asarray(score))
    # labels = np.array(labels)
    # print (labels[ind[:1000]])

    # test_digraph, node_l = graph_util.sample_graph(test_digraph, 1024)
    # estimated_adj = getscore8(train_digraph, node_l, clasifier, l_emb1, combine)
    # predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(estimated_adj,is_undirected=True)
    # filtered_edge_list = [e for e in predicted_edge_list if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])]
    # MAP = scores.computeMAP(filtered_edge_list, test_digraph)

    # print (MAP)
    MAP = 0

    return AP, ROC, MAP
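train_classifier is another project-level helper; the commented-out clasifier.coef_ / clasifier.intercept_ lines above suggest a linear model, so a minimal stand-in with scikit-learn's logistic regression (an assumption, not the confirmed implementation) would be:

from sklearn.linear_model import LogisticRegression

def train_classifier(features, labels):
    # Hypothetical stand-in: fit a logistic-regression link classifier on the
    # standardised edge-feature matrix built by the create_*_dataset helpers.
    clf = LogisticRegression(max_iter=1000)
    clf.fit(features, labels)
    return clf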