def evaluate_unsupervised_embedding(di_graph, graph_embedding, is_undirected=True):
    """Evaluate one embedding method on link prediction; print and return AP, ROC, MAP.

    The graph is split with ``train_ratio=0.8``, the embedding is learned on
    the train graph, and two evaluations are run:

    * AP / ROC: edges drawn by ``sample_edge_new`` are scored with
      ``getscore4`` and passed to ``scores.computeAP_ROC``.
    * MAP: a sub-sample of the test graph is taken with
      ``graph_util.sample_graph(test_graph, 1024)``; the adjacency is
      reconstructed from the embedding rows of the sampled nodes, predicted
      edges already present in the train graph are dropped, and the rest is
      passed to ``scores.computeMAP``.

    Args:
        di_graph: graph handed to ``train_test_split.splitDiGraphToTrainTest2``.
        graph_embedding: object providing ``learn_embedding`` and
            ``get_reconstructed_adj``.
        is_undirected: accepted but never read; the calls below hard-code
            ``is_undirected=True``.

    Returns:
        Tuple ``(AP, ROC, MAP)``.
    """
    train_graph, test_graph = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.8, is_undirected=True)
    embedding, _ = graph_embedding.learn_embedding(
        graph=train_graph, no_python=False)

    # AP / ROC over the sampled candidate edges.
    candidate_edges = sample_edge_new(train_graph, test_graph, 0.5)
    scored_edges = getscore4(train_graph, graph_embedding, candidate_edges)
    AP, ROC = scores.computeAP_ROC(scored_edges, test_graph)

    # MAP over a sub-sample of the test graph.
    sampled_test_graph, node_l = graph_util.sample_graph(test_graph, 1024)
    embedding = embedding[node_l]
    estimated_adj = graph_embedding.get_reconstructed_adj(embedding, node_l)
    predicted_edges = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=True)

    # Keep only predictions that are not already edges of the train graph.
    unseen_edges = []
    for edge in predicted_edges:
        if train_graph.has_edge(node_l[edge[0]], node_l[edge[1]]):
            continue
        unseen_edges.append(edge)
    MAP = scores.computeMAP(unseen_edges, sampled_test_graph)

    print(AP, ROC, MAP)
    return AP, ROC, MAP
def evaluate_unsupervised_all(di_graph, is_undirected=True):
    """Evaluate the heuristics ``cn``, ``jc``, ``pa`` and ``aa`` on link prediction.

    The graph is split once with ``train_ratio=0.8``; one set of candidate
    edges and one sub-sample of the test graph are drawn and then reused for
    every heuristic. For each heuristic, in the order listed above, MAP is
    computed from ``getscore1`` on the sub-sample and AP / ROC from
    ``getscore3`` on the candidate edges; the three values are printed.

    NOTE(review): another function with this same name is defined later in
    this file; when both are present the later definition replaces this one.

    Args:
        di_graph: graph handed to ``train_test_split.splitDiGraphToTrainTest2``.
        is_undirected: accepted but never read; the calls below hard-code
            ``is_undirected=True``.

    Returns:
        Tuple ``(AP, ROC, MAP)`` of three lists, one entry per heuristic.
    """
    train_graph, test_graph = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.8, is_undirected=True)
    candidate_edges = sample_edge_new(train_graph, test_graph)
    sampled_test_graph, node_l = graph_util.sample_graph(test_graph, 1024)

    AP, ROC, MAP = [], [], []
    for heuristic in (cn, jc, pa, aa):
        # MAP on the sub-sampled test graph.
        estimated_adj = getscore1(train_graph, node_l, heuristic)
        predicted_edges = evaluation_util.getEdgeListFromAdjMtx(
            estimated_adj, is_undirected=True)
        unseen_edges = []
        for edge in predicted_edges:
            if train_graph.has_edge(node_l[edge[0]], node_l[edge[1]]):
                continue
            unseen_edges.append(edge)
        map_value = scores.computeMAP(unseen_edges, sampled_test_graph)
        MAP.append(map_value)

        # AP / ROC on the shared candidate edges.
        scored_edges = getscore3(train_graph, candidate_edges, heuristic)
        ap_value, roc_value = scores.computeAP_ROC(scored_edges, test_graph)
        AP.append(ap_value)
        ROC.append(roc_value)

        print(ap_value, roc_value, map_value)
    return AP, ROC, MAP
def evaluate_supervised(di_graph, graph_embedding, is_undirected=True):
    """Train an edge classifier on embedding features and evaluate link prediction.

    Steps, in execution order:

    1. Split ``di_graph`` with ``train_ratio=0.6``; split the held-out part
       again with ``train_ratio=0.5`` into a classifier-training graph and a
       test graph.
    2. Learn the embedding on the first train graph, build an edge dataset
       with ``create_edge_dataset`` / ``create_vector_dataset`` (combining
       function ``hadamard2``), standardise it with its column mean and std,
       and fit a classifier with ``train_classifier``.
    3. Add the classifier-training edges to the train graph (in place),
       sample candidate edges, re-learn the embedding on the enlarged graph,
       and compute AP / ROC from ``getscore5``.
    4. Sub-sample the test graph with ``graph_util.sample_graph(..., 1024)``
       and compute MAP from ``getscore2``, ignoring predicted edges already
       present in the train graph. Only MAP is printed.

    Args:
        di_graph: graph handed to ``train_test_split.splitDiGraphToTrainTest2``.
        graph_embedding: object providing ``learn_embedding``.
        is_undirected: forwarded only to
            ``evaluation_util.splitDiGraphToTrainTest``; the other calls
            hard-code ``is_undirected=True``.

    Returns:
        Tuple ``(AP, ROC, MAP)``.
    """
    train_graph, held_out_graph = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.6, is_undirected=True)
    classifier_graph, test_graph = evaluation_util.splitDiGraphToTrainTest(
        held_out_graph, train_ratio=0.5, is_undirected=is_undirected)

    embedding, _ = graph_embedding.learn_embedding(
        graph=train_graph, no_python=False)

    # Build and standardise the classifier's training set.
    positive_edges, negative_edges = create_edge_dataset(
        train_graph, classifier_graph)
    features, labels = create_vector_dataset(
        positive_edges, negative_edges, hadamard2, embedding)
    mean = np.mean(features, axis=0)
    std = np.std(features, axis=0)
    features = (features - mean) / std
    classifier = train_classifier(features, labels)

    # Fold the classifier-training edges into the train graph before scoring.
    for source, target in classifier_graph.edges():
        train_graph.add_edge(source, target)

    candidate_edges = sample_edge_new(train_graph, test_graph, 0.5)
    embedding, _ = graph_embedding.learn_embedding(
        graph=train_graph, no_python=False)
    scored_edges = getscore5(
        train_graph, candidate_edges, classifier, hadamard2, embedding,
        mean, std)
    AP, ROC = scores.computeAP_ROC(scored_edges, test_graph)

    # MAP on a sub-sample of the test graph.
    test_graph, node_l = graph_util.sample_graph(test_graph, 1024)
    embedding = embedding[node_l]
    estimated_adj = getscore2(
        train_graph, node_l, classifier, hadamard2, embedding, mean, std)
    predicted_edges = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj, is_undirected=True)
    unseen_edges = []
    for edge in predicted_edges:
        if train_graph.has_edge(node_l[edge[0]], node_l[edge[1]]):
            continue
        unseen_edges.append(edge)
    MAP = scores.computeMAP(unseen_edges, test_graph)

    print(MAP)
    return AP, ROC, MAP
def evaluate_unsupervised_all(di_graph, is_undirected=True):
    """Compute and print AP / ROC of the ``aa`` heuristic on a train/test split.

    The graph is split with ``train_ratio=0.8``, candidate edges are drawn
    with ``sample_edge_new(train, test, -1)``, scored with
    ``getscore3(..., aa)`` and evaluated by ``scores.computeAP_ROC``.

    NOTE(review): an earlier function with this same name exists in this
    file and returns three lists ``(AP, ROC, MAP)``; this later definition
    replaces it and returns a 2-tuple instead.

    Args:
        di_graph: graph handed to ``train_test_split.splitDiGraphToTrainTest2``.
        is_undirected: accepted but never read; the split hard-codes
            ``is_undirected=True``.

    Returns:
        Tuple ``(AP1, ROC1)``.
    """
    train_graph, test_graph = train_test_split.splitDiGraphToTrainTest2(
        di_graph, train_ratio=0.8, is_undirected=True)

    candidate_edges = sample_edge_new(train_graph, test_graph, -1)
    scored_edges = getscore3(train_graph, candidate_edges, aa)
    result = scores.computeAP_ROC(scored_edges, test_graph)
    AP1, ROC1 = result

    print(AP1, ROC1)
    return AP1, ROC1
# Script fragment: for every input graph and every sample index, split the
# graph and pickle the learned embedding matrices under 'SAVER_SUP/'.
# NOTE(review): uses `xrange`, which exists only in Python 2 — confirm the
# interpreter this script targets.
print ("saving for supervised")
for grp in xrange(len(list_graphs)):
    for x in xrange(num_samples):
        # load the graph as a networkx graph
        G = graph_util.loadGraphFromEdgeListTxt(list_graphs[grp], directed=list_directed[grp])
        G = G.to_directed()
        # One output directory per (graph, sample) pair; sample folders are numbered from 1.
        if not os.path.exists('SAVER_SUP/'+fig_name[grp]+str(x+1)):
            os.makedirs('SAVER_SUP/'+fig_name[grp]+str(x+1))
        # split the graph into 60-20-20 ratio, 60% for calculating the edge features, 20% for training the classifier, 20% for evaluating the model.
        train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(G, train_ratio = 0.6, is_undirected=True)
        train_digraph1, test_digraph = evaluation_util.splitDiGraphToTrainTest(test_digraph, train_ratio=0.5, is_undirected=True)
        # embeddings without relearning
        print ("saving for LE")
        for dim in dimensions:
            # Learn a Laplacian Eigenmaps embedding of size `dim` on the 60% graph
            # and pickle the embedding matrix to '<sample dir>/LE1_<dim>'.
            embedding=LaplacianEigenmaps(d=dim)
            X, _ = embedding.learn_embedding(graph=train_digraph, no_python=False)
            file_name='SAVER_SUP/'+fig_name[grp]+str(x+1)+'/LE1_'+str(dim)
            parameter_file=open(file_name, 'wb')
            pickle.dump(X,parameter_file)
            parameter_file.close()
        print ("saving for DEEPWALK")
        # NOTE(review): the body of this loop is missing here — the fragment
        # ends at the loop header, so the file cannot parse as written.
        for dim in dimensions:
def evaluate_supervised_new(train_digraph, embeddings, hads, is_undirected=True):
    """Train two edge classifiers (embedding features, heuristic features) and score them.

    Steps, in execution order:

    1. Split the input graph with ``train_ratio=0.6``; remove from the
       held-out graph every edge that is also in the train graph; split the
       held-out graph again with ``train_ratio=0.5`` into a
       classifier-training graph and a test graph.
    2. Learn every embedding in ``embeddings`` on the train graph and map
       each entry of ``hads`` to a combining function (``1`` ->
       ``hadamard1``, ``0`` -> ``hadamard2``; other values are ignored).
    3. Build a standardised dataset with ``create_mix_dataset``, fit a
       classifier, score the sampled candidate edges with ``getscore9`` and
       print AP / ROC.
    4. Repeat with ``create_score_dataset`` / ``getscore7`` using ``allh``
       and print the resulting AP / ROC. These second scores are printed
       only; they are not returned.

    Args:
        train_digraph: full input graph (despite the name, it is split here).
        embeddings: iterable of objects providing ``learn_embedding``.
        hads: iterable of ``0`` / ``1`` flags selecting the combining function.
        is_undirected: forwarded only to
            ``evaluation_util.splitDiGraphToTrainTest``; the first split
            hard-codes ``is_undirected=True``.

    Returns:
        Tuple ``(AP, ROC)`` of the embedding-feature classifier.
    """
    train_digraph, test_digraph = train_test_split.splitDiGraphToTrainTest2(
        train_digraph, train_ratio=0.6, is_undirected=True)

    # Make sure no train edge leaks into the held-out graph.
    for (st, ed) in train_digraph.edges():
        if test_digraph.has_edge(st, ed):
            test_digraph.remove_edge(st, ed)

    train_digraph1, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        test_digraph, train_ratio=0.5, is_undirected=is_undirected)

    l_emb = []
    for emb in embeddings:
        X, _ = emb.learn_embedding(graph=train_digraph, no_python=False)
        l_emb.append(X)

    combine = []
    for had in hads:
        if had == 1:
            combine.append(hadamard1)
        elif had == 0:
            combine.append(hadamard2)
    print("embeddings learned")

    # Classifier on embedding-derived edge features.
    trp, trn = create_edge_dataset(train_digraph, train_digraph1)
    trd, trl = create_mix_dataset(trp, trn, train_digraph, l_emb, combine)
    mean = np.mean(trd, axis=0)
    std = np.std(trd, axis=0)
    trd = (trd - mean) / std
    clasifier = train_classifier(trd, trl)

    # Candidate edges are sampled against train + classifier-training edges;
    # the copy keeps ``train_digraph`` itself unchanged for step 4.
    train_digraph_temp = train_digraph.copy()
    for (st, ed) in train_digraph1.edges():
        train_digraph_temp.add_edge(st, ed)
    sample_edges = sample_edge_new(
        train_digraph_temp, test_digraph, -1, num_edges=500000)

    # Embeddings are not re-learned on the enlarged graph; the message is
    # kept because callers may rely on the existing log output.
    print("embeddings learned")
    filtered_edge_list = getscore9(
        train_digraph_temp, sample_edges, clasifier, l_emb, combine,
        mean, std)
    AP, ROC = scores.computeAP_ROC(filtered_edge_list, test_digraph)
    print(AP, ROC)

    # Classifier on heuristic-score features (``allh``); printed for comparison.
    trd, trl = create_score_dataset(trp, trn, allh, train_digraph)
    mean = np.mean(trd, axis=0)
    std = np.std(trd, axis=0)
    trd = (trd - mean) / std
    clasifier = train_classifier(trd, trl)
    filtered_edge_list = getscore7(
        train_digraph_temp, sample_edges, clasifier, allh, mean, std)
    AP2, ROC2 = scores.computeAP_ROC(filtered_edge_list, test_digraph)
    print(AP2, ROC2)

    return AP, ROC