def evaluateStaticGraphReconstruction(digraph, graph_embedding,
                                      X_stat, node_l=None, file_suffix=None,
                                      sample_ratio_e=None, is_undirected=True,
                                      is_weighted=False):
    """Score how well an embedding reconstructs the graph it was learned on.

    Args:
        digraph (Object): networkx graph used as ground truth.
        graph_embedding (object): Embedding object exposing
            ``get_reconstructed_adj``.
        X_stat: Embedding forwarded to ``get_reconstructed_adj``.
        node_l: Forwarded unchanged to ``get_reconstructed_adj``.
        file_suffix (Str): When not None, forwarded as the second positional
            argument of ``get_reconstructed_adj``.
        sample_ratio_e (Float): When truthy, evaluation is restricted to the
            pairs returned by ``evaluation_util.getRandomEdgePairs``.
        is_undirected (bool): Forwarded to the pair sampler and to
            ``evaluation_util.getEdgeListFromAdjMtx``.
        is_weighted (bool): When True, weight-reconstruction errors are also
            computed.

    Returns:
        tuple: ``(MAP, prec_curv, err, err_baseline)``. ``err`` and
        ``err_baseline`` are None unless ``is_weighted`` is True.
    """
    n_nodes = digraph.number_of_nodes()

    # Optional down-sampling of the node pairs that get evaluated.
    sampled_pairs = None
    if sample_ratio_e:
        sampled_pairs = evaluation_util.getRandomEdgePairs(
            n_nodes, sample_ratio_e, is_undirected
        )

    # The suffix, when present, sits between the embedding and node_l.
    if file_suffix is None:
        recon_args = (X_stat, node_l)
    else:
        recon_args = (X_stat, file_suffix, node_l)
    estimated_adj = graph_embedding.get_reconstructed_adj(*recon_args)

    ranked_edges = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=sampled_pairs,
    )
    MAP = metrics.computeMAP(ranked_edges, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(ranked_edges, digraph)

    if not is_weighted:
        return (MAP, prec_curv, None, None)

    # Weighted graphs: compare weights on observed edges only, so every entry
    # that is zero in the true adjacency is zeroed in the estimate (in place).
    true_adj = nx.to_numpy_matrix(digraph)
    estimated_adj[true_adj == 0] = 0
    err = np.linalg.norm(true_adj - estimated_adj)
    err_baseline = np.linalg.norm(true_adj)
    return (MAP, prec_curv, err, err_baseline)
def evaluateStaticGraphReconstruction(digraph, graph_embedding,
                                      X_stat, node_l=None, file_suffix=None,
                                      sample_ratio_e=None, is_undirected=True):
    """Compute MAP and the precision curve of a reconstructed graph.

    Args:
        digraph (Object): networkx graph used as ground truth.
        graph_embedding (object): Embedding object exposing
            ``get_reconstructed_adj``.
        X_stat: Embedding forwarded to ``get_reconstructed_adj``.
        node_l: Forwarded unchanged to ``get_reconstructed_adj``.
        file_suffix (Str): When not None, forwarded as the second positional
            argument of ``get_reconstructed_adj``.
        sample_ratio_e (Float): When truthy, evaluation is restricted to the
            pairs returned by ``evaluation_util.getRandomEdgePairs``.
        is_undirected (bool): Forwarded to the pair sampler and to
            ``evaluation_util.getEdgeListFromAdjMtx``.

    Returns:
        tuple: ``(MAP, prec_curv)``.
    """
    n_nodes = digraph.number_of_nodes()
    sampled_pairs = (
        evaluation_util.getRandomEdgePairs(
            n_nodes, sample_ratio_e, is_undirected
        )
        if sample_ratio_e
        else None
    )

    # With a suffix the call takes three positional arguments, otherwise two.
    if file_suffix is not None:
        adj_hat = graph_embedding.get_reconstructed_adj(
            X_stat, file_suffix, node_l
        )
    else:
        adj_hat = graph_embedding.get_reconstructed_adj(X_stat, node_l)

    ranked_edges = evaluation_util.getEdgeListFromAdjMtx(
        adj_hat,
        is_undirected=is_undirected,
        edge_pairs=sampled_pairs,
    )
    mean_avg_prec = metrics.computeMAP(ranked_edges, digraph)
    precision_curve, _ = metrics.computePrecisionCurve(ranked_edges, digraph)
    return (mean_avg_prec, precision_curve)
def evaluateStaticGraphReconstruction(digraph, graph_embedding,
                                      X_stat, node_l=None, file_suffix=None,
                                      sample_ratio_e=None, is_undirected=True,
                                      is_weighted=False):
    """This function evaluates the graph reconstruction accuracy of the embedding algorithms.

    Args:
        digraph (Object): directed networkx graph object.
        graph_embedding (object): Object of the embedding algorithm class defined in gemben/embedding.
        X_stat (Vector): Embedding of the nodes of the graph.
        node_l: Forwarded unchanged to ``get_reconstructed_adj``
            (presumably the subset of nodes to reconstruct - confirm against
            the embedding class).
        file_suffix (Str): The name of the algorithm and dataset used to save the embedding.
            When not None it is forwarded to ``get_reconstructed_adj``.
        sample_ratio_e (Float): The ratio used to sample the original graph for evaluation purpose.
        is_undirected (bool): Boolean flag to denote whether the graph is undirected.
        is_weighted (bool): Boolean flag to denote whether the edges of the graph are weighted.

    Returns:
        tuple: ``(MAP, prec_curv, err, err_baseline)`` - the mean average
        precision, the precision curve, the norm of the weight reconstruction
        error on observed edges and the norm of the true adjacency matrix.
        ``err`` and ``err_baseline`` are None unless ``is_weighted`` is True.
    """
    node_num = digraph.number_of_nodes()
    # evaluation
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num,
            sample_ratio_e,
            is_undirected
        )
    else:
        eval_edge_pairs = None
    if file_suffix is None:
        estimated_adj = graph_embedding.get_reconstructed_adj(X_stat, node_l)
    else:
        estimated_adj = graph_embedding.get_reconstructed_adj(
            X_stat,
            file_suffix,
            node_l
        )
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs
    )

    # ``G.node`` was removed in networkx 2.4; fall back to ``G.nodes`` there.
    # ``.get(0, {})`` keeps an empty graph, or one without a node labelled 0,
    # from raising KeyError: such a graph is treated as non-partitioned.
    node_attrs = getattr(digraph, 'node', digraph.nodes)
    if 'partition' in node_attrs.get(0, {}):
        # Partitioned graph: only edges crossing partitions are candidates.
        predicted_edge_list = [
            e for e in predicted_edge_list
            if node_attrs[e[0]]['partition'] != node_attrs[e[1]]['partition']
        ]

    MAP = metrics.computeMAP(predicted_edge_list, digraph)
    prec_curv, _ = metrics.computePrecisionCurve(predicted_edge_list, digraph)

    # If weighted, compute the error in reconstructed weights of observed edges
    if is_weighted:
        digraph_adj = nx.to_numpy_matrix(digraph)
        # Zero the estimate wherever the true graph has no edge (in place) so
        # that only observed edges contribute to the error.
        estimated_adj[digraph_adj == 0] = 0
        err = np.linalg.norm(digraph_adj - estimated_adj)
        err_baseline = np.linalg.norm(digraph_adj)
    else:
        err = None
        err_baseline = None
    return (MAP, prec_curv, err, err_baseline)
def evaluateStaticLinkPrediction(train_digraph, test_digraph,
                                 graph_embedding, X,
                                 node_l=None,
                                 sample_ratio_e=None,
                                 is_undirected=True,
                                 store_predictions=1):
    """This function evaluates the static link prediction accuracy of the embedding algorithms.

    Args:
        train_digraph (Object): directed networkx graph object used for training the algorithm.
        test_digraph (Object): directed networkx graph object to be used for testing the algorithm.
        graph_embedding (object): Object of the embedding algorithm class defined in gemben/embedding.
        X (Vector): Embedding of the nodes of the graph. When None,
            ``graph_embedding.learn_embedding(train_digraph)`` is called first.
        node_l: Forwarded unchanged to ``get_reconstructed_adj``.
        sample_ratio_e (Float): The ratio used to sample the original graph for evaluation purpose.
        is_undirected (bool): Boolean flag to denote whether the graph is undirected.
        store_predictions (Int): When truthy (the default), the filtered
            predictions and the test graph are pickled under
            ``gem/nodeListMap/``.

    Returns:
        tuple: ``(MAP, prec_curv)`` - the mean average precision and the
        precision curve values.
    """
    node_num = train_digraph.number_of_nodes()
    # evaluation
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num,
            sample_ratio_e,
            is_undirected
        )
    else:
        eval_edge_pairs = None
    if X is None:
        # If not an embedding approach, store the new subgraph
        graph_embedding.learn_embedding(train_digraph)
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs
    )

    # Edges already present in the training graph are not predictions.
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(e[0], e[1])
    ]

    # ``G.node`` was removed in networkx 2.4; fall back to ``G.nodes`` there.
    # ``.get(0, {})`` avoids a KeyError on graphs without a node labelled 0.
    node_attrs = getattr(train_digraph, 'node', train_digraph.nodes)
    if 'partition' in node_attrs.get(0, {}):
        # Narrow the already-filtered list; starting again from
        # predicted_edge_list would re-admit the training edges removed above.
        filtered_edge_list = [
            e for e in filtered_edge_list
            if node_attrs[e[0]]['partition'] != node_attrs[e[1]]['partition']
        ]

    if store_predictions:
        # Context managers make sure the pickle files are flushed and closed.
        with open('gem/nodeListMap/preds.pickle', 'wb') as preds_file:
            pickle.dump(filtered_edge_list, preds_file)
        with open('gem/nodeListMap/test_graph.pickle', 'wb') as graph_file:
            pickle.dump(test_digraph, graph_file)

    t1 = time()
    MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
    t2 = time()
    prec_curv, _ = metrics.computePrecisionCurve(
        filtered_edge_list,
        test_digraph
    )
    t3 = time()
    print('MAP computation time: %f sec, prec: %f sec' % (t2 - t1, t3 - t2))
    return (MAP, prec_curv)
def evaluateStaticLinkPrediction(digraph, graph_embedding,
                                 train_ratio=0.8,
                                 n_sample_nodes=None,
                                 sample_ratio_e=None,
                                 no_python=False,
                                 is_undirected=True):
    """Split a graph, learn an embedding on the train part and score link prediction.

    Args:
        digraph (Object): networkx graph to split into train and test graphs.
        graph_embedding (object): Embedding object exposing
            ``learn_embedding`` and ``get_reconstructed_adj``.
        train_ratio (Float): Forwarded to
            ``evaluation_util.splitDiGraphToTrainTest``.
        n_sample_nodes (Int): When truthy, the test graph is sampled with
            ``graph_util.sample_graph`` and the embedding is restricted to
            the sampled rows.
        sample_ratio_e (Float): When truthy, evaluation is restricted to the
            pairs returned by ``evaluation_util.getRandomEdgePairs``.
        no_python (bool): Forwarded to ``learn_embedding``.
        is_undirected (bool): Forwarded to the splitter, the pair sampler and
            ``evaluation_util.getEdgeListFromAdjMtx``.

    Returns:
        tuple: ``(MAP, prec_curv)``.
    """
    # NOTE(review): node_num is taken before the train graph may be reduced
    # to its largest component below - confirm that getRandomEdgePairs stays
    # within the bounds of the reconstructed adjacency in that case.
    node_num = digraph.number_of_nodes()

    # separate train and test graph
    train_digraph, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        digraph,
        train_ratio=train_ratio,
        is_undirected=is_undirected
    )
    if not nx.is_connected(train_digraph.to_undirected()):
        # Keep only the largest weakly connected component and relabel its
        # nodes to 0..n-1; the test graph is restricted and relabelled alike.
        train_digraph = max(
            nx.weakly_connected_component_subgraphs(train_digraph),
            key=len
        )
        # Snapshot the labels before the in-place relabelling below.
        tdl_nodes = list(train_digraph.nodes())
        nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
        nx.relabel_nodes(train_digraph, nodeListMap, copy=False)
        test_digraph = test_digraph.subgraph(tdl_nodes)
        nx.relabel_nodes(test_digraph, nodeListMap, copy=False)

    # learning graph embedding
    X, _ = graph_embedding.learn_embedding(
        graph=train_digraph,
        no_python=no_python
    )
    node_l = None
    if n_sample_nodes:
        test_digraph, node_l = graph_util.sample_graph(
            test_digraph,
            n_sample_nodes
        )
        X = X[node_l]

    # evaluation
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num,
            sample_ratio_e,
            is_undirected
        )
    else:
        eval_edge_pairs = None
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs
    )
    if node_l is None:
        node_l = list(range(train_digraph.number_of_nodes()))

    # node_l is a sequence mapping predicted indices back to train-graph
    # nodes, so it must be indexed; calling it raised TypeError.
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
    ]

    MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(
        filtered_edge_list,
        test_digraph
    )
    return (MAP, prec_curv)
def evaluateStaticLinkPrediction(digraph, graph_embedding,
                                 train_ratio=0.8,
                                 n_sample_nodes=None,
                                 sample_ratio_e=None,
                                 no_python=False,
                                 is_undirected=True):
    """Split a graph, learn an embedding on the train part and score link prediction.

    Besides the scores, the learned vectors are returned keyed by the
    original (pre-relabelling) node labels.

    Args:
        digraph (Object): networkx graph to split into train and test graphs.
        graph_embedding (object): Embedding object exposing
            ``learn_embedding`` and ``get_reconstructed_adj``.
        train_ratio (Float): Forwarded to
            ``evaluation_util.splitDiGraphToTrainTest``.
        n_sample_nodes (Int): When truthy, the test graph is sampled with
            ``graph_util.sample_graph`` and the embedding is restricted to
            the sampled rows.
        sample_ratio_e (Float): When truthy, evaluation is restricted to the
            pairs returned by ``evaluation_util.getRandomEdgePairs``.
        no_python (bool): Forwarded to ``learn_embedding``.
        is_undirected (bool): Forwarded to the splitter, the pair sampler and
            ``evaluation_util.getEdgeListFromAdjMtx``.

    Returns:
        tuple: ``(MAP, prec_curv, node2vec_dict)`` where ``node2vec_dict``
        maps original node labels to their embedding rows.
    """
    node_num = digraph.number_of_nodes()
    print('eslp graph')
    # list() keeps the slice valid when edges() returns a view, not a list.
    print(list(digraph.edges())[:3])

    # separate train and test graph
    train_digraph, test_digraph = evaluation_util.splitDiGraphToTrainTest(
        digraph,
        train_ratio=train_ratio,
        is_undirected=is_undirected
    )
    print('eslp training graph')
    print(list(train_digraph.edges())[:3])

    if not nx.is_connected(train_digraph.to_undirected()):
        # Keep only the largest weakly connected component and relabel its
        # nodes to 0..n-1; the test graph is restricted and relabelled alike.
        train_digraph = max(
            nx.weakly_connected_component_subgraphs(train_digraph),
            key=len
        )
        # Snapshot the labels before the in-place relabelling below.
        tdl_nodes = list(train_digraph.nodes())
        nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
        reversedNodeListMap = dict(zip(range(len(tdl_nodes)), tdl_nodes))
        print(nodeListMap)
        nx.relabel_nodes(train_digraph, nodeListMap, copy=False)
        test_digraph = test_digraph.subgraph(tdl_nodes)
        nx.relabel_nodes(test_digraph, nodeListMap, copy=False)
    else:
        # No relabelling happened, so every node maps to itself. The node
        # list is read from the graph here because tdl_nodes is only defined
        # in the branch above (using it raised NameError).
        reversedNodeListMap = {node: node for node in train_digraph.nodes()}
    print('elsp training graph after largest cc')
    print(list(train_digraph.edges())[:3])

    # learning graph embedding
    X, _ = graph_embedding.learn_embedding(
        graph=train_digraph,
        no_python=no_python
    )
    node_l = None
    if n_sample_nodes:
        test_digraph, node_l = graph_util.sample_graph(
            test_digraph,
            n_sample_nodes
        )
        X = X[node_l]

    # Row i of X belongs to the i-th train node, or, after sampling, to the
    # node at position node_l[i] (X was indexed with node_l above).
    # NOTE(review): assumes node_l holds integer positions - confirm against
    # graph_util.sample_graph.
    train_nodes = list(train_digraph.nodes())
    row_positions = range(len(X)) if node_l is None else node_l
    node2vec_dict = {}
    print('GUESS embedding node2vc train result')
    for row_idx, node_pos in enumerate(row_positions):
        original_label = reversedNodeListMap[train_nodes[node_pos]]
        node2vec_dict[original_label] = X[row_idx]

    # evaluation
    if sample_ratio_e:
        eval_edge_pairs = evaluation_util.getRandomEdgePairs(
            node_num,
            sample_ratio_e,
            is_undirected
        )
    else:
        eval_edge_pairs = None
    estimated_adj = graph_embedding.get_reconstructed_adj(X, node_l)
    predicted_edge_list = evaluation_util.getEdgeListFromAdjMtx(
        estimated_adj,
        is_undirected=is_undirected,
        edge_pairs=eval_edge_pairs
    )
    if node_l is None:
        node_l = list(range(train_digraph.number_of_nodes()))

    # Edges already present in the training graph are not predictions.
    filtered_edge_list = [
        e for e in predicted_edge_list
        if not train_digraph.has_edge(node_l[e[0]], node_l[e[1]])
    ]

    MAP = metrics.computeMAP(filtered_edge_list, test_digraph)
    prec_curv, _ = metrics.computePrecisionCurve(
        filtered_edge_list,
        test_digraph
    )
    return (MAP, prec_curv, node2vec_dict)