def graph_characteristics(graphName):
    '''
    Return all the characteristics of a graph that we present in the paper.
    '''
    storedFolder = roles.graph_folder(graphName)
    inGraph = graph_analysis.IO.load_data(
        "../Data/Graphs/" + storedFolder + "/" + graphName + ".GT.graph").next()
    groupTaxa, blackList = roles.graph_node_clusters(graphName, inGraph, metric="default")

    res = []
    headers = [
        "Graph Name", "\#Nodes", "\#Edges", "Edge density", "Clustering Coef.",
        "Diameter", "Role Taxonomy", "\#Clusters"
    ]
    res.append(paper_graph_name(graphName))
    res.append(inGraph.num_vertices())
    res.append(inGraph.num_edges())
    res.append(round(inGraph.num_edges() / (2 * float(inGraph.num_vertices())), 2))
    res.append(round(clustering.global_clustering(inGraph)[0], 3))
    res.append(topology.pseudo_diameter(inGraph)[0])
    res.append(graphRoles[graphName])
    res.append(len(set(groupTaxa)))
    return res, headers
def metrics(file, use_cache=True):
    # use cache or recompute
    cache = os.path.splitext(file)[0] + ".json"
    if use_cache and os.path.isfile(cache):
        print('using cached metrics for', os.path.basename(file))
        with open(cache, "r") as fp:
            return json.load(fp)

    print('computing metrics for', os.path.basename(file))

    # read file
    g = load_graph(file)
    degrees = list(g.degree_property_map("out"))
    with open(file) as f:
        metalines = [next(f) for x in range(13)]

    # gather data
    metrics = {}
    metrics['file'] = os.path.basename(file)
    metrics['edges'] = int(metalines[5].split()[-1])
    metrics['rounds'] = int(metalines[1].split()[-1])
    metrics['max_degree'] = max(degrees)
    metrics['avg_degree'] = mean(degrees)
    metrics['min_degree'] = min(degrees)
    metrics['local_clustering'] = mean(local_clustering(g).get_array())
    metrics['global_clustering'] = global_clustering(g)[0]
    metrics['pseudo_diameter'] = int(pseudo_diameter(g)[0])

    fit = powerlaw.Fit(degrees, discrete=True, verbose=False)
    metrics['exponent'] = fit.alpha
    metrics['KS'] = fit.power_law.KS()
    metrics['x_min'] = fit.xmin

    with open(cache, "w") as fp:
        json.dump(metrics, fp)

    return metrics
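# Hedged, self-contained sketch of the powerlaw fit performed above; the
# synthetic Zipf degree sequence is an assumption made only so the snippet
# runs without one of the project's graph files.
import numpy as np
import powerlaw

degrees = np.random.zipf(2.5, 1000)           # stand-in heavy-tailed degrees
fit = powerlaw.Fit(degrees, discrete=True, verbose=False)
print(fit.alpha)              # estimated power-law exponent
print(fit.xmin)               # lower cutoff selected by the fit
print(fit.power_law.KS())     # Kolmogorov-Smirnov distance of the fit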
def stats(g, name, rankfile=None):
    print('***** ' + name + ' *****')
    print('Directed: ', g.is_directed())
    print('v count entire graph: ', g.num_vertices())
    comp, hist = graph_tool.topology.label_components(g)
    print('Num C.C.: ', len(hist))
    print('Largest C.C.: ', max(hist))
    g = get_largest_cc(g)
    print('>>>>> Only largest C.C.')
    print('v count: ', g.num_vertices())
    print('e count: ', g.num_edges())
    print('pseudo-diameter: ', gt.pseudo_diameter(g)[0])
    print('density: ', get_density(g))
    print('global clust: ', graph_tool.clustering.global_clustering(g)[0])
    deg = [x.out_degree() for x in g.vertices()]
    deg = sorted(deg)
    print('mean deg+: ', np.mean(deg))
    print('std deg+: ', np.std(deg))
    print('min deg+: ', deg[0])
    print('max deg+: ', deg[-1])
    plot_ccdf(deg)
    if rankfile is not None:
        rank(rankfile)
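# Quick sketch of the label_components call used above: it returns a vertex
# property map of component labels plus a histogram of component sizes
# (shown here on a bundled graph-tool dataset, which is one component).
import graph_tool.topology
from graph_tool.collection import data

g = data["karate"]
comp, hist = graph_tool.topology.label_components(g)
print(len(hist), max(hist))   # number of components, size of the largest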
def f_pseudo_diameter(D, stats, options={'features': []}):
    """Store the pseudo-diameter of D's largest component (or of D itself
    if that component is trivial) in stats."""
    LC = label_largest_component(D)
    LCD = GraphView(D, vfilt=LC)

    if 'diameter' in options['features']:
        if LCD.num_vertices() == 0 or LCD.num_vertices() == 1:
            # if the largest component practically does not exist, use the whole graph
            dist, ends = pseudo_diameter(D)
        else:
            dist, ends = pseudo_diameter(LCD)

        stats['pseudo_diameter'] = dist
        # D may be used in both cases
        stats['pseudo_diameter_src_vertex'] = D.vertex_properties['name'][ends[0]]
        stats['pseudo_diameter_trg_vertex'] = D.vertex_properties['name'][ends[1]]

        log.debug('done pseudo_diameter')
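# Hedged usage sketch of the largest-component pattern above, run against one
# of graph-tool's bundled datasets instead of the caller's graph D.
from graph_tool import GraphView
from graph_tool.collection import data
from graph_tool.topology import label_largest_component, pseudo_diameter

g = data["football"]
lcd = GraphView(g, vfilt=label_largest_component(g))
dist, ends = pseudo_diameter(lcd)
print(dist, int(ends[0]), int(ends[1]))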
def create_graph(N=100, nb_clusters=4):
    from graph_tool.topology import label_largest_component, pseudo_diameter
    is_connected = False
    nb_iter = 0
    while not is_connected and nb_iter < N:
        cexp.fast_random_graph(N, .05)
        g = cexp.to_graph_tool()
        is_connected = label_largest_component(g).a.sum() == N
        nb_iter += 1  # bound the number of retries (was never incremented)
    cexp.turn_into_signed_graph_by_propagation(nb_clusters, .8)
    return g, int(pseudo_diameter(g)[0])
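# cexp above is project-specific; this is a sketch of the same
# retry-until-connected idea using graph_tool's own random_graph generator.
# The even-valued degree sampler is an arbitrary illustrative choice that
# keeps the degree sequence graphical.
import numpy as np
from graph_tool.generation import random_graph
from graph_tool.topology import label_largest_component, pseudo_diameter

def create_connected_graph(N=100, max_iter=100):
    for _ in range(max_iter):
        g = random_graph(N, lambda: 2 * np.random.randint(1, 4), directed=False)
        if label_largest_component(g).a.sum() == N:  # one component spans all vertices
            return g, int(pseudo_diameter(g)[0])
    raise RuntimeError("no connected graph found in {} tries".format(max_iter))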
def clustering_tables(graphName, strategy=None):
    '''
    Create the data for the two tables of Beta_CV and C-Index.
    '''
    inGraph, groupTaxa, blackList, xTickMarks = cmp.prepare_input_graph(
        graphName, metric="default")
    gDiameter = topology.pseudo_diameter(inGraph)[0]
    ranks = False
    if blackList != None:
        print "BlackListed = " + str(blackList)

    allBetaCV = []
    allCIndex = []
    # for methodName in ["roleSim", "simRank", "heatSim", "spectralSim"]:
    for methodName in ["spectralSim"]:
        print methodName
        if cmp.is_spectral(methodName):
            energies = int(gDiameter)
            methodParams = [energies, 10, strategy]
        else:  # non-spectral methods
            methodParams = "default"

        if graphName in ["E_Coli", "Carribean_FoodWeb", "Mapk"] \
           and methodName in ["heatSim", "heatSim_PP"]:
            distMatrix = cmp.execute_method(methodName, inGraph, graphName,
                                            distances=True, ranks=ranks,
                                            distFunction="canberra",
                                            methodParams=methodParams)
        else:
            distMatrix = cmp.execute_method(methodName, inGraph, graphName,
                                            distances=True, ranks=ranks,
                                            distFunction="default",
                                            methodParams=methodParams)

        allBetaCV.append(
            myUtils.inner_intra_distances(distMatrix, groupTaxa, blackList, ranks=ranks))
        allCIndex.append(
            myUtils.clustering_c_index(distMatrix, groupTaxa, blackList))

    # transform values relative to the second worst
    secondWorst = sorted(allBetaCV)[-2]
    relativeBetas = [((secondWorst - i) / float(i)) * 100 for i in allBetaCV]
    secondWorst = sorted(allCIndex)[-2]
    relativeCs = [((secondWorst - i) / float(i)) * 100 for i in allCIndex]

    return allBetaCV, relativeBetas, allCIndex, relativeCs
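# Tiny worked example (made-up values) of the "relative to the second worst"
# transform used above: each score becomes its percentage gap from the
# second-worst score, so the second-worst method maps to 0.
allBetaCV = [0.8, 1.2, 1.5, 2.0]
secondWorst = sorted(allBetaCV)[-2]  # 1.5
relativeBetas = [((secondWorst - i) / float(i)) * 100 for i in allBetaCV]
print(relativeBetas)  # [87.5, 25.0, 0.0, -25.0]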
def graph_diameter(graph):
    largest_connected_component = graph_lcc(graph)
    return int(pseudo_diameter(largest_connected_component)[0])
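# graph_lcc above is a project helper that is not shown; a minimal stand-in,
# assuming it simply extracts the largest connected component, could be:
from graph_tool.topology import extract_largest_component

def graph_lcc(graph):
    # prune=True copies the component out as its own Graph
    return extract_largest_component(graph, prune=True)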
def diameter(g, directed=None, weights=None, combine_weights="mean",
             is_connected=False):
    '''
    Returns the diameter of the graph.

    .. versionchanged:: 2.3
        Added `combine_weights` argument.

    .. versionchanged:: 2.0
        Added `directed` and `is_connected` arguments.

    It returns infinity if the graph is not connected (strongly connected for
    directed graphs) unless `is_connected` is True, in which case it returns
    the longest existing shortest distance.

    Parameters
    ----------
    g : :class:`~nngt.Graph`
        Graph to analyze.
    directed : bool, optional (default: ``g.is_directed()``)
        Whether to compute the directed diameter if the graph is directed.
        If False, then the graph is treated as undirected. The option switches
        to False automatically if `g` is undirected.
    weights : bool or str, optional (default: binary edges)
        Whether edge weights should be considered; if ``None`` or ``False``
        then use binary edges; if ``True``, uses the 'weight' edge attribute,
        otherwise uses any valid edge attribute required.
    combine_weights : str, optional (default: 'mean')
        How to combine the weights of reciprocal edges if the graph is
        directed but `directed` is set to False. It can be:

        * "sum": the sum of the edge attribute values will be used for the
          new edge.
        * "mean": the mean of the edge attribute values will be used for the
          new edge.
        * "min": the minimum of the edge attribute values will be used for
          the new edge.
        * "max": the maximum of the edge attribute values will be used for
          the new edge.
    is_connected : bool, optional (default: False)
        If False, check whether the graph is connected or not and return
        infinite diameter if the graph is unconnected. If True, the graph is
        assumed to be connected.

    See also
    --------
    :func:`nngt.analysis.shortest_distance`

    References
    ----------
    .. [gt-diameter] :gtdoc:`topology.pseudo_diameter`
    '''
    g, graph, w = _get_gt_graph(g, directed, weights, combine_weights,
                                return_all=True)
    w = _get_gt_weights(g, w)

    # first check whether the graph is fully connected
    ctype = "scc" if directed else "wcc"
    if not is_connected:
        cc, hist = connected_components(g, ctype)
        if len(hist) > 1:
            return np.inf

    return gtt.pseudo_diameter(graph, weights=w)[0]
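# Minimal sketch of the weighted case handled above: pseudo_diameter accepts
# an edge property map via `weights`. The 3-vertex triangle and its weights
# are assumptions chosen so the result is easy to check by hand.
from graph_tool import Graph
from graph_tool.topology import pseudo_diameter

g = Graph(directed=False)
g.add_vertex(3)
w = g.new_edge_property("double")
for (u, v), wt in [((0, 1), 1.0), ((1, 2), 2.0), ((0, 2), 4.0)]:
    w[g.add_edge(u, v)] = wt
dist, ends = pseudo_diameter(g, weights=w)
print(dist)  # 3.0: the 0-1-2 path beats the direct 0-2 edge of weight 4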
def get_descriptors(network, short_name, nx_network, already_calculated=False):
    def _prefixToTitle(prefix):
        if prefix == 'a':
            return "Artists"
        elif prefix == 't':
            return "Tags"
        elif prefix == 'u':
            return 'Users'

    filename = "cache/{}.pickle".format(short_name)
    if os.path.isfile(filename):
        result = pickle.load(open(filename, 'rb'))
        return result

    result = {}
    prefix1, prefix2 = short_name[0], short_name[1]
    t1 = _prefixToTitle(prefix1)
    t2 = _prefixToTitle(prefix2)
    result['name'] = short_name
    result['title_dd1'] = PLOT_TITLES[short_name].format(t1, "")
    result['title_dd2'] = PLOT_TITLES[short_name].format(t2, "")
    result['title_dd1_acum'] = PLOT_TITLES[short_name].format(t1, " Cumulative")
    result['title_dd2_acum'] = PLOT_TITLES[short_name].format(t2, " Cumulative")
    result['title_wd'] = PLOT_TITLES['wd'].format("", t1, t2)
    result['title_wd_acum'] = PLOT_TITLES['wd'].format("Cumulative ", t1, t2)
    result['title_cd'] = PLOT_TITLES['cd'].format(t1, t2)
    result['title_sp'] = PLOT_TITLES['sp'].format(t1, t2)
    result['filename_dd'] = '{}_dd'.format(short_name)        # degree dist
    result['filename_ddl'] = '{}_dd_log'.format(short_name)   # degree dist (log)
    result['filename_dd1'] = '{}_{}_dd'.format(short_name[0], short_name)
    result['filename_dd2'] = '{}_{}_dd'.format(short_name[1], short_name)
    result['filename_dd1l'] = '{}_{}_dd_log'.format(short_name[0], short_name)
    result['filename_dd2l'] = '{}_{}_dd_log'.format(short_name[1], short_name)
    result['filename_dd1_acum'] = '{}_{}_dd_acum'.format(short_name[0], short_name)
    result['filename_dd2_acum'] = '{}_{}_dd_acum'.format(short_name[1], short_name)
    result['filename_wd'] = '{}_wd'.format(short_name)         # weight distribution
    result['filename_wdl'] = '{}_wd_log'.format(short_name)    # weight distribution (log)
    result['filename_wd_acum'] = '{}_wd_acum'.format(short_name)
    result['filename_sp'] = '{}_sp'.format(short_name)         # shortest path
    result['filename_cd'] = '{}_cd'.format(short_name)         # components
    result['filename_cdl'] = '{}_cd_log'.format(short_name)

    nodes = network.get_vertices()
    edges = network.get_edges()
    result['num_nodes'] = {}
    result['num_nodes']['total'] = nodes.shape[0]
    result['num_edges'] = edges.shape[0]

    result['degree'] = {"total": {}, "prefix1": {}, "prefix2": {}}
    result['degree']["total"]['max'] = network.get_out_degrees(nodes).max()
    result['degree']["total"]['min'] = network.get_out_degrees(nodes).min()
    result['degree']["total"]['avg'] = network.get_out_degrees(nodes).mean()
    result['degree']["total"]["counts"], result['degree']["total"]["bins"] = \
        st.vertex_hist(network, "out")

    # split the vertices by id prefix
    nodes1, nodes2 = [], []
    for node in nodes:
        if prefix1 in network.vp['id'][node]:
            nodes1.append(node)
        elif prefix2 in network.vp['id'][node]:
            nodes2.append(node)

    result['num_nodes']['prefix1'] = len(nodes1)
    result['degree']["prefix1"]['max'] = network.get_out_degrees(nodes1).max()
    result['degree']["prefix1"]['min'] = network.get_out_degrees(nodes1).min()
    result['degree']["prefix1"]['avg'] = network.get_out_degrees(nodes1).mean()
    result['degree']["prefix1"]["counts"], result['degree']["prefix1"]["bins"] = \
        np.histogram(network.get_out_degrees(nodes1), bins=15)
    result['degree']["prefix1"]["d"] = network.get_out_degrees(nodes1)

    if prefix1 == prefix2:
        nodes2 = nodes1
    result['num_nodes']['prefix2'] = len(nodes2)
    result['degree']["prefix2"]['max'] = network.get_out_degrees(nodes2).max()
    result['degree']["prefix2"]['min'] = network.get_out_degrees(nodes2).min()
    result['degree']["prefix2"]['avg'] = network.get_out_degrees(nodes2).mean()
    result['degree']["prefix2"]["counts"], result['degree']["prefix2"]["bins"] = \
        np.histogram(network.get_out_degrees(nodes2), bins=15)
    result['degree']["prefix2"]["d"] = network.get_out_degrees(nodes2)

    result['weights'] = {}
    weights = []
    for v1, v2 in nx_network.edges():
        weight = nx_network.get_edge_data(v1, v2)['weight']
        weights.append(weight)
    # result['weights']['counts'], result['weights']['bins'] = np.histogram(weights, bins=8)
    result['weights']['d'] = weights

    # estimated diameter (longest shortest path found) and its endpoints
    d, (v1, v2) = top.pseudo_diameter(network)
    result['diameter'] = d
    d_path = "{}-{}".format(network.vp['id'][v1], network.vp['id'][v2])
    result['diameter_path'] = d_path
    result['clustering'] = clu.global_clustering(network)

    if not already_calculated:
        net2 = gt.Graph(network)  # undirected version
        net2.set_directed(False)
        result['sp'] = {}
        result['sp']['counts'], result['sp']['bins'] = shortest_paths(net2)
        # connected components
        _, c2 = top.label_components(net2)
        result['components'] = {}
        result['components']['num'] = len(c2)
        result['components']['bins'] = range(len(c2))
        result['components']['counts'] = c2

    pickle.dump(result, open(filename, "wb"))
    return result
currentMethods = [
    "panos_sim", "sim_rank", "role_sim", "vertex_sim", "refex_sim",
    "commute_time_dist"
]

if __name__ == '__main__':
    for graphName in currentGraphs:
        print_fancy(graphName)
        v_measAllMethods = []
        for methodName in currentMethods:
            print "\n\n"
            inGraph, trueClusters, blackList, xTickMarks = prepare_input_graph(
                graphName, metric="default", verbose=False)
            graphDiameter = int(topology.pseudo_diameter(inGraph)[0])
            trueClustersSize = len(np.unique(trueClusters))

            if methodName.startswith("panos"):
                methodParams = ["all", "small", False, None]
                # print methodParams
            else:
                methodParams = None

            try:
                if methodParams == None:
                    res = load_data("../Output/" + methodName + "_" + graphName).next()
                else:
                    inFile = "../Output/" + methodName + "_" + '_'.join(
                        str(e) for e in methodParams) + "_" + graphName
def evaluate_sampling(self, full_graph: Graph, sampled_graph: Graph,
                      full_partition: BlockState,
                      sampled_graph_partition: BlockState,
                      block_mapping: Dict[int, int],
                      vertex_mapping: Dict[int, int],
                      assignment: np.ndarray):
    """Evaluates the goodness of the samples.

    Parameters
    ----------
    full_graph : Graph
        the full, unsampled Graph object
    sampled_graph : Graph
        the sampled graph
    full_partition : Partition
        the partitioning results on the full graph
    sampled_graph_partition : Partition
        the partitioning results on the sampled graph
    block_mapping : Dict[int, int]
        the mapping of blocks from the full graph to the sampled graph
    vertex_mapping : Dict[int, int]
        the mapping of vertices from the full graph to the sampled graph
    assignment : np.ndarray[int]
        the true vertex-to-community mapping
    """
    #####
    # General
    #####
    self.sampled_graph_num_vertices = sampled_graph.num_vertices()
    self.sampled_graph_num_edges = sampled_graph.num_edges()
    self.blocks_retained = sampled_graph_partition.get_B() / full_partition.get_B()
    # pseudo_diameter returns a tuple: (diameter, (start_vertex, end_vertex))
    self.sampled_graph_diameter = pseudo_diameter(sampled_graph)[0]
    self.full_graph_diameter = pseudo_diameter(full_graph)[0]
    for vertex in sampled_graph.vertices():
        if (vertex.in_degree() + vertex.out_degree()) == 0:
            self.sampled_graph_island_vertices += 1
    self.sampled_graph_largest_component = extract_largest_component(
        sampled_graph, directed=False).num_vertices()
    self.full_graph_largest_component = extract_largest_component(
        full_graph, directed=False).num_vertices()

    ######
    # Expansion quality (http://portal.acm.org/citation.cfm?doid=1772690.1772762)
    ######
    # Expansion factor = Neighbors of sample / size of sample
    # Maximum expansion factor = (size of graph - size of sample) / size of sample
    # Expansion quality = Neighbors of sample / (size of graph - size of sample)
    # Expansion quality = 1 means sample is at most 1 edge away from entire graph
    sampled_graph_vertices = set(vertex_mapping.keys())
    neighbors = set()
    for vertex in sampled_graph_vertices:
        for neighbor in full_graph.get_out_neighbors(vertex):
            neighbors.add(neighbor)
    neighbors = neighbors - sampled_graph_vertices
    self.expansion_quality = len(neighbors) / (
        full_graph.num_vertices() - sampled_graph.num_vertices())

    ######
    # Clustering coefficient
    ######
    self.sampled_graph_clustering_coefficient = global_clustering(sampled_graph)[0]
    self.full_graph_clustering_coefficient = global_clustering(full_graph)[0]

    ######
    # Info on communities
    ######
    self.get_community_details(
        assignment, full_partition.get_blocks().get_array(),
        sampled_graph_partition.get_blocks().get_array(), vertex_mapping)

    if np.unique(assignment).size == 1:
        # Cannot compute below metrics if no true partition is provided
        return

    #####
    # % difference in ratio of within-block to between-block edges
    #####
    sample_assignment = assignment[np.fromiter(vertex_mapping.keys(), dtype=np.int32)]
    true_sampled_graph_partition = partition_from_truth(sampled_graph, sample_assignment)
    sampled_graph_blockmatrix = true_sampled_graph_partition.get_matrix()
    self.sampled_graph_edge_ratio = (sampled_graph_blockmatrix.diagonal().sum()
                                     / sampled_graph_blockmatrix.sum())
    true_full_partition = partition_from_truth(full_graph, assignment)
    full_blockmatrix = true_full_partition.get_matrix()
    self.graph_edge_ratio = full_blockmatrix.diagonal().sum() / full_blockmatrix.sum()

    #####
    # Normalized difference from ideal-block membership
    #####
    membership_size = max(np.max(assignment), np.max(sample_assignment)) + 1
    full_graph_membership_nums = np.zeros(membership_size)
    for block_membership in assignment:
        full_graph_membership_nums[block_membership] += 1
    sampled_graph_membership_nums = np.zeros(membership_size)
    for block_membership in sample_assignment:
        sampled_graph_membership_nums[block_membership] += 1
    ideal_block_membership_nums = full_graph_membership_nums * \
        (sampled_graph.num_vertices() / full_graph.num_vertices())
    difference_from_ideal_block_membership_nums = np.abs(
        ideal_block_membership_nums - sampled_graph_membership_nums)
    self.difference_from_ideal_sample = np.sum(
        difference_from_ideal_block_membership_nums / sampled_graph.num_vertices())
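# Standalone sketch of the expansion-quality measure computed above, for a
# hypothetical sample of the karate-club graph (taking the first 10 vertex
# indices as the "sample" is an arbitrary choice for illustration).
from graph_tool.collection import data

g = data["karate"]
sample = set(range(10))
neighbors = set()
for v in sample:
    for n in g.get_out_neighbors(v):
        neighbors.add(int(n))
neighbors -= sample
expansion_quality = len(neighbors) / (g.num_vertices() - len(sample))
print(expansion_quality)  # 1.0 would mean the sample reaches every other vertex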