# Assumes the project-local helpers roles, graph_analysis.IO,
# paper_graph_name and graphRoles are importable from the surrounding package.
from graph_tool import clustering, topology


def graph_characteristics(graphName):
    '''Return all the characteristics of a graph that we present in the paper.'''
    storedFolder = roles.graph_folder(graphName)
    inGraph = next(graph_analysis.IO.load_data(
        "../Data/Graphs/" + storedFolder + "/" + graphName + ".GT.graph"))
    groupTaxa, blackList = roles.graph_node_clusters(graphName, inGraph,
                                                     metric="default")
    res = []
    headers = ["Graph Name", "\#Nodes", "\#Edges", "Edge density",
               "Clustering Coef.", "Diameter", "Role Taxonomy", "\#Clusters"]
    res.append(paper_graph_name(graphName))
    res.append(inGraph.num_vertices())
    res.append(inGraph.num_edges())
    # "Edge density" as reported in the paper: edges over twice the vertices.
    res.append(round(inGraph.num_edges() / (2 * float(inGraph.num_vertices())), 2))
    res.append(round(clustering.global_clustering(inGraph)[0], 3))
    res.append(topology.pseudo_diameter(inGraph)[0])
    res.append(graphRoles[graphName])
    res.append(len(set(groupTaxa)))
    return res, headers
import numpy as np
import powerlaw
from graph_tool.all import (Graph, extract_largest_component,
                            global_clustering, vertex_average)

# `Properties` is a project-specific result record, assumed defined elsewhere.


def examine_graph(graph: Graph, experiment: str, graphname: str, real: bool,
                  directed: bool = True) -> Properties:
    vertices = graph.num_vertices()
    edges = graph.num_edges()
    total_degrees = graph.get_total_degrees(np.arange(vertices))
    min_degree = np.min(total_degrees)
    max_degree = np.max(total_degrees)
    avg_degree = vertex_average(graph, "total")[0]
    largest_component = extract_largest_component(
        graph, directed=False).num_vertices()
    num_islands = np.sum(total_degrees == 0)
    cc = global_clustering(graph)[0]
    # Earlier approach: fit a line to the log-log degree histogram.
    # _degrees, _counts = np.unique(total_degrees, return_counts=True)
    # log_degrees = np.log(_degrees)
    # log_counts = np.log(_counts)
    # regressor = LinearRegression()
    # regressor.fit(log_degrees.reshape(-1, 1), log_counts)
    # exponent = regressor.coef_[0]
    result = powerlaw.Fit(total_degrees, xmin=1, discrete=True, xmax=max_degree)
    exponent = -result.alpha
    percentile = np.percentile(total_degrees, 95)
    # print("Exponent for this graph is: ", exponent)
    # print("Using powerlaw package: e = {} xmin = {} xmax = {}".format(
    #     exponent2, result.xmin, result.xmax))
    # print("degrees: {}\ncounts: {}".format(_degrees[:20], _counts[:20]))
    return Properties(experiment, graphname, real, vertices, edges, min_degree,
                      max_degree, avg_degree, largest_component, num_islands,
                      cc, directed, exponent, percentile)
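# A self-contained sketch of the degree-exponent estimation step used in
# examine_graph above, run on a synthetic preferential-attachment graph.
# The graph and all names below are illustrative only, not from the project.
import numpy as np
import powerlaw
from graph_tool.generation import price_network

_g = price_network(2000)  # heavy-tailed degree distribution
_degrees = _g.get_total_degrees(np.arange(_g.num_vertices()))
_fit = powerlaw.Fit(_degrees, xmin=1, discrete=True, xmax=int(_degrees.max()))
print("alpha =", _fit.alpha)  # examine_graph stores -alpha as the exponent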
import json
import os
from statistics import mean  # assumed source of `mean`

import powerlaw
from graph_tool.all import (load_graph, local_clustering, global_clustering,
                            pseudo_diameter)


def metrics(file, use_cache=True):
    # use cache or recompute
    cache = os.path.splitext(file)[0] + ".json"
    if use_cache and os.path.isfile(cache):
        print('using cached metrics for', os.path.basename(file))
        with open(cache, "r") as fp:
            return json.load(fp)
    print('computing metrics for', os.path.basename(file))

    # read file
    g = load_graph(file)
    degrees = list(g.degree_property_map("out"))
    with open(file) as f:
        metalines = [next(f) for x in range(13)]

    # gather data
    metrics = {}
    metrics['file'] = os.path.basename(file)
    metrics['edges'] = int(metalines[5].split()[-1])
    metrics['rounds'] = int(metalines[1].split()[-1])
    metrics['max_degree'] = max(degrees)
    metrics['avg_degree'] = mean(degrees)
    metrics['min_degree'] = min(degrees)
    metrics['local_clustering'] = mean(local_clustering(g).get_array())
    metrics['global_clustering'] = global_clustering(g)[0]
    metrics['pseudo_diameter'] = int(pseudo_diameter(g)[0])
    fit = powerlaw.Fit(degrees, discrete=True, verbose=False)
    metrics['exponent'] = fit.alpha
    metrics['KS'] = fit.power_law.KS()
    metrics['x_min'] = fit.xmin
    with open(cache, "w") as fp:
        json.dump(metrics, fp)
    return metrics
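# The caching pattern used by metrics(), isolated as a reusable sketch
# (`cached_json` is a hypothetical helper, not part of the original file):
# results are written as JSON next to the input file and reused later.
import json
import os

def cached_json(path, compute, use_cache=True):
    cache = os.path.splitext(path)[0] + ".json"
    if use_cache and os.path.isfile(cache):
        with open(cache) as fp:
            return json.load(fp)  # cache hit: skip recomputation
    result = compute(path)
    with open(cache, "w") as fp:
        json.dump(result, fp)  # cache miss: persist for next time
    return result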
import numpy as np
import graph_tool.centrality as gtc
import graph_tool.clustering as gtclust


def calculate_measures(g, tmp_measures=None, measure_list=['BC', 'T', 'E']):
    if tmp_measures is None:
        tmp_measures = dict((k, []) for k in measure_list)
    # BC: mean vertex betweenness centrality.
    tmp_measures['BC'].append(np.mean(gtc.betweenness(g)[0].get_array()))
    # T: transitivity (global clustering coefficient).
    tmp_measures['T'].append(gtclust.global_clustering(g)[0])
    # E: mean harmonic closeness, i.e. global efficiency.
    tmp_measures['E'].append(
        np.mean(gtc.closeness(g, harmonic=True).get_array()))
    return tmp_measures
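# Usage sketch for calculate_measures (relies on the np/gtc/gtclust imports
# above): accumulate the measures over successive graph snapshots, here
# 4-regular random graphs generated purely for illustration.
from graph_tool.generation import random_graph

snapshots = [random_graph(200, lambda: 4, directed=False) for _ in range(3)]
measures = None
for snapshot in snapshots:
    measures = calculate_measures(snapshot, measures)
print({k: len(v) for k, v in measures.items()})  # {'BC': 3, 'T': 3, 'E': 3}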
import logging

from graph_tool.clustering import global_clustering

log = logging.getLogger(__name__)  # module-level logger assumed by this snippet


def f_global_clustering(U, stats,
                        options={'features': [], 'skip_features': []}):
    """Store the global clustering coefficient of U in stats, unless the
    feature is not requested or is explicitly skipped."""
    if 'global_clustering' not in options['features'] or (
            'skip_features' in options
            and 'global_clustering' in options['skip_features']):
        log.debug('Skipping global_clustering')
        return
    stats['global_clustering'] = global_clustering(U)[0]
    log.debug('done global_clustering')
def global_clustering_binary_undirected(g):
    '''
    Returns the undirected global clustering coefficient.

    This corresponds to the ratio of undirected triangles to the number of
    undirected triads.

    Parameters
    ----------
    g : :class:`~nngt.Graph`
        Graph to analyze.

    References
    ----------
    .. [gt-global-clustering] :gtdoc:`clustering.global_clustering`
    '''
    # use undirected graph view, filter parallel edges
    u = GraphView(g.graph, directed=False)
    u = GraphView(u, efilt=label_parallel_edges(u).fa == 0)

    return gtc.global_clustering(u, weight=None)[0]
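# Minimal standalone illustration of the GraphView trick above (plain
# graph-tool, no nngt; label_parallel_edges is assumed to live in
# graph_tool.stats): two reciprocal directed edges collapse into a single
# undirected edge once parallel duplicates are filtered out.
from graph_tool import Graph, GraphView
from graph_tool.stats import label_parallel_edges
import graph_tool.clustering as gtc

_g = Graph(directed=True)
_g.add_vertex(3)
_g.add_edge_list([(0, 1), (1, 0), (1, 2), (2, 0)])  # (0,1)/(1,0): parallel when undirected

_u = GraphView(_g, directed=False)
_u = GraphView(_u, efilt=label_parallel_edges(_u).fa == 0)
print(gtc.global_clustering(_u, weight=None)[0])  # a clean triangle: 1.0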
from graph_tool.clustering import global_clustering


def global_clustering_coeff(g):
    return global_clustering(g)[0]
import os
import pickle

import numpy as np
import graph_tool.all as gt
import graph_tool.stats as st
import graph_tool.topology as top
import graph_tool.clustering as clu

# PLOT_TITLES and shortest_paths are project-local, assumed defined elsewhere.


def get_descriptors(network, short_name, nx_network, already_calculated=False):
    def _prefixToTitle(prefix):
        if prefix == 'a':
            return "Artists"
        elif prefix == 't':
            return "Tags"
        elif prefix == 'u':
            return 'Users'

    # return the cached descriptors if they exist
    filename = "cache/{}.pickle".format(short_name)
    if os.path.isfile(filename):
        result = pickle.load(open(filename, 'rb'))
        return result

    result = {}
    prefix1, prefix2 = short_name[0], short_name[1]
    t1 = _prefixToTitle(prefix1)
    t2 = _prefixToTitle(prefix2)
    result['name'] = short_name
    result['title_dd1'] = PLOT_TITLES[short_name].format(t1, "")
    result['title_dd2'] = PLOT_TITLES[short_name].format(t2, "")
    result['title_dd1_acum'] = PLOT_TITLES[short_name].format(t1, " Cumulative")
    result['title_dd2_acum'] = PLOT_TITLES[short_name].format(t2, " Cumulative")
    result['title_wd'] = PLOT_TITLES['wd'].format("", t1, t2)
    result['title_wd_acum'] = PLOT_TITLES['wd'].format("Cumulative ", t1, t2)
    result['title_cd'] = PLOT_TITLES['cd'].format(t1, t2)
    result['title_sp'] = PLOT_TITLES['sp'].format(t1, t2)
    result['filename_dd'] = '{}_dd'.format(short_name)  # degree dist
    result['filename_ddl'] = '{}_dd_log'.format(short_name)  # degree dist (log)
    result['filename_dd1'] = '{}_{}_dd'.format(short_name[0], short_name)
    result['filename_dd2'] = '{}_{}_dd'.format(short_name[1], short_name)
    result['filename_dd1l'] = '{}_{}_dd_log'.format(short_name[0], short_name)
    result['filename_dd2l'] = '{}_{}_dd_log'.format(short_name[1], short_name)
    result['filename_dd1_acum'] = '{}_{}_dd_acum'.format(short_name[0], short_name)
    result['filename_dd2_acum'] = '{}_{}_dd_acum'.format(short_name[1], short_name)
    result['filename_wd'] = '{}_wd'.format(short_name)  # weight distribution
    result['filename_wdl'] = '{}_wd_log'.format(short_name)  # weight distribution (log)
    result['filename_wd_acum'] = '{}_wd_acum'.format(short_name)  # weight distribution (cumulative)
    result['filename_sp'] = '{}_sp'.format(short_name)  # shortest path
    result['filename_cd'] = '{}_cd'.format(short_name)  # components
    result['filename_cdl'] = '{}_cd_log'.format(short_name)

    nodes = network.get_vertices()
    edges = network.get_edges()
    result['num_nodes'] = {}
    result['num_nodes']['total'] = nodes.shape[0]
    result['num_edges'] = edges.shape[0]

    result['degree'] = {"total": {}, "prefix1": {}, "prefix2": {}}
    result['degree']["total"]['max'] = network.get_out_degrees(nodes).max()
    result['degree']["total"]['min'] = network.get_out_degrees(nodes).min()
    result['degree']["total"]['avg'] = network.get_out_degrees(nodes).mean()
    result['degree']["total"]["counts"], result['degree']["total"]["bins"] = \
        st.vertex_hist(network, "out")

    # split the vertices by id prefix (e.g. artists vs. tags)
    nodes1, nodes2 = [], []
    for node in nodes:
        if prefix1 in network.vp['id'][node]:
            nodes1.append(node)
        elif prefix2 in network.vp['id'][node]:
            nodes2.append(node)

    result['num_nodes']['prefix1'] = len(nodes1)
    result['degree']["prefix1"]['max'] = network.get_out_degrees(nodes1).max()
    result['degree']["prefix1"]['min'] = network.get_out_degrees(nodes1).min()
    result['degree']["prefix1"]['avg'] = network.get_out_degrees(nodes1).mean()
    result['degree']["prefix1"]["counts"], result['degree']["prefix1"]["bins"] = \
        np.histogram(network.get_out_degrees(nodes1),
                     bins=15)  # result['degree']["total"]["bins"].shape[0]
    result['degree']["prefix1"]["d"] = network.get_out_degrees(nodes1)

    if prefix1 == prefix2:
        nodes2 = nodes1
    result['num_nodes']['prefix2'] = len(nodes2)
    result['degree']["prefix2"]['max'] = network.get_out_degrees(nodes2).max()
    result['degree']["prefix2"]['min'] = network.get_out_degrees(nodes2).min()
    result['degree']["prefix2"]['avg'] = network.get_out_degrees(nodes2).mean()
    result['degree']["prefix2"]["counts"], result['degree']["prefix2"]["bins"] = \
        np.histogram(network.get_out_degrees(nodes2), bins=15)
    result['degree']["prefix2"]["d"] = network.get_out_degrees(nodes2)

    result['weights'] = {}
    weights = []
    for v1, v2 in nx_network.edges():
        weight = nx_network.get_edge_data(v1, v2)['weight']
        weights.append(weight)
    # result['weights']['counts'], result['weights']['bins'] = np.histogram(weights, bins=8)
    result['weights']['d'] = weights

    # estimated diameter and longest path
    d, (v1, v2) = top.pseudo_diameter(network)
    result['diameter'] = d
    d_path = "{}-{}".format(network.vp['id'][v1], network.vp['id'][v2])
    result['diameter_path'] = d_path

    result['clustering'] = clu.global_clustering(network)

    if not already_calculated:
        net2 = gt.Graph(network)  # undirected version
        net2.set_directed(False)
        result['sp'] = {}
        result['sp']['counts'], result['sp']['bins'] = shortest_paths(net2)

        # connected components
        _, c2 = top.label_components(net2)
        result['components'] = {}
        result['components']['num'] = len(c2)
        result['components']['bins'] = range(len(c2))
        result['components']['counts'] = c2

    pickle.dump(result, open(filename, "wb"))
    return result
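# Quick standalone look at the graph-tool helpers get_descriptors relies on
# (st = graph_tool.stats, top = graph_tool.topology, clu = graph_tool.clustering);
# the lattice graph is illustrative only.
import graph_tool.clustering as clu
import graph_tool.stats as st
import graph_tool.topology as top
from graph_tool.generation import lattice

_g = lattice([10, 10])
_counts, _bins = st.vertex_hist(_g, "out")  # degree histogram
_d, (_v1, _v2) = top.pseudo_diameter(_g)    # estimated diameter + endpoints
print(_counts, _bins, _d, clu.global_clustering(_g))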
# Method of the evaluation class; assumes module-level imports of numpy as np,
# graph-tool's pseudo_diameter, extract_largest_component and
# global_clustering, and the project's partition_from_truth helper.
def evaluate_sampling(self, full_graph: Graph, sampled_graph: Graph,
                      full_partition: BlockState,
                      sampled_graph_partition: BlockState,
                      block_mapping: Dict[int, int],
                      vertex_mapping: Dict[int, int],
                      assignment: np.ndarray):
    """Evaluates the goodness of the samples.

    Parameters
    ----------
    full_graph : Graph
        the full, unsampled Graph object
    sampled_graph : Graph
        the sampled graph
    full_partition : Partition
        the partitioning results on the full graph
    sampled_graph_partition : Partition
        the partitioning results on the sampled graph
    block_mapping : Dict[int, int]
        the mapping of blocks from the full graph to the sampled graph
    vertex_mapping : Dict[int, int]
        the mapping of vertices from the full graph to the sampled graph
    assignment : np.ndarray[int]
        the true vertex-to-community mapping
    """
    #####
    # General
    #####
    self.sampled_graph_num_vertices = sampled_graph.num_vertices()
    self.sampled_graph_num_edges = sampled_graph.num_edges()
    self.blocks_retained = (sampled_graph_partition.get_B()
                            / full_partition.get_B())
    # pseudo_diameter returns a tuple: (diameter, (start_vertex, end_vertex))
    self.sampled_graph_diameter = pseudo_diameter(sampled_graph)[0]
    self.full_graph_diameter = pseudo_diameter(full_graph)[0]
    for vertex in sampled_graph.vertices():
        if (vertex.in_degree() + vertex.out_degree()) == 0:
            self.sampled_graph_island_vertices += 1
    self.sampled_graph_largest_component = extract_largest_component(
        sampled_graph, directed=False).num_vertices()
    self.full_graph_largest_component = extract_largest_component(
        full_graph, directed=False).num_vertices()

    #####
    # Expansion quality (http://portal.acm.org/citation.cfm?doid=1772690.1772762)
    #####
    # Expansion factor = Neighbors of sample / size of sample
    # Maximum expansion factor = (size of graph - size of sample) / size of sample
    # Expansion quality = Neighbors of sample / (size of graph - size of sample)
    # Expansion quality = 1 means sample is at most 1 edge away from entire graph
    sampled_graph_vertices = set(vertex_mapping.keys())
    neighbors = set()
    for vertex in sampled_graph_vertices:
        for neighbor in full_graph.get_out_neighbors(vertex):
            neighbors.add(neighbor)
    neighbors = neighbors - sampled_graph_vertices
    self.expansion_quality = len(neighbors) / (
        full_graph.num_vertices() - sampled_graph.num_vertices())

    #####
    # Clustering coefficient
    #####
    self.sampled_graph_clustering_coefficient = global_clustering(
        sampled_graph)[0]
    self.full_graph_clustering_coefficient = global_clustering(full_graph)[0]

    #####
    # Info on communities
    #####
    self.get_community_details(
        assignment, full_partition.get_blocks().get_array(),
        sampled_graph_partition.get_blocks().get_array(), vertex_mapping)

    # Cannot compute below metrics if no true partition is provided
    if np.unique(assignment).size == 1:
        return

    #####
    # % difference in ratio of within-block to between-block edges
    #####
    sample_assignment = assignment[np.fromiter(vertex_mapping.keys(),
                                               dtype=np.int32)]
    true_sampled_graph_partition = partition_from_truth(sampled_graph,
                                                        sample_assignment)
    sampled_graph_blockmatrix = true_sampled_graph_partition.get_matrix()
    self.sampled_graph_edge_ratio = (sampled_graph_blockmatrix.diagonal().sum()
                                     / sampled_graph_blockmatrix.sum())
    true_full_partition = partition_from_truth(full_graph, assignment)
    full_blockmatrix = true_full_partition.get_matrix()
    self.graph_edge_ratio = (full_blockmatrix.diagonal().sum()
                             / full_blockmatrix.sum())

    #####
    # Normalized difference from ideal-block membership
    #####
    membership_size = max(np.max(assignment), np.max(sample_assignment)) + 1
    full_graph_membership_nums = np.zeros(membership_size)
    for block_membership in assignment:
        full_graph_membership_nums[block_membership] += 1
    sampled_graph_membership_nums = np.zeros(membership_size)
    for block_membership in sample_assignment:
        sampled_graph_membership_nums[block_membership] += 1
    ideal_block_membership_nums = full_graph_membership_nums * \
        (sampled_graph.num_vertices() / full_graph.num_vertices())
    difference_from_ideal_block_membership_nums = np.abs(
        ideal_block_membership_nums - sampled_graph_membership_nums)
    self.difference_from_ideal_sample = np.sum(
        difference_from_ideal_block_membership_nums
        / sampled_graph.num_vertices())
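# Standalone sketch of the expansion-quality measure computed in
# evaluate_sampling above (all names illustrative): the share of non-sampled
# vertices reachable within one hop of the sample.
from graph_tool import Graph

_full = Graph(directed=True)
_full.add_vertex(6)
_full.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])

_sample = {0, 1}
_neighbors = set()
for _v in _sample:
    _neighbors.update(_full.get_out_neighbors(_v))
_neighbors -= _sample

print(len(_neighbors) / (_full.num_vertices() - len(_sample)))  # 1 / 4 = 0.25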
from graph_tool.clustering import global_clustering


def _get_clustering_coefficient(G):
    '''Return the clustering coefficient :math:`C(G)`.'''
    return global_clustering(G)[0]