def clustering_coefficient(self, graph: Graph) -> float:
    """Calculates the clustering coefficient of a given graph.

    For every vertex, every ordered pair (neighbor, neighbor2) of its out-neighbors is examined; the pair is
    counted whenever neighbor2 is also an out-neighbor of neighbor. The resulting count is divided by
    num_vertices * (num_vertices - 1).

    Self-connections are not filtered out, so they are counted as well.

    Parameters
    ---------
    graph : Graph
        the graph whose clustering coefficient is of interest

    Returns
    -------
    clustering_coefficient : float
        the clustering coefficient of said graph. Graphs with fewer than two vertices yield 0.0, because the
        normalizing term num_vertices * (num_vertices - 1) would otherwise be zero.
    """
    n_vertices = graph.num_vertices()
    if n_vertices < 2:
        # The denominator below would be zero; there is nothing to count anyway.
        return 0.0

    # Out-neighbor sets are built lazily and reused, so the membership test in the inner loop does not
    # re-fetch and linearly scan the neighbor array for every (neighbor, neighbor2) pair.
    neighbor_sets = {}

    def out_neighbor_set(vertex_id):
        cached = neighbor_sets.get(vertex_id)
        if cached is None:
            cached = set(graph.get_out_neighbors(vertex_id))
            neighbor_sets[vertex_id] = cached
        return cached

    n_triangles_sample = 0
    for vertex in range(n_vertices):
        out_neighbors = graph.get_out_neighbors(vertex)
        for neighbor in out_neighbors:
            reachable = out_neighbor_set(neighbor)
            # TODO: If not counting self-links, add check for that here
            n_triangles_sample += sum(1 for neighbor2 in out_neighbors if neighbor2 in reachable)
    return n_triangles_sample / (n_vertices * (n_vertices - 1))
def random_walk_sample(graph: Graph, num_vertices: int, prev_state: RandomWalkSampleState,
                       args: argparse.Namespace) -> SampleState:
    """Random walk sampling.

    Start from a random vertex and walk along the out-edges, sampling every vertex that is a part of the walk.
    With a probability of 0.15, restart the walk from the starting vertex. To prevent getting stuck, after
    making N attempts, where N = the target number of vertices in the sample, change the starting vertex to a
    new random vertex. A walk that reaches a vertex without out-neighbors also returns to the starting vertex
    (which is itself replaced if it has no out-neighbors).

    Parameters
    ----------
    graph : Graph
        the filtered graph from which to sample vertices
    num_vertices : int
        number of vertices in the unfiltered graph
    prev_state : RandomWalkSampleState
        the state of the previous sample in the stack. If there is no previous sample, an empty SampleState
        object should be passed in here.
    args : argparse.Namespace
        the command-line arguments provided by the user. Reads args.sample_size (a percentage) and
        args.sample_iterations.

    Returns
    -------
    state : SampleState
        the sample state with the sampled vertex ids (Note: these ids correspond to the filtered graph, and
        have to be mapped back to the unfiltered graph)

    Raises
    ------
    ValueError
        if the target number of sampled vertices is not positive
    """
    state = RandomWalkSampleState(graph.num_vertices(), prev_state)
    sample_num = int((num_vertices * (args.sample_size / 100)) / args.sample_iterations)
    sample_num += len(state.sample_idx)
    if sample_num <= 0:
        # The loop below takes values modulo sample_num, so a non-positive target cannot be served.
        raise ValueError("random walk sampling needs a positive target sample size, got {}".format(sample_num))

    # Start vertices are drawn from the ids of the graph being walked (the same range forest_fire_sample
    # uses), not from range(sample_num): the latter only ever starts among the first sample_num ids and can
    # exceed the valid id range when the target is larger than the graph.
    num_graph_vertices = graph.num_vertices()
    num_tries = 0
    start = np.random.randint(num_graph_vertices)  # start with a random vertex
    vertex = start

    while len(state.index_set) == 0 or len(state.index_set) % sample_num != 0:
        num_tries += 1
        if not state.sampled_marker[vertex]:
            state.index_set.append(vertex)
            state.sampled_marker[vertex] = True
        if num_tries % sample_num == 0:
            # If the number of tries is large, restart from new random vertex
            start = np.random.randint(num_graph_vertices)
            vertex = start
            num_tries = 0
        elif np.random.random() < 0.15:
            # With a probability of 0.15, restart at original node
            vertex = start
        else:
            out_neighbors = graph.get_out_neighbors(vertex)
            if len(out_neighbors) > 0:
                # If the vertex has out neighbors, go to one of them
                vertex = np.random.choice(out_neighbors)
            else:
                # Dead end: restart from the original vertex
                if len(graph.get_out_neighbors(start)) == 0:
                    # if original vertex has no out neighbors, change it
                    start = np.random.randint(num_graph_vertices)
                vertex = start

    state.sample_idx = np.asarray(state.index_set)
    return state
def calc_random_spanning_tree(q: gt.Graph):
    """Builds a random spanning tree of q by means of a random walk.

    The walk starts at a randomly chosen vertex and repeatedly steps to a randomly chosen out-neighbor of the
    current vertex. Whenever a step reaches a vertex for the first time, the edge that was just traversed is
    recorded. The walk ends once as many vertices have been visited as q.num_vertices() reports.

    Parameters
    ----------
    q : gt.Graph
        the graph to build the spanning tree from

    Returns
    -------
    t
        whatever construct_gt_graph returns for the visited vertices, the recorded edges and q
    """
    total_nodes = q.num_vertices()
    position = np.random.choice(list(q.vertices()))
    visited = {position}
    tree_edges = set()

    while len(visited) < total_nodes:
        # Take one random step along an out-edge of the current position
        candidate = np.random.choice(q.get_out_neighbors(position))
        step = (position, candidate)
        if candidate not in visited:
            # First arrival at this vertex: the traversed edge becomes part of the tree
            tree_edges.add(step)
            visited.add(candidate)
        position = candidate

    return construct_gt_graph(visited, tree_edges, q)
def random_node_neighbor_sample(graph: Graph, num_vertices: int, prev_state: RandomNodeNeighborSampleState,
                                args: argparse.Namespace) -> SampleState:
    """Random node neighbor sampling.

    Seed vertices are drawn uniformly at random (without replacement) from the vertices that are not yet in
    the sample. Each seed is added to the sample together with all of its out neighbors. Seeds are processed
    until the target size is reached, and the result is truncated to exactly the target size.

    Parameters
    ----------
    graph : Graph
        the filtered graph from which to sample vertices
    num_vertices : int
        number of vertices in the unfiltered graph
    prev_state : RandomNodeNeighborSampleState
        the state of the previous sample in the stack. If there is no previous sample, an empty SampleState
        object should be passed in here.
    args : argparse.Namespace
        the command-line arguments provided by the user. Reads args.sample_size (a percentage) and
        args.sample_iterations.

    Returns
    -------
    state : SampleState
        the sample state with the sampled vertex ids (Note: these ids correspond to the filtered graph, and
        have to be mapped back to the unfiltered graph)
    """
    state = RandomNodeNeighborSampleState(graph.num_vertices(), prev_state)
    sample_num = int((num_vertices * (args.sample_size / 100)) / args.sample_iterations)

    # Seeds come only from vertices that are not part of the previous sample
    unsampled = np.setdiff1d(np.asarray(range(graph.num_vertices())), state.sample_idx)
    seeds = np.random.choice(unsampled, sample_num, replace=False)
    target_size = sample_num + len(state.sample_idx)

    def take(candidate):
        # Adds the vertex to the sample unless it has been sampled before
        if state.sampled_marker[candidate]:
            return
        state.index_set.append(candidate)
        state.sampled_marker[candidate] = True

    for seed in seeds:
        take(seed)
        for neighbor in graph.get_out_neighbors(seed):
            take(neighbor)
        if len(state.index_set) >= target_size:
            break

    # The last seed may have pulled in more neighbors than needed; keep only the first target_size ids
    state.sample_idx = np.asarray(state.index_set[:target_size])
    return state
def evaluate_sampling(self, full_graph: Graph, sampled_graph: Graph, full_partition: BlockState,
                      sampled_graph_partition: BlockState, block_mapping: Dict[int, int],
                      vertex_mapping: Dict[int, int], assignment: np.ndarray):
    """Evaluates the goodness of the samples and stores the resulting metrics on this object.

    The metrics cover general size statistics, expansion quality, clustering coefficients and community
    details. If a true partition is available (more than one distinct value in assignment), the ratio of
    within-block edges and the deviation from an ideally proportional sample are computed as well.

    Parameters
    ----------
    full_graph : Graph
        the full, unsampled Graph object
    sampled_graph : Graph
        the sampled graph
    full_partition : BlockState
        the partitioning results on the full graph
    sampled_graph_partition : BlockState
        the partitioning results on the sampled graph
    block_mapping : Dict[int, int]
        the mapping of blocks from the full graph to the sampled graph (not read by this method)
    vertex_mapping : Dict[int, int]
        the mapping of vertices from the full graph to the sampled graph
    assignment : np.ndarray[int]
        the true vertex-to-community mapping
    """
    # ----- General -----
    self.sampled_graph_num_vertices = sampled_graph.num_vertices()
    self.sampled_graph_num_edges = sampled_graph.num_edges()
    num_sampled_blocks = sampled_graph_partition.get_B()
    self.blocks_retained = num_sampled_blocks / full_partition.get_B()

    # pseudo_diameter returns a tuple: (diameter, (start_vertex, end_vertex))
    self.sampled_graph_diameter = pseudo_diameter(sampled_graph)[0]
    self.full_graph_diameter = pseudo_diameter(full_graph)[0]

    # Island vertices have neither incoming nor outgoing edges
    for candidate in sampled_graph.vertices():
        total_degree = candidate.in_degree() + candidate.out_degree()
        if total_degree == 0:
            self.sampled_graph_island_vertices += 1

    largest_sampled = extract_largest_component(sampled_graph, directed=False)
    self.sampled_graph_largest_component = largest_sampled.num_vertices()
    largest_full = extract_largest_component(full_graph, directed=False)
    self.full_graph_largest_component = largest_full.num_vertices()

    # ----- Expansion quality (http://portal.acm.org/citation.cfm?doid=1772690.1772762) -----
    # Expansion factor = Neighbors of sample / size of sample
    # Maximum expansion factor = (size of graph - size of sample) / size of sample
    # Expansion quality = Neighbors of sample / (size of graph - size of sample)
    # Expansion quality = 1 means sample is at most 1 edge away from entire graph
    sampled_ids = set(vertex_mapping)
    frontier = set()
    for sampled_vertex in sampled_ids:
        frontier.update(full_graph.get_out_neighbors(sampled_vertex))
    frontier -= sampled_ids
    num_unsampled = full_graph.num_vertices() - sampled_graph.num_vertices()
    self.expansion_quality = len(frontier) / num_unsampled

    # ----- Clustering coefficient -----
    self.sampled_graph_clustering_coefficient = global_clustering(sampled_graph)[0]
    self.full_graph_clustering_coefficient = global_clustering(full_graph)[0]

    # ----- Info on communities -----
    full_blocks = full_partition.get_blocks().get_array()
    sampled_blocks = sampled_graph_partition.get_blocks().get_array()
    self.get_community_details(assignment, full_blocks, sampled_blocks, vertex_mapping)

    # Cannot compute below metrics if no true partition is provided
    if np.unique(assignment).size == 1:
        return

    # ----- % difference in ratio of within-block to between-block edges -----
    sampled_vertex_ids = np.fromiter(vertex_mapping.keys(), dtype=np.int32)
    sample_assignment = assignment[sampled_vertex_ids]
    sampled_truth = partition_from_truth(sampled_graph, sample_assignment)
    sampled_matrix = sampled_truth.get_matrix()
    self.sampled_graph_edge_ratio = sampled_matrix.diagonal().sum() / sampled_matrix.sum()

    full_truth = partition_from_truth(full_graph, assignment)
    full_matrix = full_truth.get_matrix()
    self.graph_edge_ratio = full_matrix.diagonal().sum() / full_matrix.sum()

    # ----- Normalized difference from ideal-block membership -----
    membership_size = max(np.max(assignment), np.max(sample_assignment)) + 1

    def tally(labels):
        # Number of vertices assigned to each block
        counts = np.zeros(membership_size)
        for label in labels:
            counts[label] += 1
        return counts

    full_counts = tally(assignment)
    sampled_counts = tally(sample_assignment)
    # An ideal sample keeps every block at the same proportion as in the full graph
    sampling_fraction = sampled_graph.num_vertices() / full_graph.num_vertices()
    ideal_counts = full_counts * sampling_fraction
    deviation = np.abs(ideal_counts - sampled_counts)
    self.difference_from_ideal_sample = np.sum(deviation / sampled_graph.num_vertices())
def expansion_snowball_sample(graph: Graph, num_vertices: int, prev_state: ExpansionSnowballSampleState,
                              args: argparse.Namespace) -> SampleState:
    """Expansion snowball sampling.

    At every iteration, picks a vertex adjacent to the current sample that contributes the most new neighbors.
    Each iteration of the main loop takes one of three paths:

    1. the neighbor set is empty: a random vertex that is not yet in the index set is sampled;
    2. no candidate has a positive contribution: random vertices are sampled from the neighbor set;
    3. otherwise: the vertex with the largest contribution is sampled.

    Parameters
    ----------
    graph : Graph
        the filtered graph from which to sample vertices
    num_vertices : int
        number of vertices in the unfiltered graph
    prev_state : ExpansionSnowballSampleState
        the state of the previous sample in the stack. If there is no previous sample, an empty SampleState
        object should be passed in here.
    args : argparse.Namespace
        the command-line arguments provided by the user. Reads args.sample_size (a percentage) and
        args.sample_iterations.

    Returns
    -------
    state : SampleState
        the sample state with the sampled vertex ids (Note: these ids correspond to the filtered graph, and
        have to be mapped back to the unfiltered graph)
    """
    state = ExpansionSnowballSampleState(graph.num_vertices(), prev_state)
    # Target sample size: this iteration's share of the unfiltered graph plus what was sampled before
    sample_num = int((num_vertices * (args.sample_size / 100)) / args.sample_iterations)
    sample_num += len(state.sample_idx)
    if not state.neighbors:  # If there are no neighbors, start with the state.start vertex
        # NOTE(review): state.start is flagged as sampled here but is not appended to state.index_set in
        # this function - presumably ExpansionSnowballSampleState takes care of that; TODO confirm.
        state.index_flag[state.start] = True
        state.neighbors = set(graph.get_out_neighbors(state.start))
        for neighbor in graph.get_out_neighbors(state.start):
            if neighbor == state.start:
                # A self-loop does not make the start vertex its own candidate
                state.neighbors.remove(neighbor)
            else:
                state.neighbors_flag[neighbor] = True
                # Count the out-neighbors of this candidate that are neither sampled nor candidates yet
                new_neighbors = 0
                for _neighbor in graph.get_out_neighbors(neighbor):
                    if not (state.index_flag[_neighbor] or state.neighbors_flag[_neighbor]):
                        new_neighbors += 1
                state.contribution[neighbor] += new_neighbors
    # Every path below adds at most (sample_num - len(state.index_set)) vertices per iteration
    while len(state.index_set) == 0 or len(state.index_set) % sample_num != 0:
        if len(state.neighbors) == 0:  # choose random vertex not in index set
            vertex = np.random.choice(np.setxor1d(np.arange(graph.num_vertices()), state.index_set))
            state.index_set.append(vertex)
            state.index_flag[vertex] = True
            for neighbor in graph.get_out_neighbors(vertex):
                if not (state.neighbors_flag[neighbor] or state.index_flag[neighbor]):
                    # Sample._add_neighbor is defined outside this block; presumably it registers the
                    # vertex as a candidate and updates the contributions - TODO confirm.
                    Sample._add_neighbor(neighbor, state.contribution, state.index_flag,
                                         state.neighbors_flag, graph.get_out_neighbors(neighbor),
                                         graph.get_in_neighbors(neighbor), state.neighbors)
            continue
        elif np.max(state.contribution) == 0:  # choose random neighbors from neighbor set
            # Never take more candidates than are still missing from the sample
            num_choices = min(len(state.neighbors), sample_num - len(state.index_set))
            vertices = np.random.choice(np.fromiter(state.neighbors, int, len(state.neighbors)),
                                        num_choices, replace=False)
            for vertex in vertices:
                state.index_set.append(vertex)
                state.index_flag[vertex] = True
                state.neighbors.remove(vertex)
                for neighbor in graph.get_out_neighbors(vertex):
                    if not (state.neighbors_flag[neighbor] or state.index_flag[neighbor]):
                        Sample._add_neighbor(neighbor, state.contribution, state.index_flag,
                                             state.neighbors_flag, graph.get_out_neighbors(neighbor),
                                             graph.get_in_neighbors(neighbor), state.neighbors)
            continue
        # Greedy step: sample the vertex with the largest contribution
        vertex = np.argmax(state.contribution)
        state.index_set.append(vertex)
        state.index_flag[vertex] = True
        state.neighbors.remove(vertex)
        # A sampled vertex must not be selected by argmax again
        state.contribution[vertex] = 0
        # NOTE(review): this path walks the in-neighbors of the sampled vertex, whereas the two paths above
        # walk its out-neighbors - confirm that this asymmetry is intended.
        for neighbor in graph.get_in_neighbors(vertex):
            if not (state.neighbors_flag[neighbor] or state.index_flag[neighbor]):
                Sample._add_neighbor(neighbor, state.contribution, state.index_flag,
                                     state.neighbors_flag, graph.get_out_neighbors(neighbor),
                                     graph.get_in_neighbors(neighbor), state.neighbors)
    state.sample_idx = np.asarray(state.index_set)
    return state
def forest_fire_sample(graph: Graph, num_vertices: int, prev_state: ForestFireSampleState,
                       args: argparse.Namespace) -> SampleState:
    """Forest-fire sampling with forward probability = 0.7.

    At every stage, each vertex on the current fire front is added to the sample, and a number of its
    out-neighbors, drawn from a geometric distribution with p = 0.7 (capped at the number of out-neighbors),
    is selected at random to form the next fire front. All out-neighbors of the vertex, selected or not, are
    marked as burnt, so vertices that were not selected are 'blacklisted' and no longer viable for future
    selection. If the fire burns out, or all vertices are burnt before the target number of vertices has been
    selected, sampling restarts from a new random starting vertex.

    Parameters
    ----------
    graph : Graph
        the filtered graph from which to sample vertices
    num_vertices : int
        number of vertices in the unfiltered graph
    prev_state : ForestFireSampleState
        the state of the previous sample in the stack. If there is no previous sample, an empty SampleState
        object should be passed in here.
    args : argparse.Namespace
        the command-line arguments provided by the user. Reads args.sample_size (a percentage) and
        args.sample_iterations.

    Returns
    -------
    state : SampleState
        the sample state with the sampled vertex ids (Note: these ids correspond to the filtered graph, and
        have to be mapped back to the unfiltered graph)
    """
    state = ForestFireSampleState(graph.num_vertices(), prev_state)
    sample_num = int((num_vertices * (args.sample_size / 100)) / args.sample_iterations)
    sample_num += len(state.sample_idx)

    # A whole fire front is processed per iteration, so the index set can grow past sample_num in a single
    # step. Comparing with "<" (rather than testing for an exact multiple of sample_num) stops the loop in
    # that case too; the surplus is trimmed by the slice at the end.
    while len(state.index_set) < sample_num:
        for vertex in state.current_fire_front:
            # add vertex to index set
            if not state.sampled_marker[vertex]:
                state.sampled_marker[vertex] = True
                state.burnt_marker[vertex] = True
                state.num_burnt += 1
                state.index_set.append(vertex)
            # select edges to burn
            num_to_choose = np.random.geometric(0.7)
            out_neighbors = graph.get_out_neighbors(vertex)
            if len(out_neighbors) < 1:  # If there are no outgoing neighbors
                continue
            num_to_choose = min(num_to_choose, len(out_neighbors))
            mask = np.zeros(len(out_neighbors))
            indexes = np.random.choice(np.arange(len(out_neighbors)), num_to_choose, replace=False)
            mask[indexes] = 1
            for index, value in enumerate(mask):
                neighbor = out_neighbors[index]
                if value == 1:  # if chosen, add to next frontier
                    if not state.burnt_marker[neighbor]:
                        state.next_fire_front.append(neighbor)
                # mark all neighbors as visited
                # NOTE(review): num_burnt is only incremented when a vertex is sampled, not here, so the
                # "all burnt" restart below triggers only once every vertex has been sampled - confirm.
                state.burnt_marker[neighbor] = True
        if state.num_burnt == graph.num_vertices():  # all samples are burnt, restart
            state.num_burnt = 0
            state.burnt_marker = [False] * graph.num_vertices()
            state.current_fire_front = [np.random.randint(graph.num_vertices())]
            state.next_fire_front = list()
            continue
        if len(state.next_fire_front) == 0:  # if fire is burnt-out
            state.current_fire_front = [np.random.randint(graph.num_vertices())]
        else:
            state.current_fire_front = list(state.next_fire_front)
            state.next_fire_front = list()

    state.sample_idx = np.asarray(state.index_set[:sample_num])
    return state