Example #1
    def clustering_coefficient(self, graph: Graph) -> float:
        """Calculates the clustering coefficient of a given graph.

        Clustering coefficient = number of closed triangles / total possible number of triangles.

        Current version also counts self-connections as triangles as well.

            Parameters
            ---------
            graph : Graph
                the graph whose clustering coefficient is of interest

            Returns
            -------
            clustering_coefficient : float
                the clustering coefficient of said graph
        """
        n_triangles_sample = 0
        # For every vertex, count the ordered pairs (u, w) of its out-neighbors
        # that are themselves connected by an edge u -> w.
        for vertex in range(graph.num_vertices()):
            for neighbor in graph.get_out_neighbors(vertex):
                for neighbor2 in graph.get_out_neighbors(vertex):
                    # TODO: If not counting self-links, add a check for that here
                    if neighbor2 in graph.get_out_neighbors(neighbor):
                        n_triangles_sample += 1
        return n_triangles_sample / (graph.num_vertices() *
                                     (graph.num_vertices() - 1))
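
For a quick sanity check of the counting logic, here is a self-contained sketch on a dict-based directed graph. It does not use the Graph API above, and the names toy_graph and toy_clustering_coefficient are illustrative only.

toy_graph = {
    0: [1, 2],
    1: [2],
    2: [0],
    3: [],
}

def toy_clustering_coefficient(adjacency: dict) -> float:
    n = len(adjacency)
    n_triangles = 0
    for vertex, neighbors in adjacency.items():
        for u in neighbors:
            for w in neighbors:
                # Count the ordered pair (u, w) if the edge u -> w also exists.
                if w in adjacency[u]:
                    n_triangles += 1
    return n_triangles / (n * (n - 1))

print(toy_clustering_coefficient(toy_graph))  # one closed pair / (4 * 3) ~ 0.083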
Example #2
    def random_walk_sample(graph: Graph, num_vertices: int,
                           prev_state: RandomWalkSampleState,
                           args: argparse.Namespace) -> SampleState:
        """Random walk sampling. Start from a vertex and walk along the edges, sampling every vertex that is a part of
        the walk. With a probability of 0.15, restart the walk from the original vertex. To prevent getting stuck,
        after making N attempts, where N = the target number of vertices in the sample, change the starting vertex to a
        random vertex.

        Parameters
        ----------
        graph : Graph
            the filtered graph from which to sample vertices
        num_vertices : int
            number of vertices in the unfiltered graph
        prev_state : RandomWalkSampleState
            the state of the previous sample in the stack. If there is no previous sample, an empty SampleState object
            should be passed in here.
        args : argparse.Namespace
            the command-line arguments provided by the user

        Returns
        -------
        state : SampleState
            the sample state with the sampled vertex ids (Note: these ids correspond to the filtered graph, and have
            to be mapped back to the unfiltered graph)
        """
        state = RandomWalkSampleState(graph.num_vertices(), prev_state)
        sample_num = int(
            (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
        sample_num += len(state.sample_idx)
        num_tries = 0
        # Start the walk from a random vertex of the filtered graph
        start = np.random.randint(graph.num_vertices())
        vertex = start

        while (len(state.index_set) == 0
               or len(state.index_set) % sample_num != 0):
            num_tries += 1
            if not state.sampled_marker[vertex]:
                state.index_set.append(vertex)
                state.sampled_marker[vertex] = True
            if num_tries % sample_num == 0:  # too many tries: restart from a new random vertex
                start = np.random.randint(graph.num_vertices())
                vertex = start
                num_tries = 0
            elif np.random.random() < 0.15:  # with probability 0.15, restart at the original vertex
                vertex = start
            elif len(graph.get_out_neighbors(vertex)) > 0:  # follow a random outgoing edge
                vertex = np.random.choice(graph.get_out_neighbors(vertex))
            else:  # dead end: restart from the original vertex
                if len(graph.get_out_neighbors(start)) == 0:  # if the start is also a dead end, re-pick it
                    start = np.random.randint(graph.num_vertices())
                vertex = start

        state.sample_idx = np.asarray(state.index_set)
        return state
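
For intuition, the same walk-with-restart idea can be exercised on a plain dict-of-lists graph. The sketch below omits the SampleState bookkeeping and command-line arguments, and all names in it are hypothetical.

import numpy as np

def toy_random_walk_sample(adjacency: dict, target_size: int,
                           restart_prob: float = 0.15) -> list:
    """Illustrative walk-with-restart sampler over a dict-of-lists graph."""
    vertices = list(adjacency.keys())
    sampled, seen = [], set()
    start = vertex = int(np.random.choice(vertices))
    tries = 0
    while len(sampled) < target_size:
        tries += 1
        if vertex not in seen:
            seen.add(vertex)
            sampled.append(vertex)
        if tries % target_size == 0:  # too many tries: jump to a new random start
            start = vertex = int(np.random.choice(vertices))
        elif np.random.random() < restart_prob:  # restart at the original vertex
            vertex = start
        elif adjacency[vertex]:  # follow a random outgoing edge
            vertex = int(np.random.choice(adjacency[vertex]))
        else:  # dead end: restart (re-seed if the start is also a dead end)
            if not adjacency[start]:
                start = int(np.random.choice(vertices))
            vertex = start
    return sampled

print(toy_random_walk_sample({0: [1], 1: [2], 2: [0, 3], 3: []}, target_size=3))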
Example #3
def calc_random_spanning_tree(q: gt.Graph):
    """Builds a random spanning tree of `q` via a random walk: the edge used on the
    first arrival at each unvisited node is added to the tree."""
    curr_node = np.random.choice(list(q.vertices()))
    # root_node = curr_node
    nodes_visited = {curr_node}
    edges_used = set()
    n_nodes = q.num_vertices()

    while len(nodes_visited) < n_nodes:
        # Choose a random neighbour
        next_node = np.random.choice(q.get_out_neighbors(curr_node))
        if next_node not in nodes_visited:
            edges_used.add((curr_node, next_node))
            nodes_visited.add(next_node)
        curr_node = next_node

    t = construct_gt_graph(nodes_visited, edges_used, q)
    return t
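
The walk-based construction above can be checked on a small undirected graph stored as a dict; construct_gt_graph is not reproduced here, and the sketch below is illustrative only.

import numpy as np

def toy_random_spanning_tree(adjacency: dict) -> set:
    """Returns the tree edges found by a random walk over a connected dict-of-lists graph."""
    current = int(np.random.choice(list(adjacency.keys())))
    visited = {current}
    tree_edges = set()
    while len(visited) < len(adjacency):
        nxt = int(np.random.choice(adjacency[current]))
        if nxt not in visited:  # first arrival at nxt: keep the edge that got us here
            tree_edges.add((current, nxt))
            visited.add(nxt)
        current = nxt
    return tree_edges

# Cycle graph 0-1-2-3-0: any run yields three tree edges.
print(toy_random_spanning_tree({0: [1, 3], 1: [0, 2], 2: [1, 3], 3: [2, 0]}))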
Example #4
    def random_node_neighbor_sample(graph: Graph, num_vertices: int,
                                    prev_state: RandomNodeNeighborSampleState,
                                    args: argparse.Namespace) -> SampleState:
        """Random node neighbor sampling. Whenever a single vertex is selected, all its out neighbors are selected
        as well.

        Parameters
        ----------
        graph : Graph
            the filtered graph from which to sample vertices
        num_vertices : int
            number of vertices in the unfiltered graph
        prev_state : RandomNodeNeighborSampleState
            the state of the previous sample in the stack. If there is no previous sample, an empty SampleState object
            should be passed in here.
        args : argparse.Namespace
            the command-line arguments provided by the user

        Returns
        -------
        state : SampleState
            the sample state with the sampled vertex ids (Note: these ids correspond to the filtered graph, and have
            to be mapped back to the unfiltered graph)
        """
        state = RandomNodeNeighborSampleState(graph.num_vertices(), prev_state)
        sample_num = int(
            (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
        choices = np.setdiff1d(np.asarray(range(graph.num_vertices())),
                               state.sample_idx)
        random_samples = np.random.choice(choices, sample_num, replace=False)
        sample_num += len(state.sample_idx)
        for vertex in random_samples:
            if not state.sampled_marker[vertex]:
                state.index_set.append(vertex)
                state.sampled_marker[vertex] = True
            for neighbor in graph.get_out_neighbors(vertex):
                if not state.sampled_marker[neighbor]:
                    state.index_set.append(neighbor)
                    state.sampled_marker[neighbor] = True
            if len(state.index_set) >= sample_num:
                break
        state.sample_idx = np.asarray(state.index_set[:sample_num])
        return state
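
A stripped-down version of the node-plus-neighbors idea, on a dict-of-lists graph with hypothetical names, might look like this:

import numpy as np

def toy_node_neighbor_sample(adjacency: dict, target_size: int) -> list:
    """Illustrative sampler: each picked vertex pulls in all of its out-neighbors."""
    order = np.random.permutation(list(adjacency.keys()))
    sampled, seen = [], set()
    for vertex in order:
        vertex = int(vertex)
        if vertex not in seen:
            seen.add(vertex)
            sampled.append(vertex)
        for neighbor in adjacency[vertex]:  # pull in the out-neighbors as well
            if neighbor not in seen:
                seen.add(neighbor)
                sampled.append(neighbor)
        if len(sampled) >= target_size:
            break
    return sampled[:target_size]

print(toy_node_neighbor_sample({0: [1, 2], 1: [3], 2: [], 3: [0]}, target_size=3))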
Example #5
    def evaluate_sampling(self, full_graph: Graph, sampled_graph: Graph,
                          full_partition: BlockState,
                          sampled_graph_partition: BlockState,
                          block_mapping: Dict[int, int],
                          vertex_mapping: Dict[int, int],
                          assignment: np.ndarray):
        """Evaluates the goodness of the samples.

        Parameters
        ----------
        full_graph : Graph
            the full, unsampled Graph object
        sampled_graph : Graph
            the sampled graph
        full_partition : BlockState
            the partitioning results on the full graph
        sampled_graph_partition : BlockState
            the partitioning results on the sampled graph
        block_mapping : Dict[int, int]
            the mapping of blocks from the full graph to the sampled graph
        vertex_mapping : Dict[int, int]
            the mapping of vertices from the full graph to the sampled graph
        assignment : np.ndarray[int]
            the true vertex-to-community mapping
        """
        #####
        # General
        #####
        self.sampled_graph_num_vertices = sampled_graph.num_vertices()
        self.sampled_graph_num_edges = sampled_graph.num_edges()
        self.blocks_retained = (sampled_graph_partition.get_B() /
                                full_partition.get_B())
        # pseudo_diameter returns a tuple: (diameter, (start_vertex, end_vertex))
        self.sampled_graph_diameter = pseudo_diameter(sampled_graph)[0]
        self.full_graph_diameter = pseudo_diameter(full_graph)[0]
        for vertex in sampled_graph.vertices():
            if (vertex.in_degree() + vertex.out_degree()) == 0:
                self.sampled_graph_island_vertices += 1
        self.sampled_graph_largest_component = extract_largest_component(
            sampled_graph, directed=False).num_vertices()
        self.full_graph_largest_component = extract_largest_component(
            full_graph, directed=False).num_vertices()

        ######
        # Expansion quality (http://portal.acm.org/citation.cfm?doid=1772690.1772762)
        ######
        # Expansion factor = Neighbors of sample / size of sample
        # Maximum expansion factor = (size of graph - size of sample) / size of sample
        # Expansion quality = Neighbors of sample / (size of graph - size of sample)
        # Expansion quality = 1 means sample is at most 1 edge away from entire graph
        sampled_graph_vertices = set(vertex_mapping.keys())
        neighbors = set()
        for vertex in sampled_graph_vertices:
            for neighbor in full_graph.get_out_neighbors(vertex):
                neighbors.add(neighbor)
        neighbors = neighbors - sampled_graph_vertices
        self.expansion_quality = len(neighbors) / (
            full_graph.num_vertices() - sampled_graph.num_vertices())

        ######
        # Clustering coefficient
        ######
        self.sampled_graph_clustering_coefficient = global_clustering(
            sampled_graph)[0]
        self.full_graph_clustering_coefficient = global_clustering(
            full_graph)[0]

        ######
        # Info on communities
        ######
        self.get_community_details(
            assignment,
            full_partition.get_blocks().get_array(),
            sampled_graph_partition.get_blocks().get_array(), vertex_mapping)

        if np.unique(assignment).size == 1:
            # Cannot compute the metrics below if no true partition is provided
            return

        #####
        # % difference in ratio of within-block to between-block edges
        #####
        sample_assignment = assignment[np.fromiter(vertex_mapping.keys(),
                                                   dtype=np.int32)]
        true_sampled_graph_partition = partition_from_truth(
            sampled_graph, sample_assignment)
        sampled_graph_blockmatrix = true_sampled_graph_partition.get_matrix()
        self.sampled_graph_edge_ratio = (
            sampled_graph_blockmatrix.diagonal().sum() /
            sampled_graph_blockmatrix.sum())
        true_full_partition = partition_from_truth(full_graph, assignment)
        full_blockmatrix = true_full_partition.get_matrix()
        self.graph_edge_ratio = (full_blockmatrix.diagonal().sum() /
                                 full_blockmatrix.sum())

        #####
        # Normalized difference from ideal-block membership
        #####
        membership_size = max(np.max(assignment),
                              np.max(sample_assignment)) + 1
        full_graph_membership_nums = np.zeros(membership_size)
        for block_membership in assignment:
            full_graph_membership_nums[block_membership] += 1
        sampled_graph_membership_nums = np.zeros(membership_size)
        for block_membership in sample_assignment:
            sampled_graph_membership_nums[block_membership] += 1
        ideal_block_membership_nums = full_graph_membership_nums * \
            (sampled_graph.num_vertices() / full_graph.num_vertices())
        difference_from_ideal_block_membership_nums = np.abs(
            ideal_block_membership_nums - sampled_graph_membership_nums)
        self.difference_from_ideal_sample = np.sum(
            difference_from_ideal_block_membership_nums /
            sampled_graph.num_vertices())
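
As a worked example of the expansion-quality metric described in the comments above, the sketch below computes it for a toy adjacency-list graph; the function name and data are illustrative only.

def toy_expansion_quality(adjacency: dict, sample: set) -> float:
    """Neighbors of the sample (outside it) divided by the number of unsampled vertices."""
    neighbors = set()
    for vertex in sample:
        neighbors.update(adjacency[vertex])
    neighbors -= sample
    return len(neighbors) / (len(adjacency) - len(sample))

toy_graph = {0: [1, 2], 1: [2], 2: [3], 3: [4], 4: []}
print(toy_expansion_quality(toy_graph, sample={0, 1}))  # frontier {2} / 3 unsampled = 0.333...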
Example #6
    def expansion_snowball_sample(graph: Graph, num_vertices: int,
                                  prev_state: ExpansionSnowballSampleState,
                                  args: argparse.Namespace) -> SampleState:
        """Expansion snowball sampling. At every iteration, picks a vertex adjacent to the current sample that
        contributes the most new neighbors.

        Parameters
        ----------
        graph : Graph
            the filtered graph from which to sample vertices
        num_vertices : int
            number of vertices in the unfiltered graph
        prev_state : ExpansionSnowballSampleState
            the state of the previous sample in the stack. If there is no previous sample, an empty SampleState object
            should be passed in here.
        args : argparse.Namespace
            the command-line arguments provided by the user

        Returns
        -------
        state : SampleState
            the sample state with the sampled vertex ids (Note: these ids correspond to the filtered graph, and have
            to be mapped back to the unfiltered graph)
        """
        state = ExpansionSnowballSampleState(graph.num_vertices(), prev_state)
        sample_num = int(
            (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
        sample_num += len(state.sample_idx)
        if not state.neighbors:  # If there are no neighbors, start with the state.start vertex
            state.index_flag[state.start] = True
            state.neighbors = set(graph.get_out_neighbors(state.start))
            for neighbor in graph.get_out_neighbors(state.start):
                if neighbor == state.start:
                    state.neighbors.remove(neighbor)
                else:
                    state.neighbors_flag[neighbor] = True
                    new_neighbors = 0
                    for _neighbor in graph.get_out_neighbors(neighbor):
                        if not (state.index_flag[_neighbor]
                                or state.neighbors_flag[_neighbor]):
                            new_neighbors += 1
                    state.contribution[neighbor] += new_neighbors
        while (len(state.index_set) == 0
               or len(state.index_set) % sample_num != 0):
            if len(state.neighbors) == 0:  # choose a random vertex not in the index set
                vertex = np.random.choice(
                    np.setxor1d(np.arange(graph.num_vertices()),
                                state.index_set))
                state.index_set.append(vertex)
                state.index_flag[vertex] = True
                for neighbor in graph.get_out_neighbors(vertex):
                    if not (state.neighbors_flag[neighbor]
                            or state.index_flag[neighbor]):
                        Sample._add_neighbor(neighbor, state.contribution,
                                             state.index_flag,
                                             state.neighbors_flag,
                                             graph.get_out_neighbors(neighbor),
                                             graph.get_in_neighbors(neighbor),
                                             state.neighbors)
                continue
            elif np.max(state.contribution) == 0:  # choose random neighbors from the neighbor set
                num_choices = min(len(state.neighbors),
                                  sample_num - len(state.index_set))
                vertices = np.random.choice(
                    np.fromiter(state.neighbors, int, len(state.neighbors)),
                    num_choices, replace=False)
                for vertex in vertices:
                    state.index_set.append(vertex)
                    state.index_flag[vertex] = True
                    state.neighbors.remove(vertex)
                    for neighbor in graph.get_out_neighbors(vertex):
                        if not (state.neighbors_flag[neighbor]
                                or state.index_flag[neighbor]):
                            Sample._add_neighbor(
                                neighbor, state.contribution, state.index_flag,
                                state.neighbors_flag,
                                graph.get_out_neighbors(neighbor),
                                graph.get_in_neighbors(neighbor),
                                state.neighbors)
                continue
            vertex = np.argmax(state.contribution)
            state.index_set.append(vertex)
            state.index_flag[vertex] = True
            state.neighbors.remove(vertex)
            state.contribution[vertex] = 0
            for neighbor in graph.get_in_neighbors(vertex):
                if not (state.neighbors_flag[neighbor]
                        or state.index_flag[neighbor]):
                    Sample._add_neighbor(neighbor, state.contribution,
                                         state.index_flag,
                                         state.neighbors_flag,
                                         graph.get_out_neighbors(neighbor),
                                         graph.get_in_neighbors(neighbor),
                                         state.neighbors)
        state.sample_idx = np.asarray(state.index_set)
        return state
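
The greedy "pick the neighbor that contributes the most new neighbors" rule can be illustrated without the flag arrays, on a dict-of-lists graph. This is a simplified sketch with hypothetical names, not the implementation above.

def toy_expansion_snowball(adjacency: dict, target_size: int, start: int = 0) -> list:
    """Greedy snowball sketch: repeatedly add the frontier vertex with the most unseen neighbors."""
    sample = [start]
    in_sample = {start}
    frontier = set(adjacency[start]) - in_sample
    while len(sample) < target_size and frontier:
        # Contribution of a frontier vertex = number of its neighbors not yet seen.
        vertex = max(frontier,
                     key=lambda v: len(set(adjacency[v]) - in_sample - frontier))
        frontier.remove(vertex)
        sample.append(vertex)
        in_sample.add(vertex)
        frontier |= set(adjacency[vertex]) - in_sample
    return sample

toy_graph = {0: [1, 2], 1: [0, 3], 2: [0, 3, 4], 3: [1, 2], 4: [2]}
print(toy_expansion_snowball(toy_graph, target_size=3))  # starts [0, 2], then a tie-broken third vertex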
Example #7
    def forest_fire_sample(graph: Graph, num_vertices: int,
                           prev_state: ForestFireSampleState,
                           args: argparse.Namespace) -> SampleState:
        """Forest-fire sampling with forward probability = 0.7. At every stage, select 70% of the neighbors of the
        current sample. Vertices that were not selected are 'blacklisted', and no longer viable for future selection.
        If all vertices are thus 'burnt' before the target number of vertices has been selected, restart sampling from
        a new starting vertex.

        Parameters
        ----------
        graph : Graph
            the filtered graph from which to sample vertices
        num_vertices : int
            number of vertices in the unfiltered graph
        prev_state : ForestFireSampleState
            the state of the previous sample in the stack. If there is no previous sample, an empty SampleState object
            should be passed in here.
        args : argparse.Namespace
            the command-line arguments provided by the user

        Returns
        -------
        state : SampleState
            the sample state with the sampled vertex ids (Note: these ids correspond to the filtered graph, and have
            to be mapped back to the unfiltered graph)
        """
        state = ForestFireSampleState(graph.num_vertices(), prev_state)
        sample_num = int(
            (num_vertices * (args.sample_size / 100)) / args.sample_iterations)
        sample_num += len(state.sample_idx)
        while len(state.index_set) == 0 or len(
                state.index_set) % sample_num != 0:
            for vertex in state.current_fire_front:
                # add vertex to index set
                if not state.sampled_marker[vertex]:
                    state.sampled_marker[vertex] = True
                    state.burnt_marker[vertex] = True
                    state.num_burnt += 1
                    state.index_set.append(vertex)
                # select edges to burn
                num_to_choose = np.random.geometric(0.7)
                out_neighbors = graph.get_out_neighbors(vertex)
                if len(out_neighbors) < 1:  # there are no outgoing neighbors
                    continue
                if len(out_neighbors) <= num_to_choose:
                    num_to_choose = len(out_neighbors)
                mask = np.zeros(len(out_neighbors))
                indexes = np.random.choice(np.arange(len(out_neighbors)),
                                           num_to_choose,
                                           replace=False)
                mask[indexes] = 1
                for index, value in enumerate(mask):
                    neighbor = out_neighbors[index]
                    if value == 1:  # if chosen, add to next frontier
                        if not state.burnt_marker[neighbor]:
                            state.next_fire_front.append(neighbor)
                    state.burnt_marker[neighbor] = True  # mark all neighbors as visited
            if state.num_burnt == graph.num_vertices():  # everything is burnt: restart
                state.num_burnt = 0
                state.burnt_marker = [False] * graph.num_vertices()
                state.current_fire_front = [
                    np.random.randint(graph.num_vertices())
                ]
                state.next_fire_front = list()
                continue
            if len(state.next_fire_front) == 0:  # if fire is burnt-out
                state.current_fire_front = [
                    np.random.randint(graph.num_vertices())
                ]
            else:
                state.current_fire_front = list(state.next_fire_front)
                state.next_fire_front = list()
        state.sample_idx = np.asarray(state.index_set[:sample_num])
        return state
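
A simplified, self-contained take on the burning process (without the blacklisting of unselected neighbors done above) might look like the following; all names are hypothetical.

import numpy as np

def toy_forest_fire_sample(adjacency: dict, target_size: int,
                           forward_prob: float = 0.7) -> list:
    """Illustrative forest-fire sampler over a dict-of-lists directed graph."""
    vertices = list(adjacency.keys())
    sampled, burnt = [], set()
    frontier = [int(np.random.choice(vertices))]
    while len(sampled) < target_size and frontier:
        next_frontier = []
        for vertex in frontier:
            if vertex not in burnt:  # burn the frontier vertex itself
                burnt.add(vertex)
                sampled.append(vertex)
            candidates = [n for n in adjacency[vertex] if n not in burnt]
            if not candidates:
                continue
            # Burn a geometric(forward_prob) number of unburnt out-neighbors.
            n_burn = min(np.random.geometric(forward_prob), len(candidates))
            for neighbor in np.random.choice(candidates, n_burn, replace=False):
                neighbor = int(neighbor)
                burnt.add(neighbor)
                sampled.append(neighbor)
                next_frontier.append(neighbor)
        if next_frontier:
            frontier = next_frontier
        else:  # fire burnt out: restart from a random unburnt vertex, if any remain
            unburnt = [v for v in vertices if v not in burnt]
            frontier = [int(np.random.choice(unburnt))] if unburnt else []
    return sampled[:target_size]

print(toy_forest_fire_sample({0: [1, 2], 1: [3], 2: [3], 3: [0]}, target_size=3))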