Exemplo n.º 1
0
    def gen_graph_from_mongo(self):
        '''
        Build an undirected graph from every stored material and its edges.

        The ``material_id`` and ``edges`` fields are fetched through
        ``self.from_storage`` and each material id is linked to every entry
        of its edge list. ``self.memory`` is reset to None as soon as the
        two fields have been extracted.

        Note that add_edge_list will not match vertex ids (str ids) in
        subsequent calls of the function.

        Returns:
            Graph: undirected graph built from the hashed string ids
        '''
        self.from_storage(find={'projection': {'material_id': 1, 'edges': 1}})
        material_ids = self.memory['material_id']
        neighbour_lists = self.memory['edges']
        self.memory = None  # release the loaded query result

        print('loaded data structures')

        edge_list = []
        for material_id, neighbours in zip(material_ids, neighbour_lists):
            for neighbour in neighbours:
                edge_list.append((material_id, neighbour))

        print('generated edge list')

        # drop the references to the temporary data before building the graph
        material_ids = None
        neighbour_lists = None

        graph = Graph(directed=False)
        graph.add_edge_list(edge_list, hashed=True, string_vals=True)
        return graph
Exemplo n.º 2
0
def test_feasibility(g, weights):
    """Check that the edges from minimum_branching form an arborescence.

    The graph and weights are converted with ``from_gt`` and
    ``minimum_branching`` is called with ``[0]`` as its second argument.
    """
    branching_edges = minimum_branching(from_gt(g, weights), [0])

    arborescence = Graph(directed=True)
    arborescence.add_edge_list(branching_edges)
    assert is_arborescence(arborescence)
Exemplo n.º 3
0
def alignment_graph(lengths=None, pairings=None, alignments=None):
    """Build an undirected graph linking aligned time points of sequences.

    Every time point of every sequence becomes one vertex; the vertices of
    sequence ``s`` follow those of sequences ``0..s-1``. Each aligned pair
    of time points becomes one edge.

    Parameters:
    lengths - sequence of int - number of time points of each sequence
    pairings - sequence of (j, k) - for each entry of ``alignments``, the
        indices of the two sequences it aligns
    alignments - sequence - one entry per pairing; each entry is a list of
        segments and each segment is an array of (index in j, index in k)
        rows. Empty entries are skipped.

    Returns:
    g - Graph - the alignment graph
    seq_index - vertex property - index of the sequence of each vertex
    time_index - vertex property - position of each vertex in its sequence
    alignment_index - edge property - index of the alignment of each edge
    segment_index - edge property - index of the segment (within its
        alignment) of each edge
    """
    lengths = [] if lengths is None else lengths
    pairings = [] if pairings is None else pairings
    alignments = [] if alignments is None else alignments

    g = Graph(directed=False)
    seq_index = g.new_vertex_property("int")
    time_index = g.new_vertex_property("int")
    if len(lengths) > 0:
        # np.concatenate raises on an empty list, so the vertex properties
        # are only filled when there is at least one sequence
        g.add_vertex(sum(lengths))
        seq_index.a = np.concatenate(
            [np.repeat(seq, length) for seq, length in enumerate(lengths)])
        time_index.a = np.concatenate(
            [np.arange(length) for length in lengths])

    alignment_index = g.new_edge_property("int")
    segment_index = g.new_edge_property("int")
    for align_id, segments in enumerate(alignments):
        # len() rather than truthiness: an entry may be a numpy array
        if len(segments) == 0:
            continue
        j, k = pairings[align_id]
        pairs = np.concatenate(segments, axis=0)
        # translate per-sequence positions into global vertex indices
        indices_j = (np.arange(lengths[j]) + sum(lengths[:j]))[pairs.T[0]]
        indices_k = (np.arange(lengths[k]) + sum(lengths[:k]))[pairs.T[1]]
        seg_indices = np.concatenate(
            [np.repeat(seg_id, len(segment))
             for seg_id, segment in enumerate(segments)])
        # columns: source, target, alignment index, segment index
        g.add_edge_list(
            np.vstack([indices_j, indices_k,
                       np.repeat(align_id, len(pairs)), seg_indices]).T,
            eprops=[alignment_index, segment_index])
    return g, seq_index, time_index, alignment_index, segment_index
Exemplo n.º 4
0
def construct_motif_graph(graph_container, motif, vertex_maps=None):
    """Build the undirected gt graph of the motif relationship.

    graph_tool creates empty nodes to fill in missing indices: adding edge
    (1,2) to an empty graph yields 3 nodes (0, 1, 2) and 1 edge (1,2). The
    returned `m_graph` therefore usually has many disconnected nodes.

    The graph obtained from `graph_container` has its directedness set to
    that of `motif.gt_motif` as a side effect.

    Parameters:
    graph_container - GraphContainer - Store the original network
    motif - Motif - Motif in study
    vertex_maps - iterable of iterables of property maps, optional - when
        None, the third value returned by `count_motif(graph, motif)` is used

    Returns:
    m_graph - gt.Graph - Undirected graph for motif cooccurence
    """
    if motif.anchors is None:
        print("Warning: Turning motif groups into cliques.")

    graph = graph_container.get_gt_graph()
    graph.set_directed(motif.gt_motif.is_directed())

    if vertex_maps is None:
        _, _, vertex_maps = count_motif(graph, motif)

    # graph_tool.Graph
    m_graph = Graph(directed=False)
    for prop_list in vertex_maps:
        for prop in prop_list:
            anchored = motif.anchored_edges(graph, prop.get_array())
            m_graph.add_edge_list(list(anchored))
    return m_graph
Exemplo n.º 5
0
    def gen_sub_graph_from_mongo(self, center, snn=1):
        '''
        Build an undirected graph of the neighbourhood of one material.

        The edges of ``center`` are loaded first. Then, ``snn`` times, the
        materials reached in the previous round are queried and their edges
        are added as well. Note that add_edge_list will not match vertex
        ids (str ids) in subsequent calls of the function.

        Args:
            center (str) mp-id of the center of the graph
            snn (int) the number of second nearest neighbors to expand to
                (number of expansion rounds after the center query)

        Returns:
            Graph: undirected graph built from the collected edge list
        '''
        self.from_storage(find={
            'filter': {'material_id': center},
            'projection': {'material_id': 1, 'edges': 1},
        })
        center_id = self.memory['material_id'][0]
        destinations = self.memory['edges'][0]

        edge_list = [(center_id, neighbour) for neighbour in destinations]

        for _ in range(snn):
            self.from_storage(find={
                'filter': {'material_id': {'$in': destinations}},
                'projection': {'material_id': 1, 'edges': 1},
            })
            # every neighbour found in this round is queried in the next one
            frontier = []
            for source, neighbours in zip(self.memory['material_id'],
                                          self.memory['edges']):
                for neighbour in neighbours:
                    edge_list.append((source, neighbour))
                    frontier.append(neighbour)
            destinations = frontier

        print('generated edge list')

        graph = Graph(directed=False)
        graph.add_edge_list(edge_list, hashed=True, string_vals=True)
        return graph
Exemplo n.º 6
0
def test_feasibility(g, weights):
    """Check that find_minimum_branching returns the edges of an arborescence.

    Each edge of ``g`` is paired with the weight at the same position and
    the branching is requested with ``roots=[0]``.
    """
    weighted_edges = []
    for edge, weight in zip(g.get_edges(), weights):
        weighted_edges.append((edge[0], edge[1], weight))

    branching = find_minimum_branching(
        g.num_vertices(), weighted_edges, roots=[0])

    arborescence = Graph(directed=True)
    arborescence.add_edge_list(branching)
    assert is_arborescence(arborescence)
Exemplo n.º 7
0
def graph_from_matrix(matrix, directed=False):
    """Build a graph with one edge per nonzero entry of ``matrix``.

    The graph gets ``len(matrix)`` vertices. Every nonzero entry (i, j)
    becomes an edge i -> j whose weight is the entry's value, so (i, j) and
    (j, i) both produce an edge when both entries are nonzero.

    Returns the graph and the float edge property holding the weights.
    """
    g = Graph(directed=directed)
    g.add_vertex(len(matrix))
    weights = g.new_ep("float")

    nonzero = np.nonzero(matrix)
    # one row of indices per matrix axis, followed by the row of values
    stacked = np.vstack(list(nonzero) + [matrix[nonzero]])
    g.add_edge_list([tuple(column) for column in stacked.T], eprops=[weights])
    return g, weights
Exemplo n.º 8
0
def test_graphtool():
    """Check gt2edges_and_weights on a weighted directed 4-cycle."""
    # (source, target, weight) of every edge of the cycle
    cycle = [(0, 1, 1), (1, 2, 2), (2, 3, 3), (3, 0, 4)]

    g = Graph(directed=True)
    g.add_vertex(4)
    g.add_edge_list([(source, target) for source, target, _ in cycle])

    weight = g.new_edge_property('float')
    for source, target, value in cycle:
        weight[g.edge(source, target)] = value

    assert set(gt2edges_and_weights(g, weight)) == set(cycle)
Exemplo n.º 9
0
class ZonedNetwork:
    """Directed graph whose vertices are the zones of a grid over a field.

    A field of ``field_size`` (width, height) is divided into ``size``
    (rows, columns) equally sized zones. The zone in row ``r`` and column
    ``c`` is the vertex with index ``r * n_cols + c``.
    """

    def __init__(self,
                 size: Tuple[int, int] = (10, 10),
                 field_size: Tuple[int, int] = (100, 100)):
        """Create one vertex per zone and no edges."""
        self.g = Graph(directed=True)
        self.n_zones = size[0] * size[1]
        self.fwidth = field_size[0]
        self.fheight = field_size[1]
        self.n_rows = size[0]
        self.n_cols = size[1]
        self.row_size: float = self.fheight / self.n_rows
        self.col_size: float = self.fwidth / self.n_cols
        self.g.add_vertex(self.n_zones)

    def get_zone(self, coords: Tuple):
        """Return the vertex of the zone containing ``coords`` (x, y).

        Coordinates outside the field are clamped to the nearest border
        zone on every side.
        """
        row = int(coords[1] / self.row_size)
        col = int(coords[0] / self.col_size)
        # The upper bound handles points on or beyond the far border; the
        # lower bound keeps negative coordinates from producing a negative
        # vertex index.
        row = max(0, min(self.n_rows - 1, row))
        col = max(0, min(self.n_cols - 1, col))
        return self.g.vertex(row * self.n_cols + col)

    def add_passes(self, coords_pairs: List[Tuple]):
        """Add one edge per (x1, y1, x2, y2) entry, from zone 1 to zone 2.

        Returns whatever ``Graph.add_edge_list`` returns.
        """
        pairs = [(self.get_zone((x1, y1)), self.get_zone((x2, y2)))
                 for x1, y1, x2, y2 in coords_pairs]
        return self.g.add_edge_list(pairs)

    def save(self, file: str):
        """Write the graph to ``file`` in GraphML format."""
        self.g.save(file, fmt='graphml')
Exemplo n.º 10
0
def clean_up(g, seg_index):
    """Return a copy of ``g`` reduced to the edges of the best segment combo.

    ``get_segment_combos`` maps segment combinations to a score; the key
    with the highest score is selected (the first one on ties). The result
    is a new undirected graph with as many vertices as ``g`` that keeps only
    the edges whose ``seg_index`` value is contained in that key.
    """
    seg_combos = get_segment_combos(g, seg_index)
    ranked = sorted(seg_combos.items(), key=lambda item: item[1],
                    reverse=True)
    best = ranked[0][0]

    # column 2 holds the seg_index value requested alongside each edge
    all_edges = g.get_edges([seg_index])
    kept_edges = all_edges[np.isin(all_edges[:, 2], best)]

    reduced = Graph(directed=False)
    reduced.add_vertex(len(g.get_vertices()))
    reduced.add_edge_list(kept_edges)
    return reduced
Exemplo n.º 11
0
def build_graph(m_codes, m_list):
    """Build the graph of data, model and imputation vertices.

    Vertices are created in three consecutive groups: one "data" vertex per
    attribute, one "model" vertex per model and one "imputation" vertex per
    attribute. For model ``i`` an edge ``d -> model`` is added for every
    ``d`` in ``m_list[i].desc_ids`` and an edge ``model -> t`` for every
    ``t`` in ``m_list[i].targ_ids``.

    Parameters:
    m_codes - array whose shape is (n_models, n_attributes)
    m_list - sequence of models exposing ``desc_ids`` and ``targ_ids``

    Returns:
    g - Graph with the vertex property ``g.vp.names`` (name of each vertex)
        and the attribute ``g.v_map`` (dict from name to vertex index)
    """
    n_models, n_attributes = m_codes.shape

    g = Graph()

    v_map = {}
    names = g.new_vertex_property("object")

    def add_named_vertices(count, kind):
        """Add ``count`` vertices, name them and return them as a list."""
        first = g.num_vertices()
        # The return value of add_vertex is not used: it is a single vertex
        # rather than an iterator when count == 1, so the new vertices are
        # looked up by index instead.
        g.add_vertex(count)
        vertices = [g.vertex(first + offset) for offset in range(count)]
        for v_idx, v in enumerate(vertices):
            v_n = v_name(v_idx, kind=kind)
            v_map[v_n] = int(v)
            names[v] = v_n
        return vertices

    add_named_vertices(n_attributes, "data")
    v_mods = add_named_vertices(n_models, "model")
    add_named_vertices(n_attributes, "imputation")

    for v_idx, v in enumerate(v_mods):
        g.add_edge_list((d, v) for d in m_list[v_idx].desc_ids)
        g.add_edge_list((v, t) for t in m_list[v_idx].targ_ids)

    g.vp.names = names
    g.v_map = v_map
    return g
Exemplo n.º 12
0
 def get_pagerank_values(self):
     """Yield ``(id, rank)`` for every vertex of the graph of ``self.__v``.

     A graph is built from ``self.__v.get_graph_edges()`` with hashed
     integer ids and ``pagerank`` is computed on it. The property map
     returned by ``add_edge_list`` is used to look up the id of each
     vertex. The time taken by both steps is logged.
     """
     start = time.time()
     logger.info('Started call to get_pagerank')
     g = Graph()
     vp = g.add_edge_list(self.__v.get_graph_edges(),
                          hashed=True,
                          hash_type='int')
     build_elapsed = timedelta(seconds=(time.time() - start))
     logger.info('Delta time to build graph: {}s'.format(build_elapsed))

     start = time.time()
     ranks = pagerank(g)
     rank_elapsed = timedelta(seconds=(time.time() - start))
     logger.info('Delta time to compute pagerank: {}s'.format(rank_elapsed))

     for vertex in g.vertices():
         yield vp[vertex], ranks[vertex]
Exemplo n.º 13
0
        dict_map = pickle.load(handle)

    print (dict_map)

    print ("Carregando arquivo...")

    g = Graph(directed=False)

    edgelist = []

    with open(args.edge_list) as f:
        for line in f:
            if(line):
                edgelist.append(map(int,line.split()))

    labels_vertices = g.add_edge_list(edgelist,hashed=True)

    labels_vertices_str = g.new_vertex_property("string")
    for v in g.vertices():
        labels_vertices_str[v] = str(labels_vertices[v])

    labels_vertices_inv = mapeiaLabels(g,labels_vertices)

    pos = sfdp_layout(g)
    colors = trataCores(dict_map)
    pos_new = trataPosicoes(g,pos,dict_map,labels_vertices_inv)

    color = g.new_vertex_property("string")

    for v in g.vertices():
        index = g.vertex_index[v]
Exemplo n.º 14
0
class GeneralGraph():
    """
    General wrapper for graph-tool or networkx graphs to add edges and nodes
    according to constraints
    """
    def __init__(self, directed=True, verbose=1):
        """
        Create an empty graph with the backend selected by GRAPH_TOOL
        :param directed: build a directed graph if True, else undirected
        :param verbose: stored on the instance; truthy values enable the
                        progress prints of the other methods
        """
        self.graphtool = GRAPH_TOOL
        if self.graphtool:
            # graph-tool backend: the summed weight is an edge property
            self.graph = Graph(directed=directed)
            self.weight = self.graph.new_edge_property("float")
        elif directed:
            print("directed graph")
            self.graph = nx.DiGraph()
        else:
            self.graph = nx.Graph()
        # metaparameters
        self.verbose = verbose
        self.time_logs = {}

    def set_edge_costs(self,
                       layer_classes=["resistance"],
                       class_weights=[1],
                       **kwargs):
        """
        Initialize edge cost variables
        :param classes: list of cost categories
        :param weights: list of weights for cost categories - must be of same 
                        shape as classes (if None, then equal weighting)
        """
        class_weights = np.array(class_weights)
        # set different costs:
        self.cost_classes = layer_classes
        if self.graphtool:
            self.cost_props = [
                self.graph.new_edge_property("float")
                for _ in range(len(layer_classes))
            ]
        self.cost_weights = class_weights / np.sum(class_weights)
        if self.verbose:
            print(self.cost_classes, self.cost_weights)
        # save weighted instance for plotting
        self.instance = np.sum(
            np.moveaxis(self.cost_instance, 0, -1) * self.cost_weights,
            axis=2) * self.hard_constraints

    def set_shift(self,
                  start,
                  dest,
                  pylon_dist_min=3,
                  pylon_dist_max=5,
                  max_angle=np.pi / 2,
                  **kwargs):
        """
        Initialize the shifts (self.shifts and self.shift_tuples) with the
        result of get_half_donut
        :param start, dest: start and destination; their difference
                        dest - start is passed on as direction vector
        :param pylon_dist_min, pylon_dist_max: min and max distance of pylons
        :param max_angle: Maximum angle of edges to the direction vector
        :param kwargs: not used
        """
        direction = dest - start
        if self.verbose:
            print("SHIFT:", pylon_dist_min, pylon_dist_max, direction,
                  max_angle)
        shifts = get_half_donut(pylon_dist_min,
                                pylon_dist_max,
                                direction,
                                angle_max=max_angle)
        self.shifts = shifts
        self.shift_tuples = shifts

    def set_corridor(self,
                     dist_surface,
                     start_inds,
                     dest_inds,
                     sample_func="mean",
                     sample_method="simple",
                     factor_or_n_edges=1):
        # set new corridor
        corridor = (dist_surface > 0).astype(int)

        self.factor = factor_or_n_edges
        self.cost_rest = self.cost_instance * (self.hard_constraints >
                                               0).astype(int) * corridor
        # downsample
        tic = time.time()
        if self.factor > 1:
            self.cost_rest = CostUtils.downsample(self.cost_rest,
                                                  self.factor,
                                                  mode=sample_method,
                                                  func=sample_func)

        self.time_logs["downsample"] = round(time.time() - tic, 3)

        # repeat because edge artifacts
        self.cost_rest = self.cost_rest * (self.hard_constraints >
                                           0).astype(int) * corridor

        # add start and end TODO ugly
        self.cost_rest[:, dest_inds[0],
                       dest_inds[1]] = self.cost_instance[:, dest_inds[0],
                                                          dest_inds[1]]
        self.cost_rest[:, start_inds[0],
                       start_inds[1]] = self.cost_instance[:, start_inds[0],
                                                           start_inds[1]]

    def add_nodes(self, nodes):
        """
        Add vertices to the graph
        param nodes: list of node names if networkx, integer if graphtool
        """
        tic = time.time()
        # add nodes to graph
        if self.graphtool:
            _ = self.graph.add_vertex(nodes)
            self.n_nodes = len(list(self.graph.vertices()))
        else:
            self.graph.add_nodes_from(np.arange(nodes))
            self.n_nodes = len(self.graph.nodes())
        # verbose
        if self.verbose:
            print("Added nodes:", nodes, "in time:", time.time() - tic)
        self.time_logs["add_nodes"] = round(time.time() - tic, 3)

    def add_edges(self):
        tic_function = time.time()

        n_edges = 0
        # kernels, posneg = ConstraintUtils.get_kernel(self.shifts,
        # self.shift_vals)
        # edge_array = []

        times_edge_list = []
        times_add_edges = []

        if self.verbose:
            print("n_neighbors:", len(self.shift_tuples))

        for i in range(len(self.shift_tuples)):

            tic_edges = time.time()

            # set cost rest if necessary (random graph)
            self.set_cost_rest()

            # compute shift and weights
            out = self._compute_edges(self.shift_tuples[i])

            # Error if -1 entries because graph-tool crashes with -1 nodes
            if np.any(out[:, :2].flatten() < 0):
                print(np.where(out[:, :2] < 0))
                raise RuntimeError

            n_edges += len(out)
            times_edge_list.append(round(time.time() - tic_edges, 3))

            # add edges to graph
            tic_graph = time.time()
            if self.graphtool:
                self.graph.add_edge_list(out, eprops=self.cost_props)
            else:
                nx_edge_list = [(e[0], e[1], {
                    "weight": np.sum(e[2:] * self.cost_weights)
                }) for e in out]
                self.graph.add_edges_from(nx_edge_list)
            times_add_edges.append(round(time.time() - tic_graph, 3))

            # alternative: collect edges here and add alltogether
            # edge_array.append(out)

        # # alternative: add edges all in one go
        # tic_concat = time.time()
        # edge_lists_concat = np.concatenate(edge_array, axis=0)
        # self.time_logs["concatenate"] = round(time.time() - tic_concat, 3)
        # print("time for concatenate:", self.time_logs["concatenate"])
        # tic_graph = time.time()
        # self.graph.add_edge_list(edge_lists_concat, eprops=[self.weight])
        # self.time_logs["add_edges"] = round(
        #     (time.time() - tic_graph) / len(shifts), 3
        # )
        self.n_edges = len(list(self.graph.edges()))
        self._update_time_logs(times_add_edges, times_edge_list, tic_function)
        if self.verbose:
            print("DONE adding", n_edges, "edges:", time.time() - tic_function)

    def _update_time_logs(self, times_add_edges, times_edge_list,
                          tic_function):
        self.time_logs["add_edges"] = round(np.mean(times_add_edges), 3)
        self.time_logs["add_edges_times"] = times_add_edges

        self.time_logs["edge_list"] = round(np.mean(times_edge_list), 3)
        self.time_logs["edge_list_times"] = times_edge_list

        self.time_logs["add_all_edges"] = round(time.time() - tic_function, 3)

        if self.verbose:
            print("Done adding edges:", len(list(self.graph.edges())))

    def sum_costs(self):
        """
        Additive weighting of costs
        Take the individual edge costs, compute weighted sum --> self.weight
        """
        # add sum of all costs
        if not self.graphtool:
            return
        tic = time.time()
        summed_costs_arr = np.zeros(self.cost_props[0].get_array().shape)
        for i in range(len(self.cost_props)):
            prop = self.cost_props[i].get_array()
            summed_costs_arr += prop * self.cost_weights[i]
        self.weight.a = summed_costs_arr

        self.time_logs["sum_of_costs"] = round(time.time() - tic, 3)

    def remove_vertices(self, dist_surface, delete_padding=0):
        """
        Remove edges in a certain corridor (or all) to replace them by
        a refined surface

        @param dist_surface: a surface where each pixel value corresponds to 
        the distance of the pixel to the shortest path
        @param delete_padding: define padding in which part of the corridor to 
        delete vertices (cannot delete all because then graph unconnected)
        """
        tic = time.time()
        self.graph.clear_edges()
        self.graph.shrink_to_fit()
        self.time_logs["remove_edges"] = round(time.time() - tic, 3)

    def get_pareto(self,
                   vary,
                   source,
                   dest,
                   out_path=None,
                   compare=[0, 1],
                   plot=1):
        """
        Compute the shortest path for a grid of weightings of the cost
        classes in `compare` and optionally plot the resulting costs.

        The weights of the classes not in `compare` are kept; the share
        that the compared classes have in self.cost_weights is redistributed
        among them. self.weight is overwritten for every combination and is
        left at the last combination tried.

        Arguments:
            vary: how many weights to explore
                    e.g 3 --> each cost class can have weight 0, 0.5 or 1
            source, dest: as always the source and destination vertex
            out_path: where to save the pareto figure(s)
            compare: indices of cost classes to compare (2 or 3 of them)
            plot: if truthy, draw the scatter plot of the results
        Returns:
            paths: All found paths, one per weight combination
            weights: The weight combinations that were tried
            cost_sum: For each combination, the total (unweighted) costs
                      of the path it leads to
        Raises:
            ValueError: if compare has a length other than 2 or 3
        """
        tic = time.time()
        # initialize lists
        pareto = list()
        paths = list()
        cost_sum = list()
        # get the edge costs
        cost_arrs = [cost.get_array() for cost in self.cost_props]
        # [self.cost_props[comp].get_array() for comp in compare]

        # get vary weights between 0 and 1 (rounded to two decimals)
        var_weights = np.around(np.linspace(0, 1, vary), 2)

        # construct weights array: every combination sums to 1
        if len(compare) == 2:
            weights = [[v, 1 - v] for v in var_weights]
        elif len(compare) == 3:
            weights = list()
            for w0 in var_weights:
                for w1 in var_weights[var_weights <= 1 - w0]:
                    weights.append([w0, w1, 1 - w0 - w1])
        else:
            raise ValueError("argument compare can only have length 2 or 3")

        # w_avail: keep weights of non-compare classes, get leftover amount
        w_avail = np.sum(np.asarray(self.cost_weights)[compare])
        # compute paths for each combination of weights
        for j in range(len(weights)):
            # option 2: np.zeros(len(cost_arrs)) + non_compare_weight
            w = self.cost_weights.copy()
            # replace the ones we want to compare
            w[compare] = np.array(weights[j]) * w_avail

            # weighted sum of edge costs
            self.weight.a = np.sum(
                [cost_arrs[i] * w[i] for i in range(len(cost_arrs))], axis=0)
            # get shortest path
            # NOTE(review): three values are unpacked here, while the
            # get_shortest_path defined in this class returns only the
            # path - presumably a subclass overrides it; confirm
            path, path_costs, _ = self.get_shortest_path(source, dest)
            # don't take cost_sum bc this is sum of original weighting
            pareto.append(np.sum(path_costs, axis=0)[compare])
            paths.append(path)
            # take overall sum of costs (unweighted) that this w leads to
            cost_sum.append(np.sum(path_costs))

        # print best weighting
        best_weight = np.argmin(cost_sum)
        w = self.cost_weights.copy()
        w[compare] = np.array(weights[best_weight]) * w_avail
        print("Best weights:", w, "with (unweighted) costs:", np.min(cost_sum))

        self.time_logs["pareto"] = round(time.time() - tic, 3)

        # pareto is only used for plotting; it is not returned
        pareto = np.array(pareto)
        classes = [self.cost_classes[comp] for comp in compare]
        # Plotting
        if plot:
            if len(compare) == 2:
                plot_pareto_scatter_2d(pareto,
                                       weights,
                                       classes,
                                       cost_sum=cost_sum,
                                       out_path=out_path)
            elif len(compare) == 3:
                # plot_pareto_3d(pareto, weights, classes)
                plot_pareto_scatter_3d(pareto,
                                       weights,
                                       classes,
                                       cost_sum=cost_sum,
                                       out_path=out_path)
        return paths, weights, cost_sum

    def get_shortest_path(self, source, target):
        """
        Compute shortest path from source vertex to target vertex

        Returns the vertices of the path. With the networkx backend an
        empty list is returned if no path exists; in that case no time is
        logged.
        """
        tic = time.time()
        if not self.graphtool:
            try:
                vertices_path = nx.dijkstra_path(self.graph, source, target)
            except nx.exception.NetworkXNoPath:
                return []
        else:
            # source and target are passed on to graph-tool unchanged;
            # only the vertex list of the result is kept
            vertices_path, _ = shortest_path(self.graph,
                                             source,
                                             target,
                                             weights=self.weight,
                                             negative_weights=True)

        self.time_logs["shortest_path"] = round(time.time() - tic, 3)
        return vertices_path

    def save_graph(self, OUT_PATH):
        """
        Save the graph in OUT_PATH
        """
        if self.graphtool:
            for i, cost_class in enumerate(self.cost_classes):
                self.graph.edge_properties[cost_class] = self.cost_props[i]
            self.graph.edge_properties["weight"] = self.weight
            self.graph.save(OUT_PATH + ".xml.gz")
        else:
            nx.write_weighted_edgelist(self.graph,
                                       OUT_PATH + '.weighted.edgelist')

    def load_graph(self, IN_PATH):
        """
        Retrieve graph from IN_PATH and store it in self.g_prev

        graph-tool: reads IN_PATH + ".xml.gz" and also stores its "weight"
        edge property in self.weight_prev. networkx: reads the weighted
        edge list IN_PATH + ".weighted.edgelist".
        """
        if not self.graphtool:
            self.g_prev = nx.read_edgelist(IN_PATH + '.weighted.edgelist',
                                           nodetype=int,
                                           data=(('weight', float), ))
            return

        # load_graph here is the module-level function, not this method
        self.g_prev = load_graph(IN_PATH + ".xml.gz")
        self.weight_prev = self.g_prev.ep.weight

    # -----------------------------------------------------------------------
    # INTERFACE

    def single_sp(self, **kwargs):
        """
        Function for full processing until shortest path

        Runs, in this order: set_shift, set_edge_costs, add_nodes,
        add_edges, sum_costs, add_start_and_dest and get_shortest_path.

        :param kwargs: must contain "start_inds" and "dest_inds"; all
                       keyword arguments are also passed on to set_shift
                       and set_edge_costs
        :returns: path, path_costs, cost_sum as returned by
                  get_shortest_path
        """
        self.start_inds = kwargs["start_inds"]
        self.dest_inds = kwargs["dest_inds"]
        self.set_shift(self.start_inds, self.dest_inds, **kwargs)
        # self.set_corridor(
        #     np.ones(self.hard_constraints.shape) * 0.5,
        #     self.start_inds,
        #     self.dest_inds,
        #     factor_or_n_edges=1
        # )
        if self.verbose:
            print("1) Initialize shifts and instance (corridor)")
        self.set_edge_costs(**kwargs)
        # add vertices
        # NOTE(review): called without arguments, while add_nodes in this
        # class requires `nodes` - presumably overridden in a subclass;
        # confirm
        self.add_nodes()
        if self.verbose:
            print("2) Initialize distances to inf and predecessors")
        self.add_edges()
        if self.verbose:
            print("3) Compute source shortest path tree")
            print("number of vertices and edges:", self.n_nodes, self.n_edges)

        # weighted sum of all costs
        self.sum_costs()
        # NOTE(review): add_start_and_dest is not defined in this class -
        # presumably provided by a subclass; confirm
        source_v, target_v = self.add_start_and_dest(self.start_inds,
                                                     self.dest_inds)
        # get actual best path
        # NOTE(review): three values are unpacked, while get_shortest_path
        # in this class returns only the path - confirm against the
        # subclass implementation
        path, path_costs, cost_sum = self.get_shortest_path(source_v, target_v)
        if self.verbose:
            print("4) shortest path", cost_sum)
        return path, path_costs, cost_sum
Exemplo n.º 15
0
def phylomemetic_graph(steps,
                       communities,
                       min_size=3,
                       max_size=50,
                       parent_limit=2,
                       workers='auto',
                       chunksize='auto',
                       method='fast',
                       min_backwards_containment=0,
                       min_forward_containment=0):
    '''phylomemetic_graph

    Builds a directed graph with one vertex per (size-filtered) community
    and the edges found by ``find_links`` between the communities of each
    pair of consecutive periods.

    Parameters
    ----------
        steps : :obj:`iter` of :obj:`int`
            Value stored in ``vertex_steps`` for the communities of each
            period.
        communities : :obj:`iter` of :obj:`iter` of :obj:`int`
            Communities of each period, in the order of ``steps``.
        min_size : :obj:`int`
            Passed to ``filter_by_size``.
        max_size : :obj:`int`
            Passed to ``filter_by_size``.
        parent_limit : :obj:`int`
            Passed to ``find_links``.
        workers : :obj:`int`
            Number of worker processes; ``'auto'`` uses all cores but one
            (at least one).
        chunksize : :obj:`int`
            Chunk size of ``Pool.map``; ``'auto'`` splits the communities
            of a period evenly among the workers.
        method : :obj:`str`
            Not used by this implementation.
        min_backwards_containment : :obj:`float`
            Not used by this implementation.
        min_forward_containment : :obj:`float`
            Not used by this implementation.

    Returns
    -------
        g : :obj:`graph_tool.Graph`
        group_link_strength : :obj:`graph_tool.EdgePropertyMap`
        single_link_strength : :obj:`graph_tool.EdgePropertyMap`
        vertex_steps : :obj:`graph_tool.VertexPropertyMap`
        element_vertex_map : :obj:`dict`
    '''
    if workers == 'auto':
        # keep one core free, but never ask for fewer than one worker
        workers = max(1, cpu_count() - 1)

    communities_filt = []
    element_community_mappings = []
    for sequences in communities:
        s_filt = list(filter_by_size(sequences, min_size, max_size))
        communities_filt.append(s_filt)
        element_community_mappings.append(reverse_index(s_filt))

    # communities_offsets[p] is the half-open vertex range of period p
    communities_offsets = []
    n_communities = 0
    for s_filt in communities_filt:
        start = n_communities
        n_communities += len(s_filt)
        communities_offsets.append((start, n_communities))

    phylomemetic_links = []

    n_periods = len(communities_filt) - 1
    for i, (cps, cfs) in enumerate(window(communities_filt, 2)):
        n_cf = len(cfs)
        logger.info(f'Processing {i+1} of {n_periods} periods')
        if chunksize == 'auto':
            chunksize_i = max(1, int(np.ceil(n_cf / workers)))
        else:
            chunksize_i = chunksize

        with Pool(workers) as pool:
            # one task per community of the later period; the earlier
            # period, its vertex range and its element mapping are
            # repeated for every task
            phylomemetic_links.append(
                pool.map(
                    find_links,
                    zip(
                        cfs,
                        range(0, len(cfs)),
                        repeat(cps, n_cf),
                        repeat(communities_offsets[i], n_cf),
                        repeat(element_community_mappings[i], n_cf),
                        repeat(parent_limit, n_cf),
                    ),
                    chunksize=chunksize_i,
                ))
            pool.close()
            pool.join()

    g = Graph(directed=True)
    g.add_vertex(n_communities)

    group_link_strength = g.new_edge_property('float')
    single_link_strength = g.new_edge_property('float')

    phylomemetic_links = flatten(flatten(phylomemetic_links))
    g.add_edge_list(phylomemetic_links,
                    eprops=[group_link_strength, single_link_strength])

    element_vertex_map = reverse_index_communities(flatten(communities_filt))

    vertex_steps = g.new_vertex_property('int')
    for (start, end), step in zip(communities_offsets, steps):
        vertex_steps.a[start:end] = step

    return (g, group_link_strength, single_link_strength, vertex_steps,
            element_vertex_map)
Exemplo n.º 16
0
class BiblioNetwork():
    """Build and draw article / co-author networks from a bibliography CSV.

    The methods below read the columns ``Authors``, ``Cited by`` and ``Year``
    of the CSV file. Graphs are built with graph-tool (``Graph`` and the
    ``*_layout`` / ``graph_draw`` functions the module imports).
    """

    def __init__(self, filepath):
        """Remember the CSV path; nothing is read until `parse` is called."""
        self.filepath = filepath
        # pandas DataFrame holding the bibliography, set by `parse`
        self.db = None
        # Cached co-author table and the db length it was computed from
        self._auth_betw = None
        self._auth_betw_computed_from = 0
        # Vertex positions of the last computed layout
        self.layout_pos = None
        # Last graph built by `make_article_graph` / `make_author_graph`
        self.graph = None
        # Sorted unique author names, kept in sync by `update_author_list`
        self.author_list = []

    @staticmethod
    def _split_authors(row):
        """Split the ``Authors`` field of a row into one string per author.

        The field is split on ", " and consecutive pieces are re-joined two
        by two, so ``"Logan, B.E., Smith, J."`` gives
        ``["Logan, B.E.", "Smith, J."]``. A trailing unpaired piece is
        dropped.
        """
        parts = row['Authors'].split(", ")
        return [", ".join(parts[i:i + 2])
                for i in range(0, len(parts) - 1, 2)]

    def parse(self, nmb_to_import=None, delimiter=","):
        """Read the CSV database into ``self.db``.

        Parameters
        ----------
        nmb_to_import : int or None
            Maximum number of rows to read (``None`` reads everything).
        delimiter : str
            Field delimiter of the CSV file.
        """
        read_kwargs = dict(delimiter=delimiter, index_col=False,
                           nrows=nmb_to_import, encoding="ISO8859")
        try:
            # pandas >= 1.3: malformed lines are skipped with a warning
            self.db = pd.read_csv(self.filepath, on_bad_lines="warn",
                                  **read_kwargs)
        except TypeError:
            # Older pandas does not know `on_bad_lines`; same behaviour
            # through the legacy keywords
            self.db = pd.read_csv(self.filepath, error_bad_lines=False,
                                  warn_bad_lines=True, **read_kwargs)
        # separate authors
        self.db['Authors'] = self.db.apply(self._split_authors, axis=1)
        # Replace missing citation counts (assignment instead of chained
        # in-place fillna, which is not guaranteed to modify the frame)
        self.db['Cited by'] = self.db['Cited by'].fillna(0)
        # Update author list
        self.update_author_list()

    def _drop_rows(self, mask):
        """Drop the rows selected by the boolean ``mask`` and refresh caches."""
        len_bef = len(self.db)
        self.db.drop(self.db[mask].index, inplace=True)
        len_after = len(self.db)
        print("    Removed {} articles, {} remaining".format(len_bef-len_after,
                                                             len_after))
        self.update_author_list()
        # The co-author table depends on the rows, force a recomputation
        self._auth_betw = None

    def clean(self, min_citations=10):
        """Remove the articles cited fewer than ``min_citations`` times."""
        self._drop_rows(self.db["Cited by"] < min_citations)

    def remove_anterior(self, year):
        """Remove the articles published in ``year`` or before."""
        self._drop_rows(self.db["Year"] <= year)

    def remove_posterior(self, year):
        """Remove the articles published strictly after ``year``."""
        self._drop_rows(self.db["Year"] > year)

    def update_author_list(self):
        """Rebuild ``self.author_list`` (sorted unique authors) from the db.

        An empty database gives an empty array instead of raising.
        """
        auths = set(itertools.chain.from_iterable(self.db['Authors']))
        self.author_list = np.array(sorted(auths), dtype=str)

    @property
    def author_betweeness(self):
        """Co-authorship counts, as ``{author: {co_author: nmb_of_articles}}``.

        The table is symmetric. It is cached and recomputed when the cache
        has been reset or when the number of rows of the db has changed.
        """
        # If already computed, just return it
        if self._auth_betw is not None and \
                self._auth_betw_computed_from == len(self.db):
            return self._auth_betw
        # else compute it
        self._auth_betw_computed_from = len(self.db)
        auth_betw = {auth: {} for auth in self.author_list}
        for auths in self.db['Authors']:
            # Every unordered couple of authors of the article; yields
            # nothing for single-author articles
            for auth1, auth2 in itertools.combinations(auths, 2):
                auth_betw[auth1][auth2] = auth_betw[auth1].get(auth2, 0) + 1
                auth_betw[auth2][auth1] = auth_betw[auth2].get(auth1, 0) + 1
        self._auth_betw = auth_betw
        return self._auth_betw

    @author_betweeness.setter
    def author_betweeness(self, val):
        """Reject assignment: the table is derived from the database."""
        raise AttributeError("You cannot change that")

    def get_total_citation(self):
        """Return the total number of citations for each author."""
        nmbcits = {}
        for auths, cited in zip(self.db['Authors'], self.db['Cited by']):
            nmbcit = int(cited)
            for auth in auths:
                nmbcits[auth] = nmbcits.get(auth, 0) + nmbcit
        return nmbcits

    def get_auth_nmb_of_art(self):
        """Return the number of articles for each author."""
        nmbart = {}
        for auths in self.db['Authors']:
            for auth in auths:
                nmbart[auth] = nmbart.get(auth, 0) + 1
        return nmbart

    def _get_author_publication(self):
        """Return ``{author: [row labels of the author's articles]}``.

        The labels are the index labels of ``self.db``, which are not
        positional anymore once rows have been dropped.
        """
        auth2pub = {}
        for label, auths in zip(self.db.index, self.db['Authors']):
            for auth in auths:
                auth2pub.setdefault(auth, []).append(label)
        return auth2pub

    def write_author_list(self, filepath):
        """Write one ``"<index>: <author>"`` line per author to ``filepath``."""
        with open(filepath, "w") as f:
            f.writelines('{}: {}\n'.format(i, auth)
                         for i, auth in enumerate(self.author_list))

    def make_article_graph(self, layout="arf"):
        """Make an article graph: one vertex per article, one edge per
        shared author between two articles.

        Parameters
        ----------
        layout : str
            One of "arf", "sfdp" (the historical spelling "sfpd" is still
            accepted), "fr" or "radial". The radial layout is rooted at the
            most cited article.

        Raises
        ------
        ValueError
            If ``layout`` is not a known layout name.
        """
        self.graph = Graph(directed=False)
        # add vertex
        self.graph.add_vertex(len(self.db))
        # add properties
        cb = self.graph.new_vertex_property("int", self.db['Cited by'].values)
        self.graph.vertex_properties['nmb_citation'] = cb
        # Add links. Vertices are numbered by row position, whereas
        # `_get_author_publication` returns row labels: translate them so
        # that the edges stay valid after rows have been dropped.
        label2pos = {label: pos for pos, label in enumerate(self.db.index)}
        edges = [(label2pos[pub1], label2pos[pub2])
                 for pubs in self._get_author_publication().values()
                 for pub1, pub2 in itertools.combinations(pubs, 2)]
        if edges:
            self.graph.add_edge_list(edges)
        # layout
        if layout == "arf":
            self.layout_pos = arf_layout(self.graph)
        elif layout in ("sfdp", "sfpd"):
            self.layout_pos = sfdp_layout(self.graph)
        elif layout == "fr":
            self.layout_pos = fruchterman_reingold_layout(self.graph)
        elif layout == "radial":
            # Root the tree at the most cited article (same convention as
            # the radial layout of the author graph)
            root = int(np.argmax(self.db['Cited by'].values))
            self.layout_pos = radial_tree_layout(self.graph, root)
        else:
            raise ValueError("Unknown layout: {!r}".format(layout))

    def make_author_graph(self, layout="arf"):
        """Make an author graph: one vertex per author, one edge per couple
        of co-authors, weighted by their number of common articles.

        Parameters
        ----------
        layout : str
            One of "arf", "sfdp" (the historical spelling "sfpd" is still
            accepted), "fr", "radial" or "planar". The radial layout is
            rooted at the author with the most citations.

        Raises
        ------
        ValueError
            If ``layout`` is not a known layout name.
        """
        self.graph = Graph(directed=False)
        # add vertex
        auths = self.author_list
        self.graph.add_vertex(len(auths))
        # add links
        auth2ind = {auth: i for i, auth in enumerate(auths)}
        abet = []
        authbet = self.author_betweeness
        for auth in auths:
            src = auth2ind[auth]
            for col, weight in authbet[auth].items():
                dst = auth2ind[col]
                # The table is symmetric: keep each couple once (and skip
                # self-couples) by only going from lower to higher index
                if dst <= src:
                    continue
                self.graph.add_edge(src, dst)
                abet.append(weight)
        # add properties
        cb = self.graph.new_edge_property("int", abet)
        self.graph.edge_properties['weight'] = cb
        # layout
        if layout == "arf":
            self.layout_pos = arf_layout(self.graph,
                                         weight=self.graph.ep.weight,
                                         pos=self.layout_pos,
                                         max_iter=10000)
        elif layout in ("sfdp", "sfpd"):
            self.layout_pos = sfdp_layout(self.graph,
                                          eweight=self.graph.ep.weight,
                                          pos=self.layout_pos)
        elif layout == "fr":
            self.layout_pos = fruchterman_reingold_layout(
                self.graph,
                weight=self.graph.ep.weight,
                circular=True,
                pos=self.layout_pos)
        elif layout == "radial":
            nc = self.get_total_citation()
            # First author reaching the maximal citation count
            main_auth = max(nc, key=nc.get)
            self.layout_pos = radial_tree_layout(self.graph,
                                                 auth2ind[main_auth])
        elif layout == "planar":
            self.layout_pos = planar_layout(self.graph)
        else:
            raise ValueError("Unknown layout: {!r}".format(layout))

    def display_article_graph(self, out="graph.pdf", min_size=1,
                              max_size=10, indice=False):
        """Display an article graph

        One point per article.
        Size and color correspond to the number of citations (the size
        follows a logarithmic scale between ``min_size`` and ``max_size``).
        ``indice`` is accepted for symmetry with `display_author_graph` but
        is not used here.
        """
        # +2 keeps the logarithm strictly positive for uncited articles
        cb = np.log(np.array(self.graph.vp.nmb_citation.a)+2)
        ms = cb/max(cb)*(max_size - min_size) + min_size
        ms = self.graph.new_vertex_property('float', ms)
        graph_draw(self.graph, pos=self.layout_pos, output=out,
                   vertex_size=ms,
                   vertex_fill_color=self.graph.vp.nmb_citation,
                   vcmap=plt.cm.viridis)

    def display_author_graph(self, out="graph.pdf", min_size=1, max_size=10,
                             indice=False):
        """Display an author graph

        One point per author. Size follows the total number of citations
        (scaled between ``min_size`` and ``max_size``), color the number of
        articles, and edge width the number of common articles. When
        ``indice`` is true each vertex is labelled with its index in
        ``self.author_list``. Expects ``self.graph`` to have been built by
        `make_author_graph` (it reads the ``weight`` edge property).
        """
        auths = self.author_list
        total_cit = self.get_total_citation()
        nmb_art = self.get_auth_nmb_of_art()
        na = [int(nmb_art[auth]) for auth in auths]
        # normalize citation number (left at zero if nobody is cited)
        nc = np.array([int(total_cit[auth]) for auth in auths], dtype=float)
        peak = nc.max() if nc.size else 0
        if peak > 0:
            nc /= peak
        nc *= (max_size - min_size)
        nc += min_size
        # normalize edge width to [0.1, 1] (nothing to scale without edges)
        weight = np.array(self.graph.ep.weight.a, dtype=float)
        if weight.size and weight.max() > 0:
            weight /= weight.max()
        weight *= (1 - 0.1)
        weight += 0.1
        # Get vertex display order: `vorder` needs one rank per vertex, so
        # invert the sorting permutation (smallest vertices drawn first)
        order = np.argsort(nc)
        ranks = np.empty_like(order)
        ranks[order] = np.arange(len(order))
        # Get index
        if indice:
            text = [str(i) for i in range(len(auths))]
            textg = self.graph.new_vertex_property('string', text)
        else:
            textg = None
        # plot
        ncg = self.graph.new_vertex_property('float', nc)
        nag = self.graph.new_vertex_property('int', na)
        vorderg = self.graph.new_vertex_property('int', ranks)
        weightg = self.graph.new_edge_property('float', weight)
        self.graph.vp['nmb_citation'] = ncg
        graph_draw(self.graph, pos=self.layout_pos, output=out,
                   vertex_fill_color=nag, vertex_size=ncg,
                   edge_pen_width=weightg, vertex_text=textg,
                   vorder=vorderg,
                   vertex_text_position=0,
                   vcmap=plt.cm.PuBu)
Exemplo n.º 17
0
def test_find_minimum_branching(g, weights):
    """Check that the edges returned by `find_minimum_branching` for ``g``
    form an arborescence once loaded into a directed graph."""
    branching_edges = find_minimum_branching(g, weights=weights)

    branching = Graph(directed=True)
    branching.add_edge_list(branching_edges)
    assert is_arborescence(branching)