Example #1
 def plan(cls, pipeline):
     try:
         graph = Pipeline.graph(pipeline)
         topological_sort(graph)
         return graph
     except nx.NetworkXUnfeasible:
         raise ImpossiblePipelineException()
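The except clause works because networkx raises NetworkXUnfeasible when no topological order exists. A minimal sketch of that behavior on a toy cyclic graph:

import networkx as nx
from networkx.algorithms.dag import topological_sort

g = nx.DiGraph([('a', 'b'), ('b', 'c'), ('c', 'a')])  # contains a cycle
try:
    list(topological_sort(g))  # in networkx >= 2.0 the error surfaces on consumption
except nx.NetworkXUnfeasible:
    print('graph has a cycle; no topological order exists')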
Example #2
    def inference_and_backprop(self,
                               input_dict: Dict[str, np.ndarray],
                               y: str = 'loss') -> Dict[str, np.ndarray]:
        self.network.train_mode = True

        for event in self.events:
            event.before_executor(input_dict)

        if not self.network.built_graph:
            # Choose visitor according to settings
            if self.use_python_ops:
                from .reference_build_graph_visitor_impl import ReferenceBuildGraphVisitor
            else:
                from .reference_build_graph_visitor_impl_cpp import ReferenceBuildGraphVisitor
                print('Compiling operators, this may take a while')

            # Build graph
            self.network.variables.update(input_dict)
            self.model.accept(ReferenceBuildGraphVisitor(), self.network)
            self.network.nodes_sorted_fwd = list(
                topological_sort(self.network.graph))
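            # The backward order is just the forward order reversed; the
            # second sort below could simply reuse nodes_sorted_fwd.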
            self.network.nodes_sorted_bwd = list(
                reversed(list(topological_sort(self.network.graph))))
            self.network.built_graph = True

        out_dict = self.network.inference_and_backprop(input_dict, y)

        for event in self.events:
            event.after_backprop(out_dict)

        return out_dict
Example #3
 def of(cls, graph):
     # first determine rank by incoming sequence edges, ignoring near matching
     variant_graph_ranking = VariantGraphRanking()
     topological_sorted_vertices = topological_sort(graph.graph)
     for v in topological_sorted_vertices:
         rank = -1
         for (source, _) in graph.in_edges(v):
             rank = max(rank, variant_graph_ranking.byVertex[source])
         rank += 1
         variant_graph_ranking.byVertex[v] = rank
         variant_graph_ranking.byRank.setdefault(rank, []).append(v)
     # reverse_topological_sorted_vertices = topological_sort(graph.graph, reverse=True)
     reverse_topological_sorted_vertices = reversed(list(topological_sort(graph.graph)))
     for v in reverse_topological_sorted_vertices:
         incoming_edges = graph.in_near_edges(v, data=True)
         if incoming_edges:
             for (u, v, edgedata) in incoming_edges:
                 # u is at new rank; v is being moved to that same rank
                 u_rank = variant_graph_ranking.byVertex[u]
                 old_v_rank = variant_graph_ranking.byVertex[v]
                 # byVertex: change rank of v
                 variant_graph_ranking.byVertex[v] = u_rank
                 # byRank 1: remove v from old rank
                 variant_graph_ranking.byRank[old_v_rank].remove(v)
                 # byRank 2: add v to new rank (u_rank)
                 variant_graph_ranking.byRank[u_rank].append(v)
     return variant_graph_ranking
Example #4
 def of(cls, graph):
     # first determine rank by incoming sequence edges, ignoring near matching
     variant_graph_ranking = VariantGraphRanking()
     topological_sorted_vertices = topological_sort(graph.graph)
     for v in topological_sorted_vertices:
         rank = -1
         for (source, _) in graph.in_edges(v):
             rank = max(rank, variant_graph_ranking.byVertex[source])
         rank += 1
         variant_graph_ranking.byVertex[v] = rank
         variant_graph_ranking.byRank.setdefault(rank, []).append(v)
     # reverse_topological_sorted_vertices = topological_sort(graph.graph, reverse=True)
     reverse_topological_sorted_vertices = reversed(
         list(topological_sort(graph.graph)))
     for v in reverse_topological_sorted_vertices:
         incoming_edges = graph.in_near_edges(v, data=True)
         if incoming_edges:
             for (u, v, edgedata) in incoming_edges:
                 # u is at new rank; v is being moved to that same rank
                 u_rank = variant_graph_ranking.byVertex[u]
                 old_v_rank = variant_graph_ranking.byVertex[v]
                 # byVertex: change rank of v
                 variant_graph_ranking.byVertex[v] = u_rank
                 # byRank 1: remove v from old rank
                 variant_graph_ranking.byRank[old_v_rank].remove(v)
                 # byRank 2: add v to new rank (u_rank)
                 variant_graph_ranking.byRank[u_rank].append(v)
     return variant_graph_ranking
Example #5
    def compute_dependent_cohorts(self, objects, deletion):
        model_map = defaultdict(list)
        n = len(objects)
        r = range(n)
        indexed_objects = zip(r, objects)

        mG = self.model_dependency_graph[deletion]

        oG = DiGraph()

        for i in r:
            oG.add_node(i)

        for v0, v1 in mG.edges():
            try:
                for i0 in range(n):
                    for i1 in range(n):
                        if i0 != i1:
                            if not deletion and self.concrete_path_exists(
                                    objects[i0], objects[i1]):
                                oG.add_edge(i0, i1)
                            elif deletion and self.concrete_path_exists(
                                    objects[i1], objects[i0]):
                                oG.add_edge(i0, i1)
            except KeyError:
                pass

        components = weakly_connected_component_subgraphs(oG)
        cohort_indexes = [reversed(topological_sort(g)) for g in components]
        cohorts = [[objects[i] for i in cohort_index]
                   for cohort_index in cohort_indexes]

        return cohorts
Example #6
    def get_sorted_topology(self):
        """
        Gets the sorted topology of the DAG.

        :return: Sorted topology of the DAG.
        """
        return list(topological_sort(self.g))
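Under networkx >= 2.0, topological_sort returns a generator, which is why the method materializes it with list(). A small illustrative usage on a toy DAG:

import networkx as nx
from networkx.algorithms.dag import topological_sort

g = nx.DiGraph([(1, 2), (1, 3), (3, 4)])
order = list(topological_sort(g))
assert order.index(1) < order.index(3) < order.index(4)  # edge order respected
print(order)  # e.g. [1, 2, 3, 4]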
Example #7
    def compute_dependent_cohorts(self, objects, deletion):
        model_map = defaultdict(list)
        n = len(objects)
        r = range(n)
        indexed_objects = zip(r, objects)

        mG = self.model_dependency_graph[deletion]

        oG = DiGraph()

        for i in r:
            oG.add_node(i)

        for v0, v1 in mG.edges():
            try:
                for i0 in range(n):
                    for i1 in range(n):
                        if i0 != i1:
                            if not deletion and self.concrete_path_exists(
                                    objects[i0], objects[i1]):
                                oG.add_edge(i0, i1)
                            elif deletion and self.concrete_path_exists(
                                    objects[i1], objects[i0]):
                                oG.add_edge(i0, i1)
            except KeyError:
                pass

        components = weakly_connected_component_subgraphs(oG)
        cohort_indexes = [reversed(topological_sort(g)) for g in components]
        cohorts = [[objects[i] for i in cohort_index]
                   for cohort_index in cohort_indexes]

        return cohorts
Example #8
def segment (graph):
    """
    Joins vertices to form segments

    Joins a string of vertices with no ramifications into one vertex.
    """

    def join_tokens (graph, vertex1, vertex2):
        """ Join vertex2 to vertex1 """

        node1 = graph.node[vertex1]
        node2 = graph.node[vertex2]
        if 'tokens' in node1 and 'tokens' in node2:
            node1['tokens'] += node2['tokens']

    sorted_vertices = topological_sort (graph)[1:-1] # remove start, end

    for vertex in sorted_vertices:
        if graph.in_degree (vertex) == 1:
            prev_vertex = graph.predecessors (vertex)[0]
            if prev_vertex != 0 and graph.out_degree (prev_vertex) == 1:
                join_tokens (graph, prev_vertex, vertex)

                for (_, neighbor, data) in graph.out_edges (vertex, data=True):
                    graph.remove_edge (vertex, neighbor)
                    # must be a new edge because out_degree of prev_vertex was 1
                    graph.add_edge (prev_vertex, neighbor, label=data['witnesses'])

                graph.remove_edge (prev_vertex, vertex)
                graph.remove_node (vertex)
Example #9
    def _connect(self):
        """Connects the nodes & edges of the graph together by examining who
        the requirements of each node and finding another node that will
        create said dependency.
        """
        if len(self._graph) == 0:
            return []
        if self._connected:
            return self._runners

        # Clear out all automatically added edges since we want to make fresh
        # connections. Leave the manually connected ones intact so that users
        # still retain the dependencies they established themselves.
        def discard_edge_func(u, v, e_data):
            if e_data and e_data.get('reason') != 'manual':
                return True
            return False

        # Link providers to requirers.
        graph_utils.connect(self._graph, discard_func=discard_edge_func)

        # Now figure out the order so that we can give the runners their
        # optional item providers as well as figure out the topological run
        # order.
        run_order = dag.topological_sort(self._graph)
        run_stack = []
        for r in run_order:
            r.runs_before = list(reversed(run_stack))
            run_stack.append(r)
        self._runners = run_order
        self._connected = True
        return run_order
Example #10
 def step(self):
     for n in dag.topological_sort(self.G):
         attrs = self.G.nodes[n]
         if 'type' not in attrs:
             continue
         if attrs['type'] == 'DFF':
             assert 'value' in attrs, 'uninitialized dff'
         elif attrs['type'] == 'NOT':
             preds = list(self.G.predecessors(n))
             assert len(preds) == 1
             inp = self.G.nodes[preds[0]]['value']
             self.tfhe.tfhe.bootsNOT(attrs['value'], inp, self.cloud_key)
         elif attrs['type'] == 'AND':
             preds = list(self.G.predecessors(n))
             assert len(preds) == 2
             inp0 = self.G.nodes[preds[0]]['value']
             inp1 = self.G.nodes[preds[1]]['value']
             self.tfhe.tfhe.bootsAND(attrs['value'], inp0, inp1,
                                     self.cloud_key)
         else:
             print('unhandled node type during eval:', attrs['type'])
             assert False
     # set dffs
     for n in self.dffs:
         inp_node = self.G.nodes[n]['input_node']
         self.tfhe.tfhe.bootsCOPY(self.G.nodes[n]['value'],
                                  self.G.nodes[inp_node]['value'],
                                  self.cloud_key)
Example #11
    def _topologically_sort_documents(self, documents):
        """Topologically sorts the DAG formed from the documents' substitution
        dependency chain.
        """
        documents_by_name = {}
        result = []

        g = networkx.DiGraph()
        for document in documents:
            document = document_wrapper.DocumentDict(document)
            documents_by_name.setdefault((document.schema, document.name),
                                         document)
            for sub in document.substitutions:
                g.add_edge((document.schema, document.name),
                           (sub['src']['schema'], sub['src']['name']))

        try:
            cycle = find_cycle(g)
        except networkx.exception.NetworkXNoCycle:
            pass
        else:
            LOG.error(
                'Cannot determine substitution order as a dependency '
                'cycle exists for the following documents: %s.', cycle)
            raise errors.SubstitutionDependencyCycle(cycle=cycle)

        sorted_documents = reversed(list(topological_sort(g)))

        for document in sorted_documents:
            if document in documents_by_name:
                result.append(documents_by_name.pop(document))
        for document in documents_by_name.values():
            result.append(document)

        return result
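find_cycle either returns the edges of a cycle or raises NetworkXNoCycle, so the exception is the success path here. A toy sketch of that API:

import networkx as nx
from networkx.algorithms.cycles import find_cycle

g = nx.DiGraph([('a', 'b'), ('b', 'a')])
try:
    print(find_cycle(g))  # [('a', 'b'), ('b', 'a')]
except nx.exception.NetworkXNoCycle:
    print('no cycle found')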
Example #12
    def plan(self):
        """ Plan execution order of functions, along with initial conditions to check

        Chainable.
        """
        if len(self._graph) == 0:
            logger.warning('Planning an empty graph!')
        if self._is_planned:
            logger.info('Graph is already planned. Skipping...')
            return

        sorted_graph = dag.topological_sort(self._graph)

        initial_nodes = set()
        execution_plan = []

        for nm in sorted_graph:
            node = self[nm]
            if not node.is_complete:
                initial_nodes.add(node.name)
            else:
                execution_plan.append(node)

        self.initial_nodes = initial_nodes
        self.execution_plan = execution_plan
        self._graph = nx.freeze(self._graph)
        self._is_planned = True
        return self
Example #13
def sat_2cnf_valuating(path):
    scc, implication_graph = get_scc(path)

    if sat_2cnf_satisfied(path, scc):
        scc_graph = nx.DiGraph()

        nodes_scc = {}  # to have faster access to node's strongly connected component
        nodes_valuating = {}

        for s in scc.keys():
            scc_graph.add_node(s)

            for node in scc[s]:
                nodes_scc[node] = s
                nodes_valuating[node] = False

        for index, node_set in scc.items():
            for node in node_set:
                for neigh in implication_graph.adj[node]:
                    if index != nodes_scc[neigh]:
                        scc_graph.add_edge(index, nodes_scc[neigh])

        top_sort = topological_sort(scc_graph)

        for s in top_sort:
            for node in scc[s]:
                if not nodes_valuating[node]:
                    nodes_valuating[-node] = True

        print(1 if check_valuating(nodes_valuating, path) else 0)
        print(nodes_valuating)
Example #14
def _analyze(rules: List[Rule]) -> List[List[Rule]]:
    # build rule dependency graph
    occ: Dict[Atom, Set[RuleIndex]] = {}
    dep_graph = DiGraph()
    for u, rule in enumerate(rules):
        dep_graph.add_node(u)
        for lit in rule.body:
            occ.setdefault(abs(lit), set()).add(u)

    for u, rule in enumerate(rules):
        atm, = rule.head
        for v in occ.get(atm, []):
            dep_graph.add_edge(u, v)

    sccs = list(strongly_connected_components(dep_graph))

    # build scc dependency graph
    # (this part only exists because the networkx library does not document the
    # order of components; in principle, the tarjan algorithm guarantees a
    # topological order)
    scc_rule: Dict[RuleIndex, RuleIndex] = {}
    scc_graph = DiGraph()
    for i, scc in enumerate(sccs):
        scc_graph.add_node(i)
        for u in scc:
            scc_rule[u] = i

    for i, scc in enumerate(sccs):
        for u in scc:
            for v in dep_graph[u]:
                j = scc_rule[v]
                if i != j:
                    scc_graph.add_edge(i, j)

    return [[rules[j] for j in sccs[i]] for i in topological_sort(scc_graph)]
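networkx can build this SCC dependency DAG directly with condensation, whose integer nodes index the component list. A minimal equivalent sketch, assuming the dep_graph, sccs, and rules defined above:

import networkx as nx

cond = nx.condensation(dep_graph, scc=sccs)  # DAG; node i stands for sccs[i]
layers = [[rules[j] for j in sccs[i]] for i in nx.topological_sort(cond)]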
Example #15
def segment (graph):
    """
    Joins vertices to form segments

    Joins a string of vertices with no ramifications into one vertex.
    """

    def join_tokens (graph, v1, v2):
        """ Join v2 to v1 """

        node1 = graph.node[v1]
        node2 = graph.node[v2]
        for sigil, tokens in node2['tokens'].items ():
            node1['tokens'].setdefault (sigil, []).extend (tokens)
        node1['label'] += node2['label']

    sorted_vertices = topological_sort (graph)[1:-1] # remove start, end

    for vertex in sorted_vertices:
        if graph.in_degree (vertex) == 1:
            prev_vertex = graph.predecessors (vertex)[0]
            if prev_vertex != 0 and graph.out_degree (prev_vertex) == 1:
                join_tokens (graph, prev_vertex, vertex)

                for (_, neighbor, data) in graph.out_edges (vertex, data=True):
                    graph.remove_edge (vertex, neighbor)
                    # must be a new edge because out_degree of prev_vertex was 1
                    graph.add_edge (prev_vertex, neighbor, label=data['label'])

                graph.remove_edge (prev_vertex, vertex)
                graph.remove_node (vertex)
Example #16
def execution_sequence(graph):

    unique_edges(graph)

    sequence = []

    for n in topological_sort(graph):
        node = graph.nodes[n]
        mod = node['module']

        bind = {}
        out = {}

        action = None
        if isinstance(mod, SourceNode):
            action = 'standard_start'
        elif isinstance(mod, GraphSourceNode):
            action = 'graph_start'
        elif isinstance(mod, SinkNode):
            action = 'output'
            out['forward'] = ['__out__']
        else:
            action = n

        for _, _, D in graph.in_edges(n, data=True):
            bind[D['identifier']] = D['to']

        for _, _, k, id in graph.out_edges(n, data='identifier', keys=True):
            if k not in out:
                out[k] = []
            out[k].append(id)

        sequence.append((bind, action, out))

    return sequence
Example #17
def parse_graph(from_blocks: List[Namespace],
                block: Namespace) -> Dict[str, List[str]]:
    """Parses a graph of a block.

    Args:
        from_blocks: The input blocks.
        block: The block to parse its graph.

    Returns:
        Dictionary in topological order mapping node IDs to its respective input nodes IDs.

    Raises:
        ValueError: If there are problems parsing the graph.
        ValueError: If the graph is not directed and acyclic.
        ValueError: If topological sort does not include all nodes.
    """
    if isinstance(block, dict):
        block = dict_to_namespace(block)
    if any(isinstance(x, dict) for x in from_blocks):
        from_blocks = [dict_to_namespace(x) for x in from_blocks]

    ## Get graph list ##
    if hasattr(block, '_class') and block._class == 'Sequential':
        graph_list = [
            from_blocks[0]._id + ' -> ' +
            ' -> '.join([b._id for b in block.blocks])
        ]
    else:
        graph_list = block.graph
        if hasattr(block, 'input') and isinstance(block.input, str):
            graph_list = [from_blocks[0]._id + ' -> ' + block.input
                          ] + graph_list

    ## Parse graph ##
    try:
        graph = digraph_from_graph_list(graph_list)
    except Exception as ex:
        raise ValueError(
            f'Problems parsing graph for block[id={block._id}]: {ex}') from ex
    if not is_directed_acyclic_graph(graph):
        raise ValueError(
            f'Expected graph to be directed and acyclic for block[id={block._id}], graph={graph_list}.'
        )

    ## Create topologically ordered dict mapping all nodes to its inputs ##
    topological_predecessors = OrderedDict()
    for node in topological_sort(graph):
        predecessors = [n for n in graph.predecessors(node)]
        if len(predecessors) > 0:
            topological_predecessors[node] = predecessors

    nodes_blocks = {b._id for b in block.blocks}
    nodes_topological = {k for k in topological_predecessors.keys()}
    missing = nodes_blocks - nodes_topological
    if len(missing) > 0:
        raise ValueError(
            f'Graph in block[id={block._id}] does not reference all of its blocks: missing={missing}.'
        )

    return topological_predecessors
Example #18
    def create_id_from_nxgraph(self, nx_graph):
        """ Create an influence diagram from another nx_graph defining an influence diagram

        Parameters
        ----------
        nx_graph            :   networkX DiGraph defining an influence diagram

        **Behavior**

        Takes the input nx_graph as self.nx_diagram and recovers the basic information from it.

        This function behaves in the opposite way to create_id_from_scopes().

        Note
        ----
        the scope information only reflects the topology of the diagram

        """
        from networkx.algorithms.dag import topological_sort
        self.nx_diagram = nx_graph
        vars = sorted([
            v for v in nx_graph.nodes_iter()
            if nx_graph.node[v]["node_type"] in ['C', 'D']
        ])
        self.nvar = len(vars)
        assert self.nvar == vars[
            -1] + 1, "uai format assumes variable ids are from 0 to nvar-1"
        self.var_types = [nx_graph.node[v]["node_type"] for v in vars]
        self.scopes = []
        self.scope_types = []
        for node_id in sorted(nx_graph.nodes_iter()):
            if nx_graph.node[node_id]["node_type"] == 'C':
                self.scopes.append(
                    sorted(nx_graph.predecessors(node_id)) + [node_id])
                self.scope_types.append('P')
            elif nx_graph.node[node_id]["node_type"] == 'U':
                self.scopes.append(sorted(nx_graph.predecessors(node_id)))
                self.scope_types.append('U')

        self.decision_nodes = []
        for node_id in topological_sort(nx_graph):
            if nx_graph.node[node_id]["node_type"] == 'D':
                self.decision_nodes.append(node_id)

        partial_temporal_order = []
        nodes_covered = set(self.decision_nodes)
        for d in self.decision_nodes:
            obs = nx_graph.predecessors(d)
            if obs:
                partial_temporal_order.append(sorted(obs))
                nodes_covered.update(obs)
            partial_temporal_order.append([d])
        if len(nodes_covered) < self.nvar:
            hidden_nodes = [
                nn for nn in nx_graph.nodes_iter() if nn not in nodes_covered
                and nx_graph.node[nn]["node_type"] != "U"
            ]
            partial_temporal_order.append(hidden_nodes)
        self.partial_elim_order = list(reversed(partial_temporal_order))
Example #19
def make_layout(adj, g=None, gtype='directed'):
    """Creates a layout from an adjacency matrix.

    Parameters :
        adj : integer np.ndarray[n_node, n_node] :
            Adjacency matrix.

        g : nx.DiGraph or nx.Graph, optional, default : None :
            If g is None, use adj to generate the graph.

        gtype : 2-value string, optional, default: 'directed' :
            'directed' if g is directed, 'undirected' if g is undirected.

    Returns :
        x : 1-d np.ndarray[n_node] :
            X Positions of nodes.

        y : 1-d np.ndarray[n_node] :
            Y Positions of nodes.

    Raises :
        None

    Notes :
        x, y = make_layout(adj<, g, gtype='directed'>)
    """
    n_node = adj.shape[0]
    if g is None:
        if gtype == 'directed':
            g = nx.DiGraph(adj)
        else:
            g = nx.Graph(adj)

    try:
        seq = topological_sort(g)
    except (nx.NetworkXError, nx.NetworkXUnfeasible):
        seq = []
    if not seq:  # seq is empty
        level = poset(adj, 0) - 1
    else:  # not empty
        level = np.zeros(n_node, dtype=int)
        for node in seq:
            idx = adj[:, node].nonzero()[0]
            if idx.size != 0:
                l = np.max(level[idx])
                level[node] = l + 1
    y = (level + 1.0) / (np.max(level) + 2.0)
    y = 1.0 - y
    x = np.zeros(y.size, dtype=float)
    for i in range(np.max(level)):
        idx = (level == i).nonzero()[0]
        offset = (i % 2 - 0.5) / 10.0
        x[idx] = 1.0 * np.arange(idx.size) / (idx.size + 1) + offset
        #offset = 0.1
        #n_idx = idx.size
        #for j, index in enumerate(idx):
        #    x[index] = offset * ( j - (n_idx - 1.0) / 2.0)

    return x, y
Example #20
 def __iter__(self):
     """Generate lines of ASCII representation of a DAG."""
     for node in reversed(list(topological_sort(self.graph.G))):
         for node_symbol, lines, column_info in self.iter_edges(node):
             for line in self.iter_node_lines(node_symbol,
                                              self.node_text(node),
                                              column_info):
                 yield line
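When reproducible output matters, as in an ASCII rendering like this, networkx also offers lexicographical_topological_sort, which breaks ties between ready nodes by sort key. A minimal sketch:

import networkx as nx

G = nx.DiGraph([('a', 'c'), ('b', 'c'), ('c', 'd')])
# Plain topological_sort may emit 'a' and 'b' in either order;
# the lexicographical variant makes the choice deterministic.
print(list(nx.lexicographical_topological_sort(G)))  # ['a', 'b', 'c', 'd']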
Example #21
 def translate(self, translators: List) -> None:
     for node in topological_sort(self._dag):
         if node.type != 'gate':
             continue
         possible_translations = [f(node) for f in translators]
         possible_translations = list(filter(None, possible_translations))
         if possible_translations:
             self.replace_subgraph([node], possible_translations[0])
Example #22
def get_operator_images(model_spec):

    '''
    Returns a dict
    {
        [node key]: [operator composition result at node]
    }
    '''

    graph = get_graph(model_spec)

    def to_layers_map(comps_map, node_key):

        in_edges = graph.in_edges(node_key)

        if len(in_edges) == 0:

            comps_map.update({
                node_key: Input(
                    name=node_key,
                    **model_spec['legs']['in'][node_key]
                )
            })


        elif len(in_edges) == 1:
            comps_map.update({
                node_key: get_operator_image(
                    comps_map[in_edges[0][0]],
                    get_graph_edge(graph, in_edges[0])[0]['operator'],
                    in_edges[0][1],
                    model_spec['operators']
                )
            })

        else:
            comps_map.update({
                node_key: merge(
                    [
                        get_operator_image(
                            comps_map[in_edge[0]],
                            get_graph_edge(graph, in_edge)[0]['operator'],
                            in_edge[1],
                            model_spec['operators'],
                        )
                        for in_edge in in_edges
                    ],
                    mode='concat'
                )
            })

        return comps_map

    return reduce(
        to_layers_map,
        topological_sort(graph),
        {}
    )
Example #23
 def get_n2i(parents):
     g = nx.DiGraph()
     for k in parents:
         g.add_node(k)
     for ch, pas in parents.items():
         for pa in pas:
             g.add_edge(pa, ch)
     nodes = list(topological_sort(g))
     return {n: i for i, n in enumerate(nodes)}
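A small usage sketch of the helper above, with a made-up parents mapping; every parent ends up with a smaller index than its children:

parents = {'a': [], 'b': ['a'], 'c': ['a', 'b']}
n2i = get_n2i(parents)
assert n2i['a'] < n2i['b'] < n2i['c']  # e.g. {'a': 0, 'b': 1, 'c': 2}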
Example #24
 def order(self):
     self.connect()
     try:
         return dag.topological_sort(self._graph)
     except g_exc.NetworkXUnfeasible:
         raise exc.InvalidStateException("Unable to correctly determine "
                                         "the path through the provided "
                                         "flow which will satisfy the "
                                         "tasks needed inputs and outputs.")
Example #25
 def order(self):
     self.connect()
     try:
         return dag.topological_sort(self._graph)
     except g_exc.NetworkXUnfeasible:
         raise exc.InvalidStateException("Unable to correctly determine "
                                         "the path through the provided "
                                         "workflow which will satisfy the "
                                         "tasks needed inputs and outputs.")
Example #26
 def of(cls, graph):
     variant_graph_ranking = VariantGraphRanking()
     topological_sorted_vertices = topological_sort(graph.graph)
     for v in topological_sorted_vertices:
         rank = -1
         for (source, _) in graph.in_edges(v):
             rank = max(rank, variant_graph_ranking.byVertex[source])
         rank += 1
         variant_graph_ranking.byVertex[v] = rank
         variant_graph_ranking.byRank.setdefault(rank, []).append(v)
     return variant_graph_ranking
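The loop assigns each vertex one more than the maximum rank of its predecessors, i.e., its longest-path depth from a source. The same idea on a plain DiGraph, as a standalone sketch:

import networkx as nx
from networkx.algorithms.dag import topological_sort

g = nx.DiGraph([('start', 'a'), ('start', 'b'), ('a', 'c'), ('b', 'c')])
rank = {}
for v in topological_sort(g):
    rank[v] = 1 + max((rank[u] for u, _ in g.in_edges(v)), default=-1)
print(rank)  # {'start': 0, 'a': 1, 'b': 1, 'c': 2}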
Example #27
def remove_unused_def_vars(prob):
    dag = prob.dag
    removed = []
    for n in du.itr_sinks(dag, prob.defined_vars):
        deps = ancestors(dag, n)
        deps.add(n)
        con_dag = dag.subgraph(deps)
        reverse_order = topological_sort(con_dag, reverse=True)
        removed = delete_sinks_recursively(dag, reverse_order)
    print('Unused nodes:', removed)
    prob.defined_vars.difference_update(removed)
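The reverse=True keyword only exists in networkx 1.x; networkx >= 2.0 removed it along with the list return type. The equivalent there is to materialize and reverse the forward order:

reverse_order = list(reversed(list(topological_sort(con_dag))))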
Example #28
 def of(cls, graph):
     variant_graph_ranking = VariantGraphRanking()
     topological_sorted_vertices = topological_sort(graph.graph)
     for v in topological_sorted_vertices:
         rank = -1
         for (source, _) in graph.in_edges(v):
             rank = max(rank, variant_graph_ranking.byVertex[source])
         rank += 1
         variant_graph_ranking.byVertex[v] = rank
         variant_graph_ranking.byRank.setdefault(rank, []).append(v)
     return variant_graph_ranking
Example #29
def remove_unused_def_vars(prob):
    dag = prob.dag
    removed = []
    for n in du.itr_sinks(dag, prob.defined_vars):
        deps = ancestors(dag, n)
        deps.add(n)
        con_dag = dag.subgraph(deps)
        reverse_order = topological_sort(con_dag, reverse=True)
        removed = delete_sinks_recursively(dag, reverse_order)
    print('Unused nodes:', removed)
    prob.defined_vars.difference_update(removed)
Example #30
def set_start_point(g_orig, model, y, i_nodeid, feasible_sol):
    # FIXME It assumes that we only have a single SCC
    elims, _cost = feasible_solution(
        g_orig) if feasible_sol is None else feasible_sol
    g = g_orig.copy()
    g.remove_edges_from(elims)
    order = {n: i for i, n in enumerate(topological_sort(g))}
    for i, j in combinations(irange(len(i_nodeid)), 2):
        pos_a = order[i_nodeid[i]]
        pos_b = order[i_nodeid[j]]
        y[(i, j)].start = 0 if pos_a < pos_b else 1
Example #31
    def replace_subgraph(self, nodes: Iterable[Node],
                         replacement: nx.DiGraph) -> None:
        sorted_dag = list(topological_sort(self._dag))
        sorted_nodes = sorted(nodes, key=lambda node: sorted_dag.index(node))

        sorted_replacement = list(topological_sort(replacement))

        self._dag.update(replacement)

        predecessors = list(self._dag.predecessors(sorted_nodes[0]))

        for predecessor in predecessors:
            self._dag.add_edge(predecessor, sorted_replacement[0])
            self._dag.remove_edge(predecessor, sorted_nodes[0])

        successors = list(self._dag.successors(sorted_nodes[-1]))

        for successor in successors:
            self._dag.add_edge(sorted_replacement[-1], successor)
            self._dag.remove_edge(sorted_nodes[-1], successor)

        self._dag.remove_nodes_from(nodes)
Example #32
def ore_for_fuel(tree, need=1):
    """Computes how much ore is needed for ``need`` fuel."""
    for node in tree.nodes:
        tree.nodes[node]['need'] = 0
    tree.nodes['FUEL']['need'] = need
    for node in topological_sort(tree):
        for output_node, input_node, stoichio in tree.out_edges(node,
                                                                data=True):
            times = int(
                math.ceil(tree.nodes[output_node]['need'] /
                          stoichio['output_qty']))
            tree.nodes[input_node]['need'] += times * stoichio['input_qty']
    return tree.nodes['ORE']['need']
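A toy usage sketch with a made-up reaction graph; as in the function above, edges point from product to ingredient and carry input_qty/output_qty stoichiometry:

import networkx as nx

tree = nx.DiGraph()
tree.add_edge('FUEL', 'A', input_qty=7, output_qty=1)   # 1 FUEL needs 7 A
tree.add_edge('A', 'ORE', input_qty=10, output_qty=1)   # 1 A needs 10 ORE
print(ore_for_fuel(tree, need=2))  # 2 FUEL -> 14 A -> 140 ORE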
Example #33
    def get_sorted_deps(self, name=None):
        """Returns an appropriately-sorted list of filenames that are name or depended-on by name.

        Each file appears earlier in the list than any file including it.

        If name is None, gets all sorted deps."""
        if name:
            nodes = [name]
            nodes.extend(self.graph.successors(name))
            graph = self.graph.subgraph(nodes)
        else:
            graph = self.graph
        return list(reversed(list(topological_sort(graph))))
Example #34
    def getStageOrdering(self, phase):
        graph = self.getStageGraph(phase)

        try:
            stageList = topological_sort(graph)
        except NetworkXError as e:
            print >>sys.stderr, "getStageOrdering() failed: stage graph "\
                "is not directed"
            sys.exit(1)
        except NetworkXUnfeasible as e:
            print >> sys.stderr, "getStageOrdering(): stage graph contains "
            "cycles; falling back to listing stages in the order they are "
            "specified"
            stageList = self.structureInfo[phase][consts.STAGES_KEY].keys()

        return stageList
Example #35
    def getStageOrdering(self, phase):
        graph = self.getStageGraph(phase)

        try:
            stageList = topological_sort(graph)
        except NetworkXError as e:
            print >>sys.stderr, "getStageOrdering() failed: stage graph "\
                "is not directed"
            sys.exit(1)
        except NetworkXUnfeasible as e:
            print >>sys.stderr, "getStageOrdering(): stage graph contains "
            "cycles; falling back to listing stages in the order they are "
            "specified"
            stageList = self.structureInfo[phase][consts.STAGES_KEY].keys()

        return stageList
Example #36
File: graph.py Project: rrane/jcvi
def merge_paths(paths):
    """
    Zip together sorted lists.

    >>> paths = [[1, 2, 3], [1, 3, 4], [2, 4, 5]]
    >>> merge_paths(paths)
    [1, 2, 3, 4, 5]
    """
    from jcvi.utils.iter import pairwise

    edges = []
    for a in paths:
        edges.extend(list(pairwise(a)))

    g = nx.DiGraph(edges)
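    # NB: under networkx >= 2.0 topological_sort returns a generator; wrap it
    # in list(...) to reproduce the list shown in the doctest (written for 1.x).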
    return topological_sort(g)
Example #37
def initialize_group(group):
    # topologically sort
    component_data = []
    for node in topological_sort(groups[group]):
        component_data.append({
            "name":
            node,
            "ancestors":
            list(groups[group].predecessors(node)),
            "successors":
            list(groups[group].successors(node))
        })

    spans, orderings = parallelise_components(component_data=component_data)
    # pprint(spans)
    previous_picked_host = ""
    # allocate spans to hosts
    for span in spans:
        span_name = span["name"]

        picked_host = ""
        for dependency in resource_dependencies[span_name]:
            print(
                "Span '{}' would prefer to be on a host with fast access to '{}'"
                .format(span_name, dependency))
            print(" - Hosts that have fast access to {}: {}".format(
                dependency, has_index[dependency]))
            for host in has_index[dependency]:
                picked_host = host
                break

        preferred_host_components = span_name.split(".")

        if len(preferred_host_components) == 2:
            preferred_host = preferred_host_components[0]
            picked_host = preferred_host

        if picked_host == "" and previous_picked_host != "":
            # no dependencies, can pick any host
            # faster to pick the previous host
            picked_host = previous_picked_host
        previous_picked_host = picked_host

        print(" - Assigning {} to host {} which is at {}:{}".format(
            span_name, picked_host, hosts[picked_host], ports[picked_host]))
        span["host"] = picked_host
    return spans
Example #38
 def generate_order(cls, component_dict):
     """Regenerated the topologically sorted order of the graph"""
     edges = cls._get_edges(component_dict)
     if len(component_dict) == 1:
         return list(component_dict.keys())
     if len(edges) == 0:
         return []
     digraph = nx.DiGraph()
     digraph.add_edges_from(edges)
     if not nx.is_weakly_connected(digraph):
         raise ValueError('The given graph is not completely connected')
     try:
         compute_order = list(topological_sort(digraph))
     except NetworkXUnfeasible:
         raise ValueError('The given graph contains a cycle')
     end_components = [component for component in compute_order if len(nx.descendants(digraph, component)) == 0]
     if len(end_components) != 1:
         raise ValueError('The given graph has more than one final (childless) component')
     return compute_order
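The validation above leans on three networkx primitives: is_weakly_connected, topological_sort (which raises NetworkXUnfeasible on cycles), and descendants for finding childless end components. A toy sketch with illustrative node names:

import networkx as nx

dg = nx.DiGraph([('imputer', 'encoder'), ('encoder', 'estimator')])
assert nx.is_weakly_connected(dg)
order = list(nx.topological_sort(dg))
end_components = [n for n in order if len(nx.descendants(dg, n)) == 0]
assert end_components == ['estimator']  # exactly one final component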
Example #39
                def resolve_child(x):
                    added = self._build_symbol_dag(
                        x, dag, resolution_stack, check_dag=False
                    )
                    if (x._impl.id, symbol_id) not in dag.edges:
                        dag.add_edge(x._impl.id, symbol_id)
                        self._raise_not_dag(dag, list(added | {x._impl.id}))
                    elif added:
                        self._raise_not_dag(dag, list(added))

                    ancestors = set(dag.pred[x._impl.id])
                    for node in list(ancestors):
                        if "result" in dag.nodes[node]:
                            ancestors.remove(node)

                    for sym_id in list(topological_sort(dag.subgraph(ancestors))):
                        wrapped_handle_symbol_id(sym_id)

                    return wrapped_handle_symbol_id(x._impl.id)
Example #40
def unify_ids(G, label_dict, running_id):
    new_ids = {}
    node_move_chain = {}
    for n in topological_sort(G):
        parent = get_parent(G, n)
        if parent is None:
            chain = ''
        else:
            chain = node_move_chain[parent] + G.nodes[n]['move']

        node_move_chain[n] = chain
        id = label_dict.get(chain, None)
        if id is None:
            id = running_id
            running_id += 1
            label_dict[chain] = id
        new_ids[n] = id
    #print('IDs',new_ids)
    G = nx.relabel_nodes(G, new_ids, copy=True)
    return (G, label_dict, running_id)
Example #41
    def compute_dependent_cohorts(self, objects, deletion):
        n = len(objects)
        r = list(range(n))

        oG = DiGraph()

        for i in r:
            oG.add_node(i)

        try:
            for i0 in range(n):
                for i1 in range(n):
                    if i0 != i1:
                        if deletion:
                            path_args = (objects[i1], objects[i0])
                        else:
                            path_args = (objects[i0], objects[i1])

                        is_connected, edge_type = self.concrete_path_exists(*path_args)
                        if is_connected:
                            try:
                                edge_type = oG[i1][i0]["type"]
                                if edge_type == PROXY_EDGE:
                                    oG.remove_edge(i1, i0)
                                    oG.add_edge(i0, i1, type=edge_type)
                            except KeyError:
                                oG.add_edge(i0, i1, type=edge_type)
        except KeyError:
            pass

        components = weakly_connected_component_subgraphs(oG)
        cohort_indexes = [reversed(list(topological_sort(g))) for g in components]
        cohorts = [
            [objects[i] for i in cohort_index] for cohort_index in cohort_indexes
        ]

        return cohorts
Example #42
def perform_near_match(graph, ranking):
    # Walk ranking table in reverse order and add near-match edges to graph
    reverse_topological_sorted_vertices = reversed(list(topological_sort(graph.graph)))
    for v in reverse_topological_sorted_vertices:
        ##### Doesn't work:
        #         target_rank = ranking.byVertex[v] # get the rank of a vertex
        #
        # in_edges = graph.in_edges(v) # if it has more than one in_edge, perhaps something before it can be moved
        # if len(in_edges) > 1:
        #     # candidates for movement are the sources of in edges more than one rank earlier
        #     move_candidates = [in_edge[0] for in_edge in in_edges \
        #                        if target_rank > ranking.byVertex[in_edge[0]] + 1]
        #     for move_candidate in move_candidates:
        #         move_candidate_witnesses = set(move_candidate.tokens) # prepare to get intersection later
        #         min_rank = ranking.byVertex[move_candidate] # lowest possible rank is current position
        #         max_rank = target_rank - 1 # highest possible rank is one more before the target
        #         vertices_to_compare = flatten([ranking.byRank[r] for r in range(min_rank, max_rank + 1)])
        #         vertices_to_compare.remove(move_candidate) # don't compare it to itself
        #         print('comparing ', move_candidate, ' to ', vertices_to_compare)
        #         ratio_dict = {} # ratio:vertex_to_compare
        #         for vertex_to_compare in vertices_to_compare:
        #             # don't move if there's already a vertex there with any of the same witnesses
        #             if not move_candidate_witnesses.intersection(vertex_to_compare.tokens):
        #                 print('now comparing move candidate ', move_candidate, \
        #                       ' (witnesses ', move_candidate_witnesses,\
        #                       ') with ', vertex_to_compare, ' (witnesses ', vertex_to_compare.tokens, ')')
        #                 ratio = Levenshtein.ratio(str(move_candidate), str(vertex_to_compare))
        #                 ratio_dict[ratio] = vertex_to_compare
        #         # Create only winning edge; losing edges can create later cycles
        #         graph.connect_near(ratio_dict[max(ratio_dict)], move_candidate, ratio)
        #         print('connected ', move_candidate, ' to ', ratio_dict[max(ratio_dict)], \
        #               ' with ratio ', max(ratio_dict))
        ######
        in_edges = graph.in_edges(v, data=True)
        for source, target, edgedata in in_edges:
            # can only move if two conditions are both true:
            # 1) rank of source differs from v by more than 1; max target rank will be rank of v - 1
            # 2) out_edges from source must have no target at exactly one rank higher than source
            if ranking.byVertex[v] - ranking.byVertex[source] > 1 and \
                    1 not in [ranking.byVertex[v] - ranking.byVertex[u] for (u,v) in graph.out_edges(source)]:
                min_rank = ranking.byVertex[source]
                max_rank = ranking.byVertex[v]
                match_candidates = [item for item in flatten([ranking.byRank[rank] \
                                            for rank in range(min_rank, max_rank)]) if item is not source]
                # print(match_candidates)
                levenshtein_dict = defaultdict(list)
                for match_candidate in match_candidates:
                    ratio = Levenshtein.ratio(str(source), str(match_candidate))
                    # print(source, match_candidate, ratio)
                    levenshtein_dict[ratio].append(match_candidate)
                weight = max(levenshtein_dict)
                winner = levenshtein_dict[max(levenshtein_dict)][0]
                # print('weight:',weight,'source:',winner)
                graph.connect_near(winner, source, weight)
                # print('before: byRank',str(ranking.byRank))
                # print('before: byVertex',str(ranking.byVertex))
                # update ranking table for next pass through loop and verify
                ranking = VariantGraphRanking.of(graph)
                # print('after: byRank',str(ranking.byRank))
                # print('after: byVertex',str(ranking.byVertex))
    # Create new ranking table (passed along to creation of alignment table)
    return VariantGraphRanking.of(graph)
Example #43
def gantt(job_list, context, filename='gantt.html'):
    """

    """

    db = context.get_compmake_db()
    if not job_list:
#        job_list = list(top_targets(db))
        job_list = all_jobs(db)
    # plus all the jobs that were defined by them
    job_list = set(job_list)
#    job_list.update(definition_closure(job_list, db))

    from networkx import DiGraph
    G = DiGraph()
    cq = CacheQueryDB(db)

    for job_id in job_list:
        cache = cq.get_job_cache(job_id)
        length = cache.int_make.get_cputime_used()
        attr_dict = dict(cache=cache, length=length)
        G.add_node(job_id, **attr_dict)

        dependencies = cq.direct_children(job_id)
        for c in dependencies:
            G.add_edge(c, job_id)

        defined = cq.jobs_defined(job_id)
        for c in defined:
            G.add_edge(job_id, c)

    order = topological_sort(G)
    for job_id in order:
        length = G.node[job_id]['length']
        pre = list(G.predecessors(job_id))
#        print('%s pred %s' % (job_id, pre))
        if not pre:
            T0 = 0
            G.node[job_id]['CP'] = None
        else:
            # find predecessor with highest T1
            import numpy as np
            T1s = list(G.node[_]['T1'] for _ in pre)
            i = np.argmax(T1s)
            T0 = T1s[i]
            G.node[job_id]['CP'] = pre[i]
        T1 = T0 + length
        G.node[job_id]['T0'] = T0
        G.node[job_id]['T1'] = T1

        G.node[job_id]['critical'] = False

    sg_ideal = SimpleGantt()

    by_ideal_completion = sorted(order, key=lambda _: G.node[_]['T1'])
    last = by_ideal_completion[-1]
    path = []
    while last is not None:
        path.append(last)
        G.node[last]['critical'] = True
        last = G.node[last]['CP']

    print('Critical path:')
    for job_id in reversed(path):
        length = G.node[job_id]['length']
        print('-  %.1f s   %s' % (length, job_id))

    for job_id in by_ideal_completion:
        T0 = G.node[job_id]['T0']
        T1 = G.node[job_id]['T1']
        # length = G.node[job_id]['length']

        dependencies = list(G.predecessors(job_id))
        # cache = G.node[job_id]['cache']
        periods = OrderedDict()
        periods['ideal'] = (T0, T1)
        critical = G.node[job_id]['critical']
        sg_ideal.add_job(job_id, dependencies, periods=periods, critical=critical)

    sg_actual = SimpleGantt()

    order_actual = sorted(order, key=lambda _: G.node[_]['cache'].int_make.t0)
    for job_id in order_actual:
        cache = G.node[job_id]['cache']
        critical = G.node[job_id]['critical']
        dependencies = list(G.predecessors(job_id))
        periods = OrderedDict()
        periods['make'] = cache.int_make.walltime_interval()
        sg_actual.add_job(job_id, dependencies, periods=periods, critical=critical)

    sg_actual_detailed = SimpleGantt()
    for job_id in order_actual:
        cache = G.node[job_id]['cache']
        critical = G.node[job_id]['critical']
        # Recompute dependencies for this job; without this line the stale
        # value from the previous loop would be reused for every job here.
        dependencies = list(G.predecessors(job_id))
        periods = OrderedDict()
        periods['load'] = cache.int_load_results.walltime_interval()
        periods['compute'] = cache.int_compute.walltime_interval()
        periods['gc'] = cache.int_gc.walltime_interval()
        periods['save'] = cache.int_save_results.walltime_interval()

        assert periods['load'][1] <= periods['compute'][0]
        assert periods['compute'][1] <= periods['save'][0]
        sg_actual_detailed.add_job(job_id, dependencies, periods=periods, critical=critical)

    html = ''
    width_pixels = 1000
    if True:
        html += '\n<h1>Actual</h1>'
        html += sg_actual.as_html(width_pixels)
    if True:
        html += '\n<h1>Actual (detailed)</h1>'
        html += sg_actual_detailed.as_html(width_pixels)
    if True:
        html += '\n<h1>Ideal</h1>'
        html += sg_ideal.as_html(width_pixels)

    html += '''
    <style>
        tr:hover td:first-child {

        }
        td:first-child {
        font-size: 10px;
        }
        td:nth-child(2) {
            background-color: grey;
            width: %spx;
        }
        .compute, .make, .save, .load, .ideal, .gc {
            outline: solid 1px black;
            float: left;
            clear: left;
        }
        .compute {
            background-color: red;
        }
        .make {
            background-color: blue;
        }
        .gc {
            background-color: brown;
        }
        .save {
            background-color: green;
        }
        .load {
            background-color: yellow;
        }
        .ideal {
            background-color: magenta;
        }
        .critical  {
            /* outline: solid 2px red !important; */
            background-color: pink;
        }
    </style>
        ''' % width_pixels

    with open(filename, 'w') as f:
        f.write(html)
    print('written to %s' % filename)
Example #44
    def run(self, dry=False, set_successful=True, cmd_wrapper=signature.default_cmd_fxn_wrapper, log_out_dir_func=default_task_log_output_dir):
        """
        Runs this Execution's DAG

        :param log_out_dir_func: (function) a function that computes a task's log_out_dir_func.
             It receives one parameter: the task instance.
             By default task log output is stored in output_dir/log/stage_name/task_id.
             See _default_task_log_output_dir for more info.
        :param dry: (bool) if True, do not actually run any jobs.
        :param set_successful: (bool) sets this execution as successful if all tasks finish without a failure.  You might set this to False if you intend to add and
            run more tasks in this execution later.
        """
        assert os.path.exists(os.getcwd()), 'current working dir does not exist! %s' % os.getcwd()

        assert hasattr(self, 'cosmos_app'), 'Execution was not initialized using the Execution.start method'
        assert hasattr(log_out_dir_func, '__call__'), 'log_out_dir_func must be a function'
        assert self.session, 'Execution must be part of a sqlalchemy session'
        session = self.session
        self.log.info('Preparing to run %s using DRM `%s`, output_dir: `%s`' % (
            self, self.cosmos_app.default_drm, self.output_dir))

        from ..job.JobManager import JobManager

        self.jobmanager = JobManager(cosmos_app=self.cosmos_app, get_submit_args=self.cosmos_app.get_submit_args,
                                     default_queue=self.cosmos_app.default_queue, cmd_wrapper=cmd_wrapper, log_out_dir_func=log_out_dir_func
                                     )

        self.status = ExecutionStatus.running
        self.successful = False

        if self.started_on is None:
            import datetime

            self.started_on = datetime.datetime.now()

        task_g = self.task_graph()
        stage_g = self.stage_graph()

        # def assert_no_duplicate_taskfiles():
        #     taskfiles = (tf for task in task_g.nodes() for tf in task.output_files if not tf.duplicate_ok)
        #     f = lambda tf: tf.path
        #     for path, group in it.groupby(sorted(filter(lambda tf: not tf.task_output_for.NOOP, taskfiles), key=f), f):
        #         group = list(group)
        #         if len(group) > 1:
        #             t1 = group[0].task_output_for
        #             tf1 = group[0]
        #             t2 = group[1].task_output_for
        #             tf2 = group[1]
        #             div = "-" * 72 + "\n"
        #             self.log.error("Duplicate taskfiles paths detected:\n "
        #                            "{div}"
        #                            "{t1}\n"
        #                            "* {tf1}\n"
        #                            "{div}"
        #                            "{t2}\n"
        #                            "* {tf2}\n"
        #                            "{div}".format(**locals()))
        #
        #             raise ValueError('Duplicate taskfile paths')
        #
        # assert_no_duplicate_taskfiles()


        # Collapse
        # from ..graph.collapse import collapse
        #
        # for stage_bubble, name in recipe.collapses:
        #     self.log.debug('Collapsing %s into `%s`' % ([s.name for s in stage_bubble], name))
        #     collapse(session, task_g, stage_g, stage_bubble, name)

        # taskg and stageg are now finalized

        # stages = stage_g.nodes()
        assert len(set(self.stages)) == len(self.stages), 'duplicate stage name detected: %s' % (
            next(duplicates(self.stages)))

        # renumber stages
        for i, s in enumerate(topological_sort(stage_g)):
            s.number = i + 1

        # Add final taskgraph to session
        # session.expunge_all()
        session.add(self)
        # session.add_all(stage_g.nodes())
        # session.add_all(task_g.nodes())
        successful = filter(lambda t: t.successful, task_g.nodes())

        # print stages
        for s in topological_sort(stage_g):
            self.log.info('%s %s' % (s, s.status))

        # Create Task Queue
        task_queue = _copy_graph(task_g)
        self.log.info('Skipping %s successful tasks...' % len(successful))
        task_queue.remove_nodes_from(successful)

        handle_exits(self)

        # self.log.info('Checking stage status...')

        # def check_stage_status():
        #     """Update stage attributes if new tasks were added to them"""
        #     from .. import StageStatus
        #     for stage in self.stages:
        #         if stage.status != StageStatus.no_attempt and any(not task.successful for task in stage.tasks):
        #             stage.successful = False
        #             stage.finished_on = None
        #             stage.status = StageStatus.running
        #
        # check_stage_status()

        if self.max_cpus is not None:
            self.log.info('Ensuring there are enough cores...')
            # make sure we've got enough cores
            for t in task_queue:
                assert t.cpu_req <= self.max_cpus, '%s requires more cpus (%s) than `max_cpus` (%s)' % (t, t.cpu_req, self.max_cpus)

        # Run this thing!
        self.log.info('Committing to SQL db...')
        session.commit()
        if not dry:
            _run(self, session, task_queue)

            # set status
            if self.status == ExecutionStatus.failed_but_running:
                self.status = ExecutionStatus.failed
                # set stage status to failed
                for s in self.stages:
                    if s.status == StageStatus.running_but_failed:
                        s.status = StageStatus.failed
                session.commit()
                return False
            elif self.status == ExecutionStatus.running:
                if set_successful:
                    self.status = ExecutionStatus.successful
                session.commit()
                return True
            else:
                raise AssertionError('Bad execution status %s' % self.status)

        self.log.info('Execution complete')
Example #45
    def run(self, max_cores=None, dry=False, set_successful=True,
            cmd_wrapper=signature.default_cmd_fxn_wrapper,
            log_out_dir_func=default_task_log_output_dir):
        """
        Runs this Workflow's DAG

        :param int max_cores: The maximum number of cores to use at once.  A value of None indicates no maximum.
        :param int max_attempts: The maximum number of times to retry a failed job.
             Can be overridden with on a per-Task basis with Workflow.add_task(..., max_attempts=N, ...)
        :param callable log_out_dir_func: A function that returns a Task's logging directory (must be unique).
             It receives one parameter: the Task instance.
             By default a Task's log output is stored in log/stage_name/task_id.
             See _default_task_log_output_dir for more info.
        :param callable cmd_wrapper: A decorator which will be applied to every Task's cmd_fxn.
        :param bool dry: If True, do not actually run any jobs.
        :param bool set_successful: Sets this workflow as successful if all tasks finish without a failure.  You might set this to False if you intend to add and
            run more tasks in this workflow later.

        Returns True if all tasks in the workflow ran successfully, False otherwise.
        If dry is specified, returns None.
        """
        try:
            assert os.path.exists(os.getcwd()), 'current working dir does not exist! %s' % os.getcwd()

            assert hasattr(self, 'cosmos_app'), 'Workflow was not initialized using the Workflow.start method'
            assert hasattr(log_out_dir_func, '__call__'), 'log_out_dir_func must be a function'
            assert self.session, 'Workflow must be part of a sqlalchemy session'

            session = self.session
            self.log.info("Preparing to run %s using DRM `%s`, cwd is `%s`",
                self, self.cosmos_app.default_drm, os.getcwd())
            try:
                user = getpass.getuser()
            except:
                # fall back to uid if we can't resolve a user name
                user = os.getuid()

            self.log.info('Running as %s@%s, pid %s',
                          user, os.uname()[1], os.getpid())

            self.max_cores = max_cores

            from ..job.JobManager import JobManager

            if self.jobmanager is None:
                self.jobmanager = JobManager(get_submit_args=self.cosmos_app.get_submit_args,
                                             cmd_wrapper=cmd_wrapper,
                                             log_out_dir_func=log_out_dir_func)

            self.status = WorkflowStatus.running
            self.successful = False

            if self.started_on is None:
                self.started_on = datetime.datetime.now()

            task_graph = self.task_graph()
            stage_graph = self.stage_graph()

            assert len(set(self.stages)) == len(self.stages), 'duplicate stage name detected: %s' % (
                next(duplicates(self.stages)))

            # renumber stages
            stage_graph_no_cycles = nx.DiGraph()
            stage_graph_no_cycles.add_nodes_from(stage_graph.nodes())
            stage_graph_no_cycles.add_edges_from(stage_graph.edges())
            for cycle in nx.simple_cycles(stage_graph):
                stage_graph_no_cycles.remove_edge(cycle[-1], cycle[0])
            for i, s in enumerate(topological_sort(stage_graph_no_cycles)):
                s.number = i + 1
                if s.status != StageStatus.successful:
                    s.status = StageStatus.no_attempt

            # Make sure everything is in the sqlalchemy session
            session.add(self)
            successful = [t for t in task_graph.nodes() if t.successful]

            # print stages
            for s in sorted(self.stages, key=lambda s: s.number):
                self.log.info('%s %s' % (s, s.status))

            # Create Task Queue
            task_queue = _copy_graph(task_graph)
            self.log.info('Skipping %s successful tasks...' % len(successful))
            task_queue.remove_nodes_from(successful)

            handle_exits(self)

            if self.max_cores is not None:
                self.log.info('Ensuring there are enough cores...')
                # make sure we've got enough cores
                for t in task_queue:
                    assert int(t.core_req) <= self.max_cores, '%s requires more cores (%s) than `max_cores` (%s)' % (t, t.core_req, self.max_cores)

            # Run this thing!
            self.log.info('Committing to SQL db...')
            session.commit()
            if not dry:
                _run(self, session, task_queue)

                # set status
                if self.status == WorkflowStatus.failed_but_running:
                    self.status = WorkflowStatus.failed
                    # set stage status to failed
                    for s in self.stages:
                        if s.status == StageStatus.running_but_failed:
                            s.status = StageStatus.failed
                    session.commit()
                    return False
                elif self.status == WorkflowStatus.running:
                    if set_successful:
                        self.status = WorkflowStatus.successful
                    session.commit()
                    return True
                else:
                    self.log.warning('%s exited with status "%s"', self, self.status)
                    session.commit()
                    return False
            else:
                self.log.info('Workflow dry run is complete')
                return None
        except Exception as ex:
            self.log.fatal(ex, exc_info=True)
            raise
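The renumbering step in Exemplo n.º 45 works because the cycles reported by nx.simple_cycles are broken on a throwaway copy before topological_sort is called (which raises on cyclic input). A minimal self-contained sketch of that pattern, assuming networkx 2.x (generator-returning topological_sort) and using SimpleNamespace stand-ins for real Stage objects:

import networkx as nx
from types import SimpleNamespace

def renumber(stage_graph):
    # Work on a copy so the original keeps its (possibly cyclic) edges.
    acyclic = nx.DiGraph()
    acyclic.add_nodes_from(stage_graph.nodes())
    acyclic.add_edges_from(stage_graph.edges())
    # Break every simple cycle by dropping its closing edge.
    for cycle in nx.simple_cycles(stage_graph):
        if acyclic.has_edge(cycle[-1], cycle[0]):
            acyclic.remove_edge(cycle[-1], cycle[0])
    # Number the stages in topological order (1-based).
    for i, stage in enumerate(nx.topological_sort(acyclic)):
        stage.number = i + 1

stages = [SimpleNamespace(name=c, number=None) for c in 'abc']
g = nx.DiGraph()
g.add_edges_from([(stages[0], stages[1]), (stages[1], stages[2]),
                  (stages[2], stages[0])])   # a -> b -> c -> a is a cycle
renumber(g)
# One of three valid numberings, e.g. [('a', 1), ('b', 2), ('c', 3)],
# depending on which cycle rotation simple_cycles reports:
print(sorted((s.name, s.number) for s in stages))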
Exemplo n.º 46
0
from collections import defaultdict

from networkx import DiGraph, topological_sort
# round_edge_weights_by_multiplying is a project-local helper, imported elsewhere in the original module

def lst_dag(G, r, U,
            node_reward_key='r',
            edge_cost_key='c',
            edge_weight_decimal_point=None,
            fixed_point_func=round,
            debug=False):
    """
    Param:
    -------------
    binary_dag: a DAG in networkx format. Each node can have at most 2 child
    r: root node in dag
    U: the maximum threshold of edge weight sum

    Return:
    maximum-sum subtree rooted at r whose sum of edge weights <= A
    ------------
    """
    # round edge weight to fixed decimal point if necessary

    if edge_weight_decimal_point is not None:
        G = G.copy()
        G, U = round_edge_weights_by_multiplying(
            G,
            U,
            edge_weight_decimal_point,
            edge_cost_key=edge_cost_key,
            fixed_point_func=fixed_point_func
        )

    if debug:
        print('U => {}'.format(U))

    ns = G.nodes()
    if debug:
        print("total #nodes {}".format(len(ns)))
    
    A, D, BP = {}, {}, {}
    for n in ns:
        A[n] = {}  # maximum sum of node u at a cost i
        A[n][0] = G.node[n][node_reward_key]

        D[n] = {}  # set of nodes included corresponding to A[u][i]
        D[n][0] = {n}

        BP[n] = defaultdict(list)  # backpointer corresponding to A[u][i]

    for n_i, n in enumerate(
            topological_sort(G, reverse=True)):  # leaves come first

        if debug:
            print("#nodes processed {}".format(n_i))
        
        children = G.neighbors(n)
        reward = G.node[n][node_reward_key]
        if len(children) == 1:
            child = children[0]
            w = G[n][child][edge_cost_key]
            for i in xrange(U, w - 1, -1):
                if (i-w) in A[child]:
                    A[n][i] = A[child][i-w] + reward
                    D[n][i] = D[child][i-w] | {n}
                    BP[n][i] = [(child, i-w)]
        elif len(children) > 1:
            lchild, rchild = children
            lw = G[n][lchild][edge_cost_key]
            rw = G[n][rchild][edge_cost_key]

            for i in A[lchild]:
                c = lw + i
                if debug:
                    print('n={}, D={}, cost_child_tuples={}'.format(
                        n, D, [(i, lchild)])
                    )
                    print('c={}'.format(c))
                if c <= U:
                    if A[n].get(c) is None or A[lchild][i] + reward > A[n][c]:
                        A[n][c] = A[lchild][i] + reward
                        D[n][c] = D[lchild][i] | {n}
                        BP[n][c] = [(lchild, i)]

            for i in A[rchild]:
                c = rw + i
                if c <= U:
                    if A[n].get(c) is None or A[rchild][i] + reward > A[n][c]:
                        A[n][c] = A[rchild][i] + reward
                        D[n][c] = D[rchild][i] | {n}
                        BP[n][c] = [(rchild, i)]
            
            for i in A[lchild]:
                for j in A[rchild]:
                    c = lw + rw + i + j
                    if c <= U:
                        if (A[n].get(c) is None or
                            A[lchild][i] + A[rchild][j] + reward > A[n][c]) and \
                           len(D[lchild][i] & D[rchild][j]) == 0:
                            A[n][c] = A[lchild][i] + A[rchild][j] + reward
                            D[n][c] = D[lchild][i] | D[rchild][j] | {n}
                            BP[n][c] = [(lchild, i), (rchild, j)]
            
            # if n == r:  # no need to continue once we processed root
            #     break
                
    if debug:
        print('A[r]', A[r])

    best_cost = max(xrange(U + 1),
                    key=lambda i: A[r][i] if i in A[r] else float('-inf'))
    if debug:
        print("best_cost", best_cost)

    tree = DiGraph()
    tree.add_node(r)
    stack = []
    for n, cost in BP[r][best_cost]:
        stack.append((r, n, cost))
    while len(stack) > 0:
        # if debug:
        #     print('stack size: {}'.format(len(stack)))
        #     print('stack: {}'.format(stack))
        
        parent, child, cost = stack.pop(0)
        tree.add_edge(parent, child)

        # copy the attributes
        tree[parent][child] = G[parent][child]
        tree.node[parent] = G.node[parent]
        tree.node[child] = G.node[child]

        for grandchild, cost2 in BP[child][cost]:
            # if debug:
            #     print(grandchild, cost2)
            stack.append((child, grandchild, cost2))

    return tree
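The dynamic program in lst_dag is a knapsack over edge costs: for a node with a single child, A[n][i] = A[child][i-w] + reward whenever budget i leaves room for the edge of weight w. A hand-checkable toy instance of just that case (hypothetical numbers, written in plain Python 3 so it runs standalone, while the snippet itself targets Python 2 / networkx 1.x):

U = 3                      # budget on the edge-cost sum
A_child = {0: 3}           # child subtree: reward 3 at cost 0
w, reward = 2, 5           # edge cost to the child, root's own reward
A_root = {0: reward}
for i in range(U, w - 1, -1):          # i = 3, 2
    if (i - w) in A_child:
        A_root[i] = A_child[i - w] + reward
print(A_root)              # {0: 5, 2: 8}: paying cost 2 adds the child's reward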
Exemplo n.º 47
0
    def run(self, log_out_dir_func=_default_task_log_output_dir, dry=False, set_successful=True,
            cmd_wrapper=signature.default_cmd_fxn_wrapper):
        """
        Renders and executes the :param:`recipe`

        :param log_out_dir_func: (function) a function that computes a task's log output directory.
             It receives one parameter: the task instance.
             By default task log output is stored in output_dir/log/stage_name/task_id.
             See _default_task_log_output_dir for more info.
        :param dry: (bool) if True, do not actually run any jobs.
        :param set_successful: (bool) sets this execution as successful if all tasks finish without a failure.  You might set this to False if you intend to add and
            run more tasks in this execution later.
        """
        assert os.path.exists(os.getcwd()), 'current working dir does not exist! %s' % os.getcwd()

        assert hasattr(self, 'cosmos_app'), 'Execution was not initialized using the Execution.start method'
        assert hasattr(log_out_dir_func, '__call__'), 'log_out_dir_func must be a function'
        assert self.session, 'Execution must be part of a sqlalchemy session'
        session = self.session
        self.log.info('Preparing to run %s using DRM `%s`, output_dir: `%s`' % (
            self, self.cosmos_app.default_drm, self.output_dir))

        from ..job.JobManager import JobManager

        self.jobmanager = JobManager(get_submit_args=self.cosmos_app.get_submit_args,
                                     default_queue=self.cosmos_app.default_queue, cmd_wrapper=cmd_wrapper)

        self.status = ExecutionStatus.running
        self.successful = False

        if self.started_on is None:
            import datetime

            self.started_on = datetime.datetime.now()

        # Render the task graph and add it to the session
        # import ipdb
        # with ipdb.launch_ipdb_on_exception():
        #     print self.tasks
        task_g = self.task_graph()
        stage_g = self.stage_graph()

        # Set output_dirs of new tasks
        # for task in nx.topological_sort(task_g):
        # if not task.successful:
        # task.output_dir = task_output_dir(task)
        #         assert task.output_dir not in ['', None], "Computed an output file root_path of None or '' for %s" % task
        #         for tf in task.output_files:
        #             if tf.path is None:
        #                 tf.path = opj(task.output_dir, tf.basename)
        #                 assert tf.path is not None, 'computed an output_dir for %s of None' % task
        #                 # recipe_stage2stageprint task, tf.root_path, 'basename:',tf.basename

        # set commands of new tasks
        # for task in topological_sort(task_g):
        #     if not task.successful: # and not task.NOOP:
        #         task.command = task.tool._generate_command(task)

        import itertools as it

        # def assert_no_duplicate_taskfiles():
        #     taskfiles = (tf for task in task_g.nodes() for tf in task.output_files if not tf.duplicate_ok)
        #     f = lambda tf: tf.path
        #     for path, group in it.groupby(sorted(filter(lambda tf: not tf.task_output_for.NOOP, taskfiles), key=f), f):
        #         group = list(group)
        #         if len(group) > 1:
        #             t1 = group[0].task_output_for
        #             tf1 = group[0]
        #             t2 = group[1].task_output_for
        #             tf2 = group[1]
        #             div = "-" * 72 + "\n"
        #             self.log.error("Duplicate taskfiles paths detected:\n "
        #                            "{div}"
        #                            "{t1}\n"
        #                            "* {tf1}\n"
        #                            "{div}"
        #                            "{t2}\n"
        #                            "* {tf2}\n"
        #                            "{div}".format(**locals()))
        #
        #             raise ValueError('Duplicate taskfile paths')
        #
        # assert_no_duplicate_taskfiles()


        # Collapse
        # from ..graph.collapse import collapse
        #
        # for stage_bubble, name in recipe.collapses:
        #     self.log.debug('Collapsing %s into `%s`' % ([s.name for s in stage_bubble], name))
        #     collapse(session, task_g, stage_g, stage_bubble, name)

        # taskg and stageg are now finalized

        # stages = stage_g.nodes()
        assert len(set(self.stages)) == len(self.stages), 'duplicate stage name detected: %s' % (
            next(duplicates(self.stages)))

        # renumber stages
        for i, s in enumerate(topological_sort(stage_g)):
            s.number = i + 1

        # Add final taskgraph to session
        # session.expunge_all()
        session.add(self)
        # session.add_all(stage_g.nodes())
        # session.add_all(task_g.nodes())
        successful = filter(lambda t: t.successful, task_g.nodes())

        # commit so task.id is set for log dir
        self.log.info('Committing %s Tasks to the SQL database...' % (len(task_g.nodes()) - len(successful)))
        session.commit()

        # print stages
        for s in topological_sort(stage_g):
            self.log.info('%s %s' % (s, s.status))

        # Create Task Queue
        task_queue = _copy_graph(task_g)
        self.log.info('Skipping %s successful tasks...' % len(successful))
        task_queue.remove_nodes_from(successful)

        handle_exits(self)

        self.log.info('Setting log output directories...')

        def set_log_dirs():
            log_dirs = {t.log_dir: t for t in successful}
            for task in task_queue.nodes():
                log_dir = log_out_dir_func(task)
                assert log_dir not in log_dirs, 'Duplicate log_dir detected for %s and %s' % (task, log_dirs[log_dir])
                log_dirs[log_dir] = task
                task.log_dir = log_dir

        set_log_dirs()

        self.log.info('Checking stage attributes...')

        def reset_stage_attrs():
            """Update stage attributes if new tasks were added to them"""
            from .. import Stage, StageStatus
            # using .update() threw an error, so have to do it the slow way. It's not too bad though, since
            # there shouldn't be that many stages to update.
            for s in session.query(Stage).join(Task).filter(~Task.successful, Stage.execution_id == self.id,
                                                            Stage.status != StageStatus.no_attempt):
                s.successful = False
                s.finished_on = None
                s.status = StageStatus.running

        reset_stage_attrs()

        self.log.info('Ensuring there are enough cores...')
        # make sure we've got enough cores
        for t in task_queue:
            assert self.max_cpus is None or t.cpu_req <= self.max_cpus, '%s requires more cpus (%s) than `max_cpus` (%s)' % (
                t, t.cpu_req, self.max_cpus)

        # Run this thing!
        if not dry:
            _run(self, session, task_queue)

            # set status
            if self.status == ExecutionStatus.failed_but_running:
                self.status = ExecutionStatus.failed
                # set stage status to failed
                for s in self.stages:
                    if s.status == StageStatus.running_but_failed:
                        s.status = StageStatus.failed
                session.commit()
                return False
            elif self.status == ExecutionStatus.running:
                if set_successful:
                    self.status = ExecutionStatus.successful
                session.commit()
                return True
            else:
                raise AssertionError('Bad execution status %s' % self.status)

        self.log.info('Execution complete')
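Both run() variants report the first repeated stage via a duplicates() helper whose implementation is not shown here. One plausible shape, assuming Stage hashes and compares by name as the 'duplicate stage name detected' message suggests:

from collections import Counter

def duplicates(items):
    # Yield each item that occurs more than once in `items`.
    counts = Counter(items)
    return (item for item, count in counts.items() if count > 1)

print(next(duplicates(['align', 'call', 'align'])))   # align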
Exemplo n.º 48
0
import collections
import itertools
import json

from networkx import topological_sort

def graph_to_json (graph, witnesses, empty_cell_content = []):
    """
    Converts the graph into JSON representation.
    """

    # Sort vertices into ranks.  (More than one vertex can have the same rank.)
    sorted_vertices = topological_sort (graph)[1:-1] # remove start, end

    vertex_to_rank = collections.defaultdict (lambda: 0)
    for vertex in sorted_vertices:
        for successor in graph.successors (vertex):
            vertex_to_rank[successor] = max (vertex_to_rank[successor],
                                             vertex_to_rank[vertex] + 1)

    # The nodes in each rank
    ranks = collections.defaultdict (list)
    for vertex in sorted_vertices:
        ranks[vertex_to_rank[vertex]].append (vertex)

    # Sort and group the vertices according to their rank.
    def keyfunc (vertex):
        return vertex_to_rank[vertex]

    # Construct table columns. Each rank becomes a table column.
    columns = []
    for rank, vertices in itertools.groupby (sorted_vertices, keyfunc):
        column = {}
        columns.append (column)

        # FIXME: keep the vertex `id` around
        for vertex in vertices:
            node = graph.node[vertex]

            # the incoming edges are the witnesses that contain this token
            edges = graph.in_edges (vertex, data=True)
            for edge in edges:
                sigli = edge[2]['label'].split(', ')
                for sigil in sigli:
                    column[sigil] = [token.token_data for token in node['tokens'][sigil]]

    # Build JSON
    json_output = {}
    json_output['witnesses'] = [witness.sigil for witness in witnesses]

    # Write the columns to JSON
    table = []
    variant_columns = []
    for column in columns:
        json_column = []
        variants = set () # only to see if a column is invariant
        for witness in witnesses:
            tokens = column.get (witness.sigil, empty_cell_content)
            json_column.append (tokens)
            variants.add (''.join ([token['t'] for token in tokens]))
        table.append (json_column)
        variant_columns.append (len (variants) > 1)
    json_output['table'] = table
    json_output['status'] = variant_columns

    # Most of the time it is more practical to have rows. Each witness becomes a row.
    # So let's write rows to JSON too.
    table = []
    for witness in witnesses:
        json_row = []
        for column in columns:
            json_row.append (column.get (witness.sigil, empty_cell_content))
        table.append (json_row)
    json_output['inverted_table'] = table

    return json.dumps (json_output, sort_keys = True, indent = True)
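The rank loop in graph_to_json assigns each vertex the length of the longest path from the start vertex; processing vertices in topological order guarantees a vertex's rank is final before any successor reads it. The same step in isolation, assuming networkx 2.x (where topological_sort returns a generator instead of the list sliced above):

import collections
import networkx as nx

def longest_path_ranks(graph):
    rank = collections.defaultdict(int)
    for v in nx.topological_sort(graph):
        for succ in graph.successors(v):
            rank[succ] = max(rank[succ], rank[v] + 1)
    return dict(rank)

g = nx.DiGraph([('start', 'a'), ('start', 'b'), ('a', 'b'), ('b', 'end')])
print(longest_path_ranks(g)['b'])   # 2, via the longest path start -> a -> b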
Exemplo n.º 49
0
 def resolve_order(self):
     return topological_sort(self.g, reverse=True)
Exemplo n.º 50
0
# rosalind_ts
# topological sorting of a directed acyclic graph

import networkx as nx
from networkx.algorithms.dag import topological_sort

lines = open('rosalind_ts.txt').read().rstrip().split('\n')
n, _m = [int(i) for i in lines[0].split()]
nodes = [i+1 for i in range(n)]
edges = [tuple([int(i) for i in j.split()]) for j in lines[1:]]

G = nx.DiGraph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)

result = ' '.join([str(i) for i in topological_sort(G)])

print(result)
with open('rosalind_ts_sub.txt', 'wt') as f:
    f.write(result)
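For reference, the parser expects rosalind_ts.txt to begin with the node and edge counts, followed by one directed edge per line; a hypothetical sample (not Rosalind's actual dataset):

# rosalind_ts.txt
# 4 3
# 1 2
# 2 3
# 3 4

With this input the script writes a valid topological order such as "1 2 3 4".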
Exemplo n.º 51
0
from collections import defaultdict

from networkx import DiGraph, topological_sort

def dp_dag_general(G, r, U,
                   cost_func,
                   node_reward_key='r',
                   debug=False):
    """
    cost_func(node, D table, graph, [(cost at child , child)])

    It should return cost as integer type(fixed point is used when appropriate)
    """
    ns = G.nodes()
    if debug:
        print("total #nodes {}".format(len(ns)))
    
    A, D, BP = {}, {}, {}
    for n in ns:
        A[n] = {}  # maximum sum of node u at a cost i
        A[n][0] = G.node[n][node_reward_key]

        D[n] = {}  # set of nodes included corresponding to A[u][i]
        D[n][0] = {n}

        BP[n] = defaultdict(list)  # backpointer corresponding to A[u][i]

    for n_i, n in enumerate(
            topological_sort(G, reverse=True)):  # leaves come first

        if debug:
            print("#nodes processed {}".format(n_i))

        children = G.neighbors(n)
        if debug:
            print('{}\'s children={}'.format(n, children))
        reward = G.node[n][node_reward_key]
        if len(children) == 1:
            child = children[0]
            if debug:
                print('child={}'.format(child))
            for i in A[child]:
                c = cost_func(n, D, G,
                              [(i, child)])
                assert isinstance(c, int)
                if c <= U:
                    A[n][c] = A[child][i] + reward
                    D[n][c] = D[child][i] | {n}
                    BP[n][c] = [(child, i)]
        elif len(children) > 1:
            assert len(children) == 2
            lchild, rchild = children

            for i in A[lchild]:
                c = cost_func(n, D, G,
                              [(i, lchild)])
                assert isinstance(c, int)
                if debug:
                    print('n={}, D={}, cost_child_tuples={}'.format(
                        n, D, [(i, lchild)])
                    )
                    print('c={}'.format(c))
                if c <= U:
                    if A[n].get(c) is None or A[lchild][i] + reward > A[n][c]:
                        A[n][c] = A[lchild][i] + reward
                        D[n][c] = D[lchild][i] | {n}
                        BP[n][c] = [(lchild, i)]

            for i in A[rchild]:
                c = cost_func(n, D, G,
                              [(i, rchild)])
                assert isinstance(c, int)
                if c <= U:
                    if A[n].get(c) is None or A[rchild][i] + reward > A[n][c]:
                        A[n][c] = A[rchild][i] + reward
                        D[n][c] = D[rchild][i] | {n}
                        BP[n][c] = [(rchild, i)]
            
            for i in A[lchild]:
                for j in A[rchild]:
                    c = cost_func(n, D, G,
                                  [(i, lchild), (j, rchild)])
                    assert isinstance(c, int)
                    lset, rset = D[lchild][i], D[rchild][j]
                    if c <= U:
                        if (A[n].get(c) is None or
                            A[lchild][i] + A[rchild][j] + reward > A[n][c]) and \
                           len(lset & rset) == 0:
                            A[n][c] = A[lchild][i] + A[rchild][j] + reward
                            D[n][c] = D[lchild][i] | D[rchild][j] | {n}
                            BP[n][c] = [(lchild, i), (rchild, j)]

            if n == r:  # no need to continue once we processed root
                break
                
    best_cost = max(xrange(U + 1),
                    key=lambda i: A[r][i] if i in A[r] else float('-inf'))
    tree = DiGraph()
    tree.add_node(r)
    stack = []
    for n, cost in BP[r][best_cost]:
        stack.append((r, n, cost))
    if debug and len(stack) == 0:
        print('stack empty')
        print(A)
    while len(stack) > 0:
        if debug:
            print('stack size: {}'.format(len(stack)))
            print('stack: {}'.format(stack))
        
        parent, child, cost = stack.pop(0)
        tree.add_edge(parent, child)

        # copy the attributes
        tree[parent][child] = G[parent][child]
        tree.node[parent] = G.node[parent]
        tree.node[child] = G.node[child]

        for grandchild, cost2 in BP[child][cost]:
            if debug:
                print(grandchild, cost2)
            stack.append((child, grandchild, cost2))

    return tree
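dp_dag_general leaves the cost model to the caller. A hedged example of a cost_func that satisfies the documented contract, assuming an edge-cost attribute named 'c' (mirroring lst_dag's edge_cost_key default above):

def edge_sum_cost(node, D, G, cost_child_tuples):
    # Total cost = each child's accumulated cost plus the edge into it.
    total = 0
    for child_cost, child in cost_child_tuples:
        total += child_cost + G[node][child]['c']
    return int(total)   # the contract requires an integer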
Exemplo n.º 52
0
import collections
import json

from networkx import topological_sort

def graph_to_json (graph, empty_cell_content = []):
    """
    Converts the graph into JSON representation.
    """

    witnesses = set ()

    # Sort vertices into ranks.  (More than one vertex can have the same rank.)
    sorted_vertices = topological_sort (graph)[1:-1] # remove start, end

    vertex_to_rank = collections.defaultdict (lambda: 0)
    for vertex in sorted_vertices:
        my_rank = vertex_to_rank[vertex]
        for successor in graph.successors (vertex):
            vertex_to_rank[successor] = max (vertex_to_rank[successor],
                                             my_rank + 1)

    # The nodes in each rank
    ranks = collections.defaultdict (list)
    for vertex in sorted_vertices:
        ranks[vertex_to_rank[vertex]].append (vertex)

    # Construct table columns. Each rank becomes a table column.
    columns = []
    variant_columns = []
    for rank, vertices in ranks.items ():
        column = {}
        for vertex in vertices:
            # the incoming edges are the witnesses that contain this token
            edges = graph.in_edges (vertex, data=True)
            for edge in edges:
                if 'witnesses' in edge[2]:
                    sigli = edge[2]['witnesses'].split(', ')
                    witnesses.update (sigli)
                    for sigil in sigli:
                        column[sigil] = vertex

        columns.append (column)
        variant_columns.append (len (vertices) > 1)

    # Build JSON
    witnesses = sorted (witnesses)
    json_output = {}
    json_output['witnesses'] = witnesses

    # Write the columns to JSON
    table = []
    for column in columns:
        json_column = []
        for witness in witnesses:
            if witness in column:
                vertex = column[witness]
                node = graph.node[vertex]
                if 'tokens' in node:
                    json_column.append (node['tokens'])
                    continue
            json_column.append (empty_cell_content)
        table.append (json_column)
    json_output['table'] = table
    json_output['status'] = variant_columns

    # Most of the time it is more practical to have rows. Each witness becomes a row.
    # So let's write rows to JSON too.
    table = []
    for witness in witnesses:
        json_row = []
        for column in columns:
            # emit the tokens for this witness, as in the column table above
            vertex = column.get (witness)
            node = graph.node[vertex] if vertex is not None else {}
            json_row.append (node.get ('tokens', empty_cell_content))
        table.append (json_row)
    json_output['inverted_table'] = table

    return json.dumps (json_output, sort_keys = True, indent = True)
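A minimal usage sketch for this second graph_to_json, assuming the networkx 1.x API the function itself relies on (a list-returning topological_sort and the graph.node attribute dict); the two-witness graph is hypothetical:

import networkx as nx

g = nx.DiGraph()
g.add_node('start')
g.add_node('v1', tokens=[{'t': 'hello'}])
g.add_node('v2', tokens=[{'t': 'world'}])
g.add_node('end')
g.add_edge('start', 'v1', witnesses='A, B')
g.add_edge('v1', 'v2', witnesses='A')
g.add_edge('v2', 'end', witnesses='A, B')
print(graph_to_json(g))   # two columns; witness B gets the empty cell in column 2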