Example #1
import networkx as nx


def antichains(G, topo_order=None):
    """Generates antichains from a directed acyclic graph (DAG).

    An antichain is a subset of a partially ordered set such that any
    two elements in the subset are incomparable.

    Parameters
    ----------
    G : NetworkX DiGraph
        A directed acyclic graph (DAG)

    topo_order : list or tuple, optional
        A topological order for G (if None, the function will compute one)

    Returns
    -------
    generator object

    Raises
    ------
    NetworkXNotImplemented
        If `G` is not directed

    NetworkXUnfeasible
        If `G` contains a cycle

    Notes
    -----
    This function was originally developed by Peter Jipsen and Franco Saliola
    for the SAGE project. It's included in NetworkX with permission from the
    authors. Original SAGE code at:

    https://github.com/sagemath/sage/blob/master/src/sage/combinat/posets/hasse_diagram.py

    References
    ----------
    .. [1] Free Lattices, by R. Freese, J. Jezek and J. B. Nation,
       AMS, Vol 42, 1995, p. 226.
    """
    if topo_order is None:
        topo_order = list(nx.topological_sort(G))

    TC = nx.transitive_closure_dag(G, topo_order)
    antichains_stacks = [([], list(reversed(topo_order)))]

    while antichains_stacks:
        (antichain, stack) = antichains_stacks.pop()
        # Invariant:
        #  - the elements of antichain are independent
        #  - the elements of stack are independent from those of antichain
        yield antichain
        while stack:
            x = stack.pop()
            new_antichain = antichain + [x]
            new_stack = [
                t for t in stack if not ((t in TC[x]) or (x in TC[t]))
            ]
            antichains_stacks.append((new_antichain, new_stack))
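A minimal usage sketch (not part of the original listing): enumerating the
antichains of the four-element "diamond" poset. The generator's order depends
on the topological sort, so the demo sorts the output for a stable result.

import networkx as nx

# Diamond poset: a < b, a < c, b < d, c < d.
G = nx.DiGraph([("a", "b"), ("a", "c"), ("b", "d"), ("c", "d")])

# All 6 antichains of the diamond.
print(sorted(sorted(ac) for ac in nx.antichains(G)))
# [[], ['a'], ['b'], ['b', 'c'], ['c'], ['d']]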
Example #2
File: cfg.py Project: mfkiwl/dace
from typing import Dict, Set

import networkx as nx
from dace.sdfg import SDFG, SDFGState


def all_dominators(sdfg: SDFG, idom: Dict[SDFGState, SDFGState] = None) -> Dict[SDFGState, Set[SDFGState]]:
    """ Returns a mapping between each state and all its dominators. """
    idom = idom or nx.immediate_dominators(sdfg.nx, sdfg.start_state)
    # Create a dictionary of all dominators of each node by using the
    # transitive closure of the DAG induced by the idoms
    g = nx.DiGraph()
    for node, dom in idom.items():
        if node is dom:  # Skip root
            continue
        g.add_edge(node, dom)
    tc = nx.transitive_closure_dag(g)
    alldoms: Dict[SDFGState, Set[SDFGState]] = {sdfg.start_state: set()}
    for node in tc:
        alldoms[node] = set(dst for _, dst in tc.out_edges(node))

    return alldoms
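The same idea works on any rooted flow graph, not just a dace SDFG. Below is a
minimal sketch on a plain NetworkX control-flow graph with hypothetical integer
nodes: the edges node -> idom(node) form a tree, and the transitive closure of
that tree links every node to all of its dominators.

from typing import Dict, Set

import networkx as nx

# Diamond-shaped CFG rooted at 0: both branches rejoin at node 3.
cfg = nx.DiGraph([(0, 1), (0, 2), (1, 3), (2, 3)])
idom = nx.immediate_dominators(cfg, 0)  # {0: 0, 1: 0, 2: 0, 3: 0}

# Edge from each node to its immediate dominator (skipping the root).
g = nx.DiGraph([(node, dom) for node, dom in idom.items() if node != dom])
tc = nx.transitive_closure_dag(g)
alldoms: Dict[int, Set[int]] = {0: set()}
for node in tc:
    alldoms[node] = {dst for _, dst in tc.out_edges(node)}
print(alldoms)  # {0: set(), 1: {0}, 2: {0}, 3: {0}}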
Example #3
import json
import time
from collections import Counter
from typing import Counter as Counter_t, List, Tuple

import networkx as nx

# `Attr`, `log`, `BIG_AUTOGENERATED_FILES`, `DEBUG`, and `dag_longest_paths`
# (a project helper that, unlike networkx's dag_longest_path, yields several
# paths) are defined elsewhere in this project.


def remove_very_indirect_dependencies(
    g: nx.DiGraph, attrs: List[Attr], modified_files: List[str]
) -> Tuple[nx.DiGraph, nx.DiGraph]:
    """This is fairly subtle, and it requires a bit of tuning and
    just looking at the results on multiple PRs to see if it's
    reasonable.

    It's supposed to do a few things:

    1. `g` is the build graph. it's a DAG. each edge points from a
       more leaf-like package to a more core package that the leaf-like
       package depends on. For example, there's an edge from a numpy
       derivation to a python derivation.
    2. the graph `g` is over .drvs. many of these .drvs correspond to named
       attributes in nixpkgs, like python38Packages.numpy. Many of them do
       not. .drvs that are not attrs include `src`s and other stuff that's
       not addressable as a nixpkgs derivation. generally this is stuff that
       users do not care about.
    3. for each attr, with some exceptions, we have the position (filename and
       line number) where it was created. We also know the set of modified files
       in this PR. From that, we can form a reasonable _guess_ of which attrs
       were actually _directly modified_ in the PR. the other attrs are ones that
       are downstream from the modification.
    4. The identification in (3) is both under- and over-inclusive. it's over-
       inclusive because we only look for a match at the level of the filename, so
       if a file contains many attrs then all of them will be flagged. it's under-
       inclusive because the PR may have changed a file that's used in attrs, but is
       not itself where any attr is declared. for example if someone changes a shell
       hook function and only edits a `.sh` file.
    5. so, because of (3) and (4), we have two schemes for identifying the "root"
       attrs that were actually changed by the PR. the first is simply which attrs
       are in files that were changed. the second is more graph theoretical. we look
       for the set of all *longest paths* in the DAG of drvs to be built, measuring
       the length of the path by the number of edges that include at least 1 attr
       in the edge. so edges that purely include non-attr .drvs don't count. and then
       we look at the deepest attr in each of these longest paths. these seem to capture
       the roots of the DAG pretty well.
    6. then, armed with these roots, we look for all nodes that are within 2 hops of
       a root, and throw away everything else. so we keep the roots themselves. we keep
       packages that directly link against these roots. and we keep packages that directly
       link against the packages that link to the roots. dependencies that are further
       away than that get discarded.
    7. for each of these packages that are going to be retained, we record the number
       of ancestors they have. that is, how many attrs in the build graph depend directly
       or indirectly on them. note that this includes attrs in the *full graph* -- we do
       this calculation before discarding the set that needs to be discarded as described
       in (6). the purpose of this calculation is so that we can assign a relative
       importance to every package that we're keeping. things that are depended on by more
       packages are more important.

    So finally, the result is the pruned build graph, together with the matching
    subgraph of its transitive closure.
    """

    non_autogenerated_modified_files = {
        m for m in modified_files if m not in BIG_AUTOGENERATED_FILES
    }

    #
    # Determine which attrs were modified directly by the git commit, rather than modified
    # indirectly because their inputs changed
    #
    drv_to_attr = {
        a.drv_path: a
        for a in attrs
        if a.drv_path is not None and a.position is not None
    }

    # Record whether an edge touches at least one Attr. There are some really long un-interesting
    # paths that relate to multi-lib or cross-compilation or something.
    for e in g.edges():
        g.edges[e]["is_attr"] = int(e[0] in drv_to_attr or e[1] in drv_to_attr)

    build_roots = {
        n: drv_to_attr[n].name
        for n in g.nodes
        if (
            n in drv_to_attr
            and drv_to_attr[n].filename() in non_autogenerated_modified_files
        )
    }
    log.info("Directly modified attrs", roots=json.dumps(sorted(build_roots.values())))

    #
    # Sometimes the scheme above might give no build roots, if for example
    # the only file edited was a hook or something and not where the attrs are
    # named. We want to have at least 1 build root, so if this happens let's
    # find the longest path in the DAG counting only attrs, so like attr a depends
    # on b depends on c depends on d, and then let's call attr d the build root.
    #
    longest_path_build_roots = {}
    for long_path in dag_longest_paths(g, weight="is_attr"):
        *_, long_path_end = (n for n in long_path if n in drv_to_attr)
        long_path_end_name = drv_to_attr[long_path_end].name
        # log.info("Longest path", name=long_path_end_name)
        build_roots[long_path_end] = long_path_end_name
        longest_path_build_roots[long_path_end] = long_path_end_name

    log.info(
        "Longest-path build roots", roots=sorted(longest_path_build_roots.values())
    )
    log.info('Consensus build "roots"', roots=sorted(build_roots.values()))
    assert len(build_roots) > 0 or len(g) == 0

    path_length_counts: Counter_t[int] = Counter()
    to_keep = set()
    for root in build_roots:
        g.nodes[root]["is_root"] = True
        for node, path in nx.single_target_shortest_path(g, root, 2).items():
            # If node == root, len(path) == 1. Keep that, of course.
            # If node depends directly on root, len(path) == 2. Keep that too.
            # If node is a 1-step indirect dependent of root, len(path) == 3. Keep that.
            # Otherwise, throw away.
            to_keep.add(node)
            path_length_counts[len(path) - 1] += 1

    #
    # Compute the closure of everything in to_keep, because we've gotta keep
    # those too.
    #
    before_ancestor = time.time()
    transitive_closure = nx.transitive_closure_dag(g)
    if time.time() - before_ancestor > 1:
        log.info(
            f"Computing transitive closure of build graph: {time.time() - before_ancestor:.2f} sec"
        )

    #
    # Since the changed attrs were not necessarily actually roots of the build graph
    # (this happens if we're on staging and some unchanged dependencies of the changed
    # attrs actually haven't been built yet), we need to keep them in the graph. this
    # confused me at first, but if we don't keep these things in the build graph,
    # then the precedence-constrained knapsack problem won't be working with the right
    # information, because it won't know that these packages need to be built in order
    # to build the stuff we actually care about. if the "build roots" are really roots,
    # then these should be empty.
    #
    to_keep_closure = set()
    for k in to_keep:
        to_keep_closure.update(transitive_closure.succ[k])
    to_keep.update(to_keep_closure)

    if len(g) < 500:
        # Ehhhh, just keep everything if the number of nodes is small enough
        to_keep.update(g.nodes())

    for n in to_keep:
        n_ancestors = sum(nn in drv_to_attr for nn in transitive_closure.pred[n])
        g.nodes[n]["n_ancestors"] = n_ancestors

    log.info(
        "Removing remote dependencies",
        kept=len(to_keep),
        removed=g.number_of_nodes() - len(to_keep),
        total=g.number_of_nodes(),
    )

    if DEBUG:
        print("To remove")
        import IPython

        IPython.embed()

    return g.subgraph(to_keep), transitive_closure.subgraph(to_keep)
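A toy illustration (hypothetical package names) of the 2-hop retention in step
(6) of the docstring: edges point from dependent to dependency, so
nx.single_target_shortest_path with cutoff=2 collects a root plus everything
within two hops upstream of it.

import networkx as nx

# "far" depends on "app", which depends on "lib", which depends on "root".
g = nx.DiGraph([("far", "app"), ("app", "lib"), ("lib", "root")])
paths = nx.single_target_shortest_path(g, "root", cutoff=2)

# Kept: "root" itself, its direct dependent "lib", and the 1-step indirect
# dependent "app"; "far" is three hops away, so it is discarded.
print(sorted(paths))  # ['app', 'lib', 'root']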