Exemplo n.º 1
0
    def _repr_html_(self, layer_index="", highlevelgraph_key=""):
        """Render this layer with the ``highlevelgraph_layer.html.j2`` template.

        The short name shown for the layer is the ``key_split`` of
        ``highlevelgraph_key`` when that is non-empty, otherwise the
        ``key_split`` of ``self.name`` when the attribute exists, otherwise
        the class name.  When the collection annotations declare the type
        ``"dask.array.core.Array"`` and carry truthy ``"chunks"``, an SVG
        rendering of those chunks is passed to the template as well.
        """
        if highlevelgraph_key != "":
            label = key_split(highlevelgraph_key)
        elif hasattr(self, "name"):
            label = key_split(self.name)
        else:
            label = self.__class__.__name__

        annotations = self.collection_annotations
        chunk_svg = ""
        if annotations and annotations.get("type") == "dask.array.core.Array":
            chunks = annotations.get("chunks")
            if chunks:
                # Imported here, only when an array layer actually needs it.
                from dask.array.svg import svg

                chunk_svg = svg(chunks)

        template = get_template("highlevelgraph_layer.html.j2")
        context = {
            "materialized": self.is_materialized(),
            "shortname": label,
            "layer_index": layer_index,
            "highlevelgraph_key": highlevelgraph_key,
            "info": self.layer_info_dict(),
            "svg_repr": chunk_svg,
        }
        return template.render(**context)
Exemplo n.º 2
0
def default_fused_keys_renamer(keys, max_fused_key_length=120):
    """Build the replacement key for a group of fused tasks.

    The last entry of ``keys`` determines the shape of the result: a ``str``
    key yields a ``str``, a non-empty tuple whose first item is a ``str``
    yields a tuple with the same trailing items.  Any other key type yields
    ``None``.

    ``max_fused_key_length`` caps the length of the generated name; a name
    that is too long is cut and given a short hash suffix.  Passing ``None``
    (or any falsy value) disables the cap.
    """
    remaining = reversed(keys)
    anchor = next(remaining)
    anchor_type = type(anchor)

    # Leave room for the "-" separator plus the four-character hash suffix.
    limit = max_fused_key_length
    if limit:
        limit -= 5

    def _truncate(label):
        if not limit or len(label) <= limit:
            return label
        digest = f"{hash(label):x}"[:4]
        return f"{label[:limit]}-{digest}"

    if anchor_type is str:
        last_part = anchor
    elif anchor_type is tuple and len(anchor) > 0 and isinstance(anchor[0], str):
        last_part = anchor[0]
    else:
        return None

    anchor_prefix = utils.key_split(anchor)
    prefixes = {utils.key_split(other) for other in remaining}
    # The anchor is appended verbatim below, so drop its bare prefix here.
    prefixes.discard(anchor_prefix)
    parts = sorted(prefixes)
    parts.append(last_part)
    fused = _truncate("-".join(parts))

    if anchor_type is str:
        return fused
    return (fused,) + anchor[1:]
Exemplo n.º 3
0
def default_fused_linear_keys_renamer(keys):
    """Build the replacement key for a linear chain of fused tasks.

    The prefixes of ``keys[1:]`` (taken in reverse order) are joined with
    ``"-"`` and followed by the first key's own name.  A ``str`` first key
    gives a ``str``; a non-empty tuple starting with a ``str`` gives a tuple
    that keeps the first key's trailing items.  Other key types give ``None``.
    """
    head = keys[0]
    head_type = type(head)

    if head_type is str:
        last_part = head
    elif head_type is tuple and len(head) > 0 and isinstance(head[0], str):
        last_part = head[0]
    else:
        return None

    parts = [utils.key_split(other) for other in keys[:0:-1]]
    parts.append(last_part)
    fused = "-".join(parts)

    if head_type is str:
        return fused
    return (fused,) + head[1:]
Exemplo n.º 4
0
def clone_key(key, seed):
    """Derive a new key from an existing Dask collection key.

    The result keeps the prefix (and, for tuple keys, the trailing indices)
    of ``key`` and replaces the token with one computed from ``key`` and
    ``seed``.

    Raises
    ------
    TypeError
        If ``key`` is neither a ``str`` nor a non-empty tuple whose first
        item is a ``str``.

    Examples
    --------
    >>> clone_key("x", 123)
    'x-dc2b8d1c184c72c19faa81c797f8c6b0'
    >>> clone_key("inc-cbb1eca3bafafbb3e8b2419c4eebb387", 123)
    'inc-f81b5a88038a2132882aa29a9fcfec06'
    >>> clone_key(("sum-cbb1eca3bafafbb3e8b2419c4eebb387", 4, 3), 123)
    ('sum-fd6be9e9fe07fc232ad576fa997255e8', 4, 3)
    """
    if isinstance(key, str):
        return "-".join([key_split(key), tokenize(key, seed)])

    if isinstance(key, tuple) and key and isinstance(key[0], str):
        name_part, indices = key[0], key[1:]
        # Only the name is re-tokenized; the indices are carried over as-is.
        return (clone_key(name_part, seed),) + indices

    raise TypeError(f"Expected str or tuple[str, Hashable, ...]; got {key}")
Exemplo n.º 5
0
def test_names():
    """A random normal array's name starts with 'normal' and has a short prefix."""
    arr = da.random.normal(0, 1, size=(1000,), chunks=(500,))
    arr_name = arr.name

    prefix = key_split(arr_name)
    assert arr_name.startswith("normal")
    assert len(prefix) < 10
Exemplo n.º 6
0
Arquivo: dot.py Projeto: m-rossi/dask
def _to_cytoscape_json(
    dsk,
    data_attributes=None,
    function_attributes=None,
    collapse_outputs=False,
    verbose=False,
    **kwargs,
):
    """
    Convert a dask graph to Cytoscape JSON:
    https://js.cytoscape.org/#notation/elements-json

    Returns a dict with a ``"nodes"`` list and an ``"edges"`` list.  Task
    nodes are emitted as gray ellipses and data nodes as gray rectangles;
    entries of ``function_attributes`` / ``data_attributes`` (looked up by
    graph key) override those defaults.  With ``collapse_outputs`` a task and
    its output share a single node and no function-to-output edge is added.
    ``verbose`` is forwarded to ``box_label``; ``kwargs`` is accepted but not
    used.
    """
    data_attributes = data_attributes or {}
    function_attributes = function_attributes or {}

    nodes = []
    edges = []
    seen = set()
    connected = set()

    def add_node(node_id, label, shape, overrides):
        # Overrides are spread last so they win over the defaults.
        nodes.append(
            {
                "data": {
                    "id": node_id,
                    "label": label,
                    "shape": shape,
                    "color": "gray",
                    **overrides,
                }
            }
        )

    def add_edge(source, target):
        # Every edge also marks both of its endpoints as connected.
        edges.append({"data": {"source": source, "target": target}})
        connected.add(source)
        connected.add(target)

    for key, value in dsk.items():
        key_id = name(key)

        if istask(value):
            func_id = key_id if collapse_outputs else name((key, "function"))

            if collapse_outputs or func_id not in seen:
                seen.add(func_id)
                overrides = function_attributes.get(key, {}).copy()
                add_node(func_id, key_split(key), "ellipse", overrides)

            if not collapse_outputs:
                add_edge(func_id, key_id)

            for dep in get_dependencies(dsk, key):
                dep_id = name(dep)
                if dep_id not in seen:
                    seen.add(dep_id)
                    overrides = data_attributes.get(dep, {}).copy()
                    add_node(dep_id, box_label(dep, verbose), "rectangle", overrides)
                add_edge(dep_id, func_id)

        elif ishashable(value) and value in dsk:
            # The value is itself a key of the graph: draw an alias edge.
            add_edge(name(value), key_id)

        if key_id not in seen and (not collapse_outputs or key_id in connected):
            seen.add(key_id)
            overrides = data_attributes.get(key, {}).copy()
            add_node(key_id, box_label(key, verbose), "rectangle", overrides)

    return {"nodes": nodes, "edges": edges}
Exemplo n.º 7
0
Arquivo: dot.py Projeto: m-rossi/dask
def to_graphviz(
    dsk,
    data_attributes=None,
    function_attributes=None,
    rankdir="BT",
    graph_attr=None,
    node_attr=None,
    edge_attr=None,
    collapse_outputs=False,
    verbose=False,
    **kwargs,
):
    """Build a ``graphviz.Digraph`` that depicts the dask graph ``dsk``.

    Parameters
    ----------
    dsk : mapping
        The graph to draw; iterated with ``dsk.items()``.
    data_attributes : dict, optional
        Per-key node attributes for data nodes.  ``label`` and ``shape``
        default to ``box_label(key, verbose)`` and ``"box"``.
    function_attributes : dict, optional
        Per-key node attributes for task nodes.  ``label`` and ``shape``
        default to ``key_split(key)`` and ``"circle"``.
    rankdir : str
        Stored as the ``"rankdir"`` graph attribute.
    graph_attr, node_attr, edge_attr : dict, optional
        Attribute mappings handed to ``graphviz.Digraph``.  The mappings
        passed in are not modified.
    collapse_outputs : bool
        When true, a task and its output share one node and no
        function-to-output edge is drawn.
    verbose : bool
        Forwarded to ``box_label``.
    **kwargs
        Extra graph attributes, merged into ``graph_attr``.

    Returns
    -------
    The populated ``graphviz.Digraph``.
    """
    graphviz = import_required(
        "graphviz",
        "Drawing dask graphs with the graphviz engine requires the `graphviz` "
        "python library and the `graphviz` system library.\n\n"
        "Please either conda or pip install as follows:\n\n"
        "  conda install python-graphviz     # either conda install\n"
        "  python -m pip install graphviz    # or pip install and follow installation instructions",
    )

    data_attributes = data_attributes or {}
    function_attributes = function_attributes or {}
    # Copy the mappings that are written to below so that dictionaries
    # supplied by the caller are never mutated as a side effect.
    graph_attr = dict(graph_attr or {})
    node_attr = dict(node_attr or {})
    edge_attr = edge_attr or {}

    graph_attr["rankdir"] = rankdir
    node_attr["fontname"] = "helvetica"

    graph_attr.update(kwargs)
    g = graphviz.Digraph(
        graph_attr=graph_attr, node_attr=node_attr, edge_attr=edge_attr
    )

    # ``seen`` holds node ids already drawn; ``connected`` holds node ids
    # that are an endpoint of at least one edge.
    seen = set()
    connected = set()

    for k, v in dsk.items():
        k_name = name(k)
        if istask(v):
            # With collapsed outputs the task shares its output's node id.
            func_name = name((k, "function")) if not collapse_outputs else k_name
            if collapse_outputs or func_name not in seen:
                seen.add(func_name)
                attrs = function_attributes.get(k, {}).copy()
                attrs.setdefault("label", key_split(k))
                attrs.setdefault("shape", "circle")
                g.node(func_name, **attrs)
            if not collapse_outputs:
                g.edge(func_name, k_name)
                connected.add(func_name)
                connected.add(k_name)

            for dep in get_dependencies(dsk, k):
                dep_name = name(dep)
                if dep_name not in seen:
                    seen.add(dep_name)
                    attrs = data_attributes.get(dep, {}).copy()
                    attrs.setdefault("label", box_label(dep, verbose))
                    attrs.setdefault("shape", "box")
                    g.node(dep_name, **attrs)
                g.edge(dep_name, func_name)
                connected.add(dep_name)
                connected.add(func_name)

        elif ishashable(v) and v in dsk:
            # The value is itself a key of the graph: draw an alias edge.
            v_name = name(v)
            g.edge(v_name, k_name)
            connected.add(v_name)
            connected.add(k_name)

        # Draw the data node for ``k`` unless it was already drawn; with
        # collapsed outputs, only keys connected so far get a box.
        if (not collapse_outputs or k_name in connected) and k_name not in seen:
            seen.add(k_name)
            attrs = data_attributes.get(k, {}).copy()
            attrs.setdefault("label", box_label(k, verbose))
            attrs.setdefault("shape", "box")
            g.node(k_name, **attrs)
    return g
Exemplo n.º 8
0
def test_names():
    """Check the name of a random normal array and the length of its prefix."""
    key = da.random.normal(0, 1, size=(1000,), chunks=(500,)).name
    short = key_split(key)

    assert key.startswith("normal")
    assert len(short) < 10
Exemplo n.º 9
0
 def __str__(self) -> str:
     """Return a one-line summary with the name prefix and partition count."""
     prefix = key_split(self.name)
     partitions = self.npartitions
     template = "dask_histogram.PartitionedHistogram,<%s, npartitions=%d>"
     return template % (prefix, partitions)
Exemplo n.º 10
0
def to_graphviz(
    dsk,
    data_attributes=None,
    function_attributes=None,
    rankdir="BT",
    graph_attr=None,
    node_attr=None,
    edge_attr=None,
    collapse_outputs=False,
    verbose=False,
    **kwargs,
):
    """Build a ``graphviz.Digraph`` that depicts the dask graph ``dsk``.

    Parameters
    ----------
    dsk : mapping
        The graph to draw; iterated with ``dsk.items()``.
    data_attributes : dict, optional
        Per-key node attributes for data nodes.  ``label`` and ``shape``
        default to ``box_label(key, verbose)`` and ``"box"``.
    function_attributes : dict, optional
        Per-key node attributes for task nodes.  ``label`` and ``shape``
        default to ``key_split(key)`` and ``"circle"``.
    rankdir : str
        Stored as the ``"rankdir"`` graph attribute.
    graph_attr, node_attr, edge_attr : dict, optional
        Attribute mappings handed to ``graphviz.Digraph``.  The mappings
        passed in are not modified.
    collapse_outputs : bool
        When true, a task and its output share one node and no
        function-to-output edge is drawn.
    verbose : bool
        Forwarded to ``box_label``.
    **kwargs
        Extra graph attributes, merged into ``graph_attr``.

    Returns
    -------
    The populated ``graphviz.Digraph``.
    """
    data_attributes = data_attributes or {}
    function_attributes = function_attributes or {}
    # Copy the mappings that are written to below so that dictionaries
    # supplied by the caller are never mutated as a side effect.
    graph_attr = dict(graph_attr or {})
    node_attr = dict(node_attr or {})
    edge_attr = edge_attr or {}

    graph_attr["rankdir"] = rankdir
    node_attr["fontname"] = "helvetica"

    graph_attr.update(kwargs)
    g = graphviz.Digraph(graph_attr=graph_attr,
                         node_attr=node_attr,
                         edge_attr=edge_attr)

    # ``seen`` holds node ids already drawn; ``connected`` holds node ids
    # that are an endpoint of at least one edge.
    seen = set()
    connected = set()

    for k, v in dsk.items():
        k_name = name(k)
        if istask(v):
            # With collapsed outputs the task shares its output's node id.
            func_name = name(
                (k, "function")) if not collapse_outputs else k_name
            if collapse_outputs or func_name not in seen:
                seen.add(func_name)
                attrs = function_attributes.get(k, {}).copy()
                attrs.setdefault("label", key_split(k))
                attrs.setdefault("shape", "circle")
                g.node(func_name, **attrs)
            if not collapse_outputs:
                g.edge(func_name, k_name)
                connected.add(func_name)
                connected.add(k_name)

            for dep in get_dependencies(dsk, k):
                dep_name = name(dep)
                if dep_name not in seen:
                    seen.add(dep_name)
                    attrs = data_attributes.get(dep, {}).copy()
                    attrs.setdefault("label", box_label(dep, verbose))
                    attrs.setdefault("shape", "box")
                    g.node(dep_name, **attrs)
                g.edge(dep_name, func_name)
                connected.add(dep_name)
                connected.add(func_name)

        elif ishashable(v) and v in dsk:
            # The value is itself a key of the graph: draw an alias edge.
            v_name = name(v)
            g.edge(v_name, k_name)
            connected.add(v_name)
            connected.add(k_name)

        # Draw the data node for ``k`` unless it was already drawn; with
        # collapsed outputs, only keys connected so far get a box.
        if (not collapse_outputs
                or k_name in connected) and k_name not in seen:
            seen.add(k_name)
            attrs = data_attributes.get(k, {}).copy()
            attrs.setdefault("label", box_label(k, verbose))
            attrs.setdefault("shape", "box")
            g.node(k_name, **attrs)
    return g