def _repr_html_(self, layer_index="", highlevelgraph_key=""):
    """Render this layer through the ``highlevelgraph_layer.html.j2`` template.

    Parameters
    ----------
    layer_index : optional
        Forwarded to the template unchanged.
    highlevelgraph_key : str, optional
        When not the empty string, the short display name is derived from
        this key instead of from ``self.name``.

    Returns
    -------
    Whatever the template's ``render`` call returns.
    """
    # Short name precedence: explicit key, then ``self.name``, then the
    # class name as a last resort.
    if highlevelgraph_key != "":
        shortname = key_split(highlevelgraph_key)
    elif hasattr(self, "name"):
        shortname = key_split(self.name)
    else:
        shortname = self.__class__.__name__

    # Only layers annotated as dask arrays with non-empty chunks get the
    # output of ``dask.array.svg.svg``; everything else keeps "".
    svg_repr = ""
    annotations = self.collection_annotations
    if annotations and annotations.get("type") == "dask.array.core.Array":
        chunks = annotations.get("chunks")
        if chunks:
            from dask.array.svg import svg

            svg_repr = svg(chunks)

    template = get_template("highlevelgraph_layer.html.j2")
    context = {
        "materialized": self.is_materialized(),
        "shortname": shortname,
        "layer_index": layer_index,
        "highlevelgraph_key": highlevelgraph_key,
        "info": self.layer_info_dict(),
        "svg_repr": svg_repr,
    }
    return template.render(**context)
def default_fused_keys_renamer(keys, max_fused_key_length=120):
    """Create new keys for ``fuse`` tasks.

    The new name is built from the *last* element of ``keys`` plus the
    sorted, de-duplicated ``utils.key_split`` prefixes of the other keys,
    joined with ``"-"``.

    Parameters
    ----------
    keys
        Reversible, non-empty sequence of keys being fused.
    max_fused_key_length : int or None, optional
        Upper bound on the string length of the renamed key. If this is
        ``None`` (or otherwise falsy) no limit is applied.

    Returns
    -------
    A ``str`` when the last key is a ``str``; a tuple with the same trailing
    items when the last key is a non-empty tuple whose first item is a
    ``str``; ``None`` for any other key type.
    """
    remaining = reversed(keys)
    anchor = next(remaining)
    anchor_type = type(anchor)

    if max_fused_key_length:
        # Take into account size of hash suffix ("-" plus up to 4 characters).
        max_fused_key_length = max_fused_key_length - 5

    def _truncate(candidate):
        if not max_fused_key_length or len(candidate) <= max_fused_key_length:
            return candidate
        # NOTE: ``hash`` of a str is salted per interpreter process by
        # default and may be negative, so this suffix is neither stable
        # across processes nor guaranteed to be four hex digits.
        suffix = f"{hash(candidate):x}"[:4]
        return f"{candidate[:max_fused_key_length]}-{suffix}"

    is_str_key = anchor_type is str
    is_tuple_key = (
        anchor_type is tuple and len(anchor) > 0 and isinstance(anchor[0], str)
    )
    if not (is_str_key or is_tuple_key):
        return None

    anchor_prefix = utils.key_split(anchor)
    prefixes = set()
    for other in remaining:
        prefixes.add(utils.key_split(other))
    # The anchor's own prefix is represented by the anchor itself below.
    prefixes.discard(anchor_prefix)

    parts = sorted(prefixes)
    parts.append(anchor if is_str_key else anchor[0])
    fused_name = _truncate("-".join(parts))

    if is_str_key:
        return fused_name
    return (fused_name,) + anchor[1:]
def default_fused_linear_keys_renamer(keys):
    """Create new keys for fused tasks.

    The result joins, with ``"-"``, the ``utils.key_split`` prefixes of
    ``keys[1:]`` in reverse order followed by the first key's own name.

    Returns a ``str`` when ``keys[0]`` is a ``str``, a tuple keeping the
    trailing items of ``keys[0]`` when it is a non-empty tuple starting with
    a ``str``, and ``None`` otherwise.
    """
    head = keys[0]
    head_is_str = type(head) is str

    if head_is_str:
        label = head
    elif type(head) is tuple and len(head) > 0 and isinstance(head[0], str):
        label = head[0]
    else:
        return None

    parts = []
    # Walk every key except the first, last to second.
    for key in keys[:0:-1]:
        parts.append(utils.key_split(key))
    parts.append(label)
    fused = "-".join(parts)

    if head_is_str:
        return fused
    return (fused,) + head[1:]
def clone_key(key, seed):
    """Clone a key from a Dask collection, producing a new key with the same
    prefix and indices and a token which is a deterministic function of the
    previous key and seed.

    Parameters
    ----------
    key : str or tuple
        Either a string key, or a non-empty tuple whose first item is a
        string key; the remaining tuple items are carried over unchanged.
    seed
        Passed to ``tokenize`` together with the string key.

    Raises
    ------
    TypeError
        If ``key`` is neither of the accepted shapes.

    Examples
    --------
    >>> clone_key("x", 123)
    'x-dc2b8d1c184c72c19faa81c797f8c6b0'
    >>> clone_key("inc-cbb1eca3bafafbb3e8b2419c4eebb387", 123)
    'inc-f81b5a88038a2132882aa29a9fcfec06'
    >>> clone_key(("sum-cbb1eca3bafafbb3e8b2419c4eebb387", 4, 3), 123)
    ('sum-fd6be9e9fe07fc232ad576fa997255e8', 4, 3)
    """
    if isinstance(key, str):
        return "-".join((key_split(key), tokenize(key, seed)))

    if isinstance(key, tuple) and key and isinstance(key[0], str):
        # Only the leading name is re-tokenized; indices are kept as-is.
        cloned_name = clone_key(key[0], seed)
        return (cloned_name,) + key[1:]

    raise TypeError(f"Expected str or tuple[str, Hashable, ...]; got {key}")
def test_names():
    """The name of a ``da.random.normal`` array starts with ``normal`` and
    its ``key_split`` prefix is shorter than 10 characters."""
    arr = da.random.normal(0, 1, size=(1000,), chunks=(500,))
    array_name = arr.name
    assert array_name.startswith("normal")
    prefix = key_split(array_name)
    assert len(prefix) < 10
def _to_cytoscape_json( dsk, data_attributes=None, function_attributes=None, collapse_outputs=False, verbose=False, **kwargs, ): """ Convert a dask graph to Cytoscape JSON: https://js.cytoscape.org/#notation/elements-json """ nodes = [] edges = [] data = {"nodes": nodes, "edges": edges} data_attributes = data_attributes or {} function_attributes = function_attributes or {} seen = set() connected = set() for k, v in dsk.items(): k_name = name(k) if istask(v): func_name = name((k, "function")) if not collapse_outputs else k_name if collapse_outputs or func_name not in seen: seen.add(func_name) attrs = function_attributes.get(k, {}).copy() nodes.append( { "data": { "id": func_name, "label": key_split(k), "shape": "ellipse", "color": "gray", **attrs, } } ) if not collapse_outputs: edges.append({"data": {"source": func_name, "target": k_name}}) connected.add(func_name) connected.add(k_name) for dep in get_dependencies(dsk, k): dep_name = name(dep) if dep_name not in seen: seen.add(dep_name) attrs = data_attributes.get(dep, {}).copy() nodes.append( { "data": { "id": dep_name, "label": box_label(dep, verbose), "shape": "rectangle", "color": "gray", **attrs, } } ) edges.append( { "data": { "source": dep_name, "target": func_name, } } ) connected.add(dep_name) connected.add(func_name) elif ishashable(v) and v in dsk: v_name = name(v) edges.append( { "data": { "source": v_name, "target": k_name, } } ) connected.add(v_name) connected.add(k_name) if (not collapse_outputs or k_name in connected) and k_name not in seen: seen.add(k_name) attrs = data_attributes.get(k, {}).copy() nodes.append( { "data": { "id": k_name, "label": box_label(k, verbose), "shape": "rectangle", "color": "gray", **attrs, } } ) return data
def to_graphviz(
    dsk,
    data_attributes=None,
    function_attributes=None,
    rankdir="BT",
    graph_attr=None,
    node_attr=None,
    edge_attr=None,
    collapse_outputs=False,
    verbose=False,
    **kwargs,
):
    """Build a ``graphviz.Digraph`` for the dask graph ``dsk``.

    Parameters
    ----------
    dsk : mapping
        The graph; iterated with ``.items()``.
    data_attributes, function_attributes : dict, optional
        Per-key attribute dicts for data (box) and function (circle) nodes.
        ``label`` and ``shape`` are only filled in when not already given.
    rankdir : str, optional
        Stored as the ``rankdir`` graph attribute.
    graph_attr, node_attr, edge_attr : dict, optional
        Attribute dicts handed to ``graphviz.Digraph``. The caller's
        ``graph_attr`` and ``node_attr`` are copied, never modified.
    collapse_outputs : bool, optional
        When true, a task and its output share one node and no
        function-to-output edge is drawn.
    verbose : bool, optional
        Forwarded to ``box_label``.
    **kwargs
        Merged into the graph attributes.

    Returns
    -------
    The populated ``graphviz.Digraph``.
    """
    graphviz = import_required(
        "graphviz",
        "Drawing dask graphs with the graphviz engine requires the `graphviz` "
        "python library and the `graphviz` system library.\n\n"
        "Please either conda or pip install as follows:\n\n"
        " conda install python-graphviz # either conda install\n"
        " python -m pip install graphviz # or pip install and follow installation instructions",
    )

    data_attributes = data_attributes or {}
    function_attributes = function_attributes or {}
    # Copy before writing ``rankdir`` / ``fontname`` / ``kwargs`` so that
    # dicts owned by the caller are not silently modified between calls.
    graph_attr = dict(graph_attr or {})
    node_attr = dict(node_attr or {})
    edge_attr = edge_attr or {}

    graph_attr["rankdir"] = rankdir
    node_attr["fontname"] = "helvetica"

    graph_attr.update(kwargs)
    g = graphviz.Digraph(
        graph_attr=graph_attr, node_attr=node_attr, edge_attr=edge_attr
    )

    seen = set()
    connected = set()

    for k, v in dsk.items():
        k_name = name(k)
        if istask(v):
            # With collapse_outputs the function and its output are one node.
            func_name = name((k, "function")) if not collapse_outputs else k_name
            if collapse_outputs or func_name not in seen:
                seen.add(func_name)
                attrs = function_attributes.get(k, {}).copy()
                attrs.setdefault("label", key_split(k))
                attrs.setdefault("shape", "circle")
                g.node(func_name, **attrs)
            if not collapse_outputs:
                g.edge(func_name, k_name)
                connected.add(func_name)
                connected.add(k_name)

            for dep in get_dependencies(dsk, k):
                dep_name = name(dep)
                if dep_name not in seen:
                    seen.add(dep_name)
                    attrs = data_attributes.get(dep, {}).copy()
                    attrs.setdefault("label", box_label(dep, verbose))
                    attrs.setdefault("shape", "box")
                    g.node(dep_name, **attrs)
                g.edge(dep_name, func_name)
                connected.add(dep_name)
                connected.add(func_name)

        elif ishashable(v) and v in dsk:
            # The value is itself a key of the graph: draw key -> key edge.
            v_name = name(v)
            g.edge(v_name, k_name)
            connected.add(v_name)
            connected.add(k_name)

        # In collapsed mode only keys that take part in an edge get a box.
        if (not collapse_outputs or k_name in connected) and k_name not in seen:
            seen.add(k_name)
            attrs = data_attributes.get(k, {}).copy()
            attrs.setdefault("label", box_label(k, verbose))
            attrs.setdefault("shape", "box")
            g.node(k_name, **attrs)
    return g
def __str__(self) -> str:
    """Return a one-line description built from ``key_split(self.name)``
    and ``self.npartitions``."""
    template = "dask_histogram.PartitionedHistogram,<%s, npartitions=%d>"
    short_name = key_split(self.name)
    partition_count = self.npartitions
    return template % (short_name, partition_count)
def to_graphviz(
    dsk,
    data_attributes=None,
    function_attributes=None,
    rankdir="BT",
    graph_attr=None,
    node_attr=None,
    edge_attr=None,
    collapse_outputs=False,
    verbose=False,
    **kwargs,
):
    """Build a ``graphviz.Digraph`` for the dask graph ``dsk``.

    Uses the module-level ``graphviz`` name.

    Parameters
    ----------
    dsk : mapping
        The graph; iterated with ``.items()``.
    data_attributes, function_attributes : dict, optional
        Per-key attribute dicts for data (box) and function (circle) nodes.
        ``label`` and ``shape`` are only filled in when not already given.
    rankdir : str, optional
        Stored as the ``rankdir`` graph attribute.
    graph_attr, node_attr, edge_attr : dict, optional
        Attribute dicts handed to ``graphviz.Digraph``. The caller's
        ``graph_attr`` and ``node_attr`` are copied, never modified.
    collapse_outputs : bool, optional
        When true, a task and its output share one node and no
        function-to-output edge is drawn.
    verbose : bool, optional
        Forwarded to ``box_label``.
    **kwargs
        Merged into the graph attributes.

    Returns
    -------
    The populated ``graphviz.Digraph``.
    """
    data_attributes = data_attributes or {}
    function_attributes = function_attributes or {}
    # Work on copies: ``rankdir``, ``fontname`` and ``kwargs`` are written
    # into these dicts and must not leak back into the caller's objects.
    graph_attr = dict(graph_attr or {})
    node_attr = dict(node_attr or {})
    edge_attr = edge_attr or {}

    graph_attr["rankdir"] = rankdir
    node_attr["fontname"] = "helvetica"

    graph_attr.update(kwargs)
    g = graphviz.Digraph(
        graph_attr=graph_attr, node_attr=node_attr, edge_attr=edge_attr
    )

    seen = set()
    connected = set()

    for k, v in dsk.items():
        k_name = name(k)
        if istask(v):
            # With collapse_outputs the function and its output are one node.
            func_name = name((k, "function")) if not collapse_outputs else k_name
            if collapse_outputs or func_name not in seen:
                seen.add(func_name)
                attrs = function_attributes.get(k, {}).copy()
                attrs.setdefault("label", key_split(k))
                attrs.setdefault("shape", "circle")
                g.node(func_name, **attrs)
            if not collapse_outputs:
                g.edge(func_name, k_name)
                connected.add(func_name)
                connected.add(k_name)

            for dep in get_dependencies(dsk, k):
                dep_name = name(dep)
                if dep_name not in seen:
                    seen.add(dep_name)
                    attrs = data_attributes.get(dep, {}).copy()
                    attrs.setdefault("label", box_label(dep, verbose))
                    attrs.setdefault("shape", "box")
                    g.node(dep_name, **attrs)
                g.edge(dep_name, func_name)
                connected.add(dep_name)
                connected.add(func_name)

        elif ishashable(v) and v in dsk:
            # The value is itself a key of the graph: draw key -> key edge.
            v_name = name(v)
            g.edge(v_name, k_name)
            connected.add(v_name)
            connected.add(k_name)

        # In collapsed mode only keys that take part in an edge get a box.
        if (not collapse_outputs or k_name in connected) and k_name not in seen:
            seen.add(k_name)
            attrs = data_attributes.get(k, {}).copy()
            attrs.setdefault("label", box_label(k, verbose))
            attrs.setdefault("shape", "box")
            g.node(k_name, **attrs)
    return g