Example #1
    def worker_core(self, ident, i):
        """ Manage one core on one distributed worker node

        This coroutine listens on worker_queue for the following operations

        **Incoming Messages**:

        - compute-task:  call worker.compute(...) on remote node, report when done
        - close: close connection to worker node, report `worker-finished` to
          scheduler

        **Outgoing Messages**:

        - task-finished:  sent to scheduler once a task completes
        - task-erred: sent to scheduler when a task errs
        - worker-finished: sent to scheduler in response to a close command
        """
        worker = rpc(ip=ident[0], port=ident[1])
        logger.debug("Start worker core %s, %d", ident, i)

        while True:
            msg = yield self.worker_queues[ident].get()
            if msg['op'] == 'close':
                logger.debug("Worker core receives close message %s, %s",
                        ident, msg)
                break
            if msg['op'] == 'compute-task':
                key = msg['key']
                needed = msg['needed']
                task = msg['task']
                if not istask(task):
                    response, content = yield worker.update_data(data={key: task})
                    assert response == b'OK', response
                    nbytes = content['nbytes'][key]
                else:
                    response, content = yield worker.compute(function=execute_task,
                                                             args=(task,),
                                                             needed=needed,
                                                             key=key,
                                                             kwargs={})
                    if response == b'OK':
                        nbytes = content['nbytes']
                logger.debug("Compute response from worker %s, %s, %s, %s",
                             ident, key, response, content)
                if response == b'error':
                    error, traceback = content
                    self.mark_task_erred(key, ident, error, traceback)

                elif response == b'missing-data':
                    self.mark_missing_data(content.args, key=key, worker=ident)

                else:
                    self.mark_task_finished(key, ident, nbytes)

        yield worker.close(close=True)
        worker.close_streams()
        if msg.get('report', True):
            self.put({'op': 'worker-finished',
                      'worker': ident})
        logger.debug("Close worker core, %s, %d", ident, i)
Example #2
def worker_core(scheduler_queue, worker_queue, ident, i):
    """ Manage one core on one distributed worker node

    This coroutine listens on worker_queue for the following operations

    **Incoming Messages**:

    - compute-task:  call worker.compute(...) on remote node, report when done
    - close: close connection to worker node, report `worker-finished` to
      scheduler

    **Outgoing Messages**:

    - task-finished:  sent to scheduler once a task completes
    - task-erred: sent to scheduler when a task errs
    - worker-finished: sent to scheduler in response to a close command
    """
    worker = rpc(ip=ident[0], port=ident[1])
    logger.debug("Start worker core %s, %d", ident, i)

    while True:
        msg = yield worker_queue.get()
        if msg['op'] == 'close':
            break
        if msg['op'] == 'compute-task':
            key = msg['key']
            needed = msg['needed']
            task = msg['task']
            if not istask(task):
                response = yield worker.update_data(data={key: task})
                assert response == b'OK', response
            else:
                response = yield worker.compute(function=_execute_task,
                                                args=(task, {}),
                                                needed=needed,
                                                key=key,
                                                kwargs={})
            if response == b'error':
                err = yield worker.get_data(keys=[key])
                scheduler_queue.put_nowait({'op': 'task-erred',
                                            'key': key,
                                            'worker': ident,
                                            'exception': err[key]})

            elif isinstance(response, KeyError):
                scheduler_queue.put_nowait({'op': 'task-missing-data',
                                            'key': key,
                                            'worker': ident,
                                            'missing': response.args})

            else:
                scheduler_queue.put_nowait({'op': 'task-finished',
                                            'worker': ident,
                                            'key': key})

    yield worker.close(close=True)
    worker.close_streams()
    scheduler_queue.put_nowait({'op': 'worker-finished',
                                'worker': ident})
    logger.debug("Close worker core, %s, %d", ident, i)
Example #3
def execute_task(task):
    """ Evaluate a nested task """
    if istask(task):
        func, args = task[0], task[1:]
        return func(*map(execute_task, args))
    else:
        return task
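A quick check of the recursion, assuming add and mul from operator (not part of the example itself): the inner task is evaluated first, then fed to the outer call.

from operator import add, mul

assert execute_task((add, 1, (mul, 2, 3))) == 7  # mul(2, 3) -> 6, then add(1, 6)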
Example #4
def _bottom_up(net, term):
    if not istask(term):
        return net._rewrite(term)
    else:
        new_args = tuple(_bottom_up(net, t) for t in args(term))
        new_term = (head(term),) + new_args
    return net._rewrite(new_term)
Example #5
def dumps_task(task):
    """ Serialize a dask task

    Returns a dict of bytestrings that can each be loaded with ``loads``

    Examples
    --------
    Either returns a task as a function, args, kwargs dict

    >>> from operator import add
    >>> dumps_task((add, 1))  # doctest: +SKIP
    {'function': b'\x80\x04\x95\x00\x8c\t_operator\x94\x8c\x03add\x94\x93\x94.',
     'args': b'\x80\x04\x95\x07\x00\x00\x00K\x01K\x02\x86\x94.'}

    Or as a single task blob if it can't easily decompose the result.  This
    happens either if the task is highly nested, or if it isn't a task at all

    >>> dumps_task(1)  # doctest: +SKIP
    {'task': b'\x80\x04\x95\x03\x00\x00\x00\x00\x00\x00\x00K\x01.'}
    """
    if istask(task):
        if task[0] is apply and not any(map(_maybe_complex, task[2:])):
            d = {'function': dumps_function(task[1]),
                 'args': dumps(task[2])}
            if len(task) == 4:
                d['kwargs'] = dumps(task[3])
            return d
        elif not any(map(_maybe_complex, task[1:])):
            return {'function': dumps_function(task[0]),
                    'args': dumps(task[1:])}
    return {'task': dumps(task)}
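A hedged usage sketch: for a simple call, the decomposed form should come back with separate function and argument payloads (add is illustrative; dumps and dumps_function are the serializers the example already relies on):

from operator import add

payload = dumps_task((add, 1, 2))
# Keeping the function bytes separate lets a scheduler hash or cache
# the function independently of its arguments.
assert set(payload) == {'function', 'args'}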
Example #6
def to_networkx(d, data_attributes=None, function_attributes=None):
    if data_attributes is None:
        data_attributes = dict()
    if function_attributes is None:
        function_attributes = dict()

    g = nx.DiGraph()

    for k, v in sorted(d.items(), key=lambda x: x[0]):
        g.add_node(k, shape='box', **data_attributes.get(k, dict()))
        if istask(v):
            func, args = v[0], v[1:]
            func_node = make_hashable((v, 'function'))
            g.add_node(func_node,
                       shape='circle',
                       label=name(func),
                       **function_attributes.get(k, dict()))
            g.add_edge(func_node, k)
            for dep in sorted(get_dependencies(d, k)):
                arg2 = make_hashable(dep)
                g.add_node(arg2,
                           label=str(dep),
                           shape='box',
                           **data_attributes.get(dep, dict()))
                g.add_edge(arg2, func_node)
        else:
            v_hash = make_hashable(v)
            if v_hash not in d:
                g.add_node(k, label='%s=%s' % (k, v), **data_attributes.get(k, dict()))
            else:  # alias situation
                g.add_edge(v_hash, k)

    return g
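A minimal usage sketch, assuming the module's helpers (make_hashable, name, get_dependencies) are in scope; inc is an illustrative callable:

inc = lambda x: x + 1
g = to_networkx({'a': 1, 'b': (inc, 'a')})
# Expect a box node per key plus a circle node for the inc call,
# wired as a -> inc -> b.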
Example #7
def head(task):
    """Return the top level node of a task"""

    if istask(task):
        return task[0]
    elif isinstance(task, list):
        return list
    else:
        return task
Example #8
def args(task):
    """Get the arguments for the current task"""

    if istask(task):
        return task[1:]
    elif isinstance(task, list):
        return task
    else:
        return ()
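Taken together with head above, a short demo of the contract (add assumed from operator):

from operator import add

assert head((add, 1, 2)) is add and args((add, 1, 2)) == (1, 2)
assert head([1, 2]) is list and args([1, 2]) == [1, 2]  # lists map over themselves
assert head('x') == 'x' and args('x') == ()             # literals have no arguments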
Example #9
def execute_task(task):
    """ Evaluate a nested task

    >>> inc = lambda x: x + 1
    >>> execute_task((inc, 1))
    2
    >>> execute_task((sum, [1, 2, (inc, 3)]))
    7
    """
    if istask(task):
        func, args = task[0], task[1:]
        return func(*map(execute_task, args))
    elif isinstance(task, list):
        return list(map(execute_task, task))
    else:
        return task
Example #10
def test_istask():
    assert istask((inc, 1))
    assert not istask(1)
    assert not istask((1, 2))
    f = namedtuple('f', ['x', 'y'])
    assert not istask(f(sum, 2))
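These assertions pin down the predicate itself; dask's istask is essentially the following check (reproduced from memory, so verify against dask.core):

def istask(x):
    # The strict type() test is what rejects the namedtuple above:
    # it would pass an isinstance(x, tuple) check but is not a plain tuple.
    return type(x) is tuple and x and callable(x[0])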
Example #11
def _execute_task(task: tuple):
    if not istask(task):
        return _get_arg(task)
    return spawn(task[0], args=tuple(_get_arg(a) for a in task[1:]))
Example #12
    def worker_core(self, ident, i):
        """ Manage one core on one distributed worker node

        This coroutine listens on worker_queue for the following operations

        **Incoming Messages**:

        - compute-task:  call worker.compute(...) on remote node, report when done
        - close: close connection to worker node, report `worker-finished` to
          scheduler

        See Also
        --------
        Scheduler.mark_task_finished
        Scheduler.mark_task_erred
        Scheduler.mark_missing_data
        distributed.worker.Worker.compute
        """
        worker = rpc(ip=ident[0], port=ident[1])
        logger.debug("Start worker core %s, %d", ident, i)

        while True:
            msg = yield self.worker_queues[ident].get()
            if msg["op"] == "close":
                logger.debug("Worker core receives close message %s, %s", ident, msg)
                break
            if msg["op"] == "compute-task":
                key = msg["key"]
                who_has = msg["who_has"]
                task = msg["task"]
                if not istask(task):
                    response, content = yield worker.update_data(data={key: task}, report=self.center is not None)
                    assert response == b"OK", response
                    nbytes = content["nbytes"][key]
                else:
                    response, content = yield worker.compute(
                        function=execute_task,
                        args=(task,),
                        who_has=who_has,
                        key=key,
                        kwargs={},
                        report=self.center is not None,
                    )
                    if response == b"OK":
                        nbytes = content["nbytes"]
                logger.debug("Compute response from worker %s, %s, %s, %s", ident, key, response, content)
                if response == b"error":
                    error, traceback = content
                    self.mark_task_erred(key, ident, error, traceback)

                elif response == b"missing-data":
                    self.mark_missing_data(content.args, key=key, worker=ident)

                else:
                    self.mark_task_finished(key, ident, nbytes)

        yield worker.close(close=True)
        worker.close_streams()
        if msg.get("report", True):
            self.put({"op": "worker-finished", "worker": ident})
        logger.debug("Close worker core, %s, %d", ident, i)
Example #13
def test_istask():
    assert istask((inc, 1))
    assert not istask(1)
    assert not istask((1, 2))
    f = namedtuple("f", ["x", "y"])
    assert not istask(f(sum, 2))
Example #14
def to_graphviz(
    dsk,
    data_attributes=None,
    function_attributes=None,
    rankdir="BT",
    graph_attr=None,
    node_attr=None,
    edge_attr=None,
    collapse_outputs=False,
    verbose=False,
    **kwargs,
):
    data_attributes = data_attributes or {}
    function_attributes = function_attributes or {}
    graph_attr = graph_attr or {}
    node_attr = node_attr or {}
    edge_attr = edge_attr or {}

    graph_attr["rankdir"] = rankdir
    node_attr["fontname"] = "helvetica"

    graph_attr.update(kwargs)
    g = graphviz.Digraph(graph_attr=graph_attr,
                         node_attr=node_attr,
                         edge_attr=edge_attr)

    seen = set()
    connected = set()

    for k, v in dsk.items():
        k_name = name(k)
        if istask(v):
            func_name = name((k, "function")) if not collapse_outputs else k_name
            if collapse_outputs or func_name not in seen:
                seen.add(func_name)
                attrs = function_attributes.get(k, {}).copy()
                attrs.setdefault("label", key_split(k))
                attrs.setdefault("shape", "circle")
                g.node(func_name, **attrs)
            if not collapse_outputs:
                g.edge(func_name, k_name)
                connected.add(func_name)
                connected.add(k_name)

            for dep in get_dependencies(dsk, k):
                dep_name = name(dep)
                if dep_name not in seen:
                    seen.add(dep_name)
                    attrs = data_attributes.get(dep, {}).copy()
                    attrs.setdefault("label", box_label(dep, verbose))
                    attrs.setdefault("shape", "box")
                    g.node(dep_name, **attrs)
                g.edge(dep_name, func_name)
                connected.add(dep_name)
                connected.add(func_name)

        elif ishashable(v) and v in dsk:
            v_name = name(v)
            g.edge(v_name, k_name)
            connected.add(v_name)
            connected.add(k_name)

        if (not collapse_outputs
                or k_name in connected) and k_name not in seen:
            seen.add(k_name)
            attrs = data_attributes.get(k, {}).copy()
            attrs.setdefault("label", box_label(k, verbose))
            attrs.setdefault("shape", "box")
            g.node(k_name, **attrs)
    return g
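A minimal usage sketch, assuming the dask-style helpers this module imports (name, key_split, box_label, get_dependencies); inc is illustrative:

inc = lambda x: x + 1
g = to_graphviz({'x': 1, 'y': (inc, 'x')})
g.render('dask-graph', format='png')  # standard graphviz.Digraph API; writes dask-graph.png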
Example #15
def pprint_task(task, keys, label_size=60):
    """Return a nicely formatted string for a task.

    Parameters
    ----------
    task:
        Value within dask graph to render as text
    keys: iterable
        List of keys within dask graph
    label_size: int (optional)
        Maximum size of output label, defaults to 60

    Examples
    --------
    >>> from operator import add, mul
    >>> dsk = {'a': 1,
    ...        'b': 2,
    ...        'c': (add, 'a', 'b'),
    ...        'd': (add, (mul, 'a', 'b'), 'c'),
    ...        'e': (sum, ['a', 'b', 5]),
    ...        'f': (add,),
    ...        'g': []}

    >>> pprint_task(dsk['c'], dsk)
    'add(_, _)'
    >>> pprint_task(dsk['d'], dsk)
    'add(mul(_, _), _)'
    >>> pprint_task(dsk['e'], dsk)
    'sum([_, _, *])'
    >>> pprint_task(dsk['f'], dsk)
    'add()'
    >>> pprint_task(dsk['g'], dsk)
    '[]'
    """
    if istask(task):
        func = task[0]
        if func is apply:
            head = funcname(task[1])
            tail = ")"
            args = unquote(task[2]) if len(task) > 2 else ()
            kwargs = unquote(task[3]) if len(task) > 3 else {}
        else:
            if hasattr(func, "funcs"):
                head = "(".join(funcname(f) for f in func.funcs)
                tail = ")" * len(func.funcs)
            else:
                head = funcname(task[0])
                tail = ")"
            args = task[1:]
            kwargs = {}
        if args or kwargs:
            label_size2 = int(
                (label_size - len(head) - len(tail)) // (len(args) + len(kwargs))
            )
            pprint = lambda t: pprint_task(t, keys, label_size2)
        if args:
            if label_size2 > 5:
                args = ", ".join(pprint(t) for t in args)
            else:
                args = "..."
        else:
            args = ""
        if kwargs:
            if label_size2 > 5:
                kwargs = ", " + ", ".join(
                    f"{k}={pprint(v)}" for k, v in sorted(kwargs.items())
                )
            else:
                kwargs = ", ..."
        else:
            kwargs = ""
        return f"{head}({args}{kwargs}{tail}"
    elif isinstance(task, list):
        if not task:
            return "[]"
        elif len(task) > 3:
            result = pprint_task(task[:3], keys, label_size)
            return result[:-1] + ", ...]"
        else:
            label_size2 = int((label_size - 2 - 2 * len(task)) // len(task))
            args = ", ".join(pprint_task(t, keys, label_size2) for t in task)
            return f"[{args}]"
    else:
        try:
            if task in keys:
                return "_"
            else:
                return "*"
        except TypeError:
            return "*"
Example #16
def test_istask():
    assert istask((inc, 1))
    assert not istask(1)
    assert not istask((1, 2))
Example #17
def _bottom_up(net, term):
    if istask(term):
        term = (head(term),) + tuple(_bottom_up(net, t) for t in args(term))
    elif isinstance(term, list):
        term = [_bottom_up(net, t) for t in args(term)]
    return net._rewrite(term)
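For orientation, this helper is the bottom-up strategy behind dask's rewrite rules; a sketch of how it is typically driven, with RewriteRule/RuleSet as in dask.rewrite (signatures quoted from memory, treat them as an assumption):

from operator import add
from dask.rewrite import RewriteRule, RuleSet

# Rewrite (add, x, 0) -> x; bottom-up application fires inner matches first.
rs = RuleSet(RewriteRule((add, 'x', 0), 'x', ('x',)))
assert rs.rewrite((add, (add, 5, 0), 0)) == 5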
Example #18
def _bottom_up(net, term):
    if istask(term):
        term = (head(term), ) + tuple(_bottom_up(net, t) for t in args(term))
    elif isinstance(term, list):
        term = [_bottom_up(net, t) for t in args(term)]
    return net._rewrite(term)
Example #19
def _rayify_task(
        task,
        key,
        deps,
        ray_presubmit_cbs,
        ray_postsubmit_cbs,
        ray_pretask_cbs,
        ray_posttask_cbs,
):
    """
    Rayifies the given task, submitting it as a Ray task to the Ray cluster.

    Args:
        task (tuple): A Dask graph value, being either a literal, dependency
            key, Dask task, or a list thereof.
        key (str): The Dask graph key for the given task.
        deps (dict): The dependencies of this task.
        ray_presubmit_cbs (callable): Pre-task submission callbacks.
        ray_postsubmit_cbs (callable): Post-task submission callbacks.
        ray_pretask_cbs (callable): Pre-task execution callbacks.
        ray_posttask_cbs (callable): Post-task execution callbacks.

    Returns:
        A literal, a Ray object reference representing a submitted task, or a
        list thereof.
    """
    if isinstance(task, list):
        # Recursively rayify this list. This will still bottom out at the first
        # actual task encountered, inlining any tasks in that task's arguments.
        return [
            _rayify_task(
                t,
                key,
                deps,
                ray_presubmit_cbs,
                ray_postsubmit_cbs,
                ray_pretask_cbs,
                ray_posttask_cbs,
            ) for t in task
        ]
    elif istask(task):
        # Unpacks and repacks Ray object references and submits the task to the
        # Ray cluster for execution.
        if ray_presubmit_cbs is not None:
            alternate_returns = [
                cb(task, key, deps) for cb in ray_presubmit_cbs
            ]
            for alternate_return in alternate_returns:
                # We don't submit a Ray task if a presubmit callback returns
                # a non-`None` value, instead we return said value.
                # NOTE: This returns the first non-None presubmit callback
                # return value.
                if alternate_return is not None:
                    return alternate_return

        func, args = task[0], task[1:]
        if func is multiple_return_get:
            return _execute_task(task, deps)
        # If the function's arguments contain nested object references, we must
        # unpack said object references into a flat set of arguments so that
        # Ray properly tracks the object dependencies between Ray tasks.
        arg_object_refs, repack = unpack_object_refs(args, deps)
        # Submit the task using a wrapper function.
        object_refs = dask_task_wrapper.options(
            name=f"dask:{key!s}",
            num_returns=(1 if not isinstance(func, MultipleReturnFunc) else
                         func.num_returns),
        ).remote(
            func,
            repack,
            key,
            ray_pretask_cbs,
            ray_posttask_cbs,
            *arg_object_refs,
        )

        if ray_postsubmit_cbs is not None:
            for cb in ray_postsubmit_cbs:
                cb(task, key, deps, object_refs)

        return object_refs
    elif not ishashable(task):
        return task
    elif task in deps:
        return deps[task]
    else:
        return task
Example #20
def worker_core(scheduler_queue, worker_queue, ident, i):
    """ Manage one core on one distributed worker node

    This coroutine listens on worker_queue for the following operations

    **Incoming Messages**:

    - compute-task:  call worker.compute(...) on remote node, report when done
    - close: close connection to worker node, report `worker-finished` to
      scheduler

    **Outgoing Messages**:

    - task-finished:  sent to scheduler once a task completes
    - task-erred: sent to scheduler when a task errs
    - worker-finished: sent to scheduler in response to a close command
    """
    worker = rpc(ip=ident[0], port=ident[1])
    logger.debug("Start worker core %s, %d", ident, i)

    while True:
        msg = yield worker_queue.get()
        if msg['op'] == 'close':
            break
        if msg['op'] == 'compute-task':
            key = msg['key']
            needed = msg['needed']
            task = msg['task']
            if not istask(task):
                response = yield worker.update_data(data={key: task})
                assert response == b'OK', response
            else:
                response = yield worker.compute(function=_execute_task,
                                                args=(task, {}),
                                                needed=needed,
                                                key=key,
                                                kwargs={})
            if response == b'error':
                err = yield worker.get_data(keys=[key])
                scheduler_queue.put_nowait({
                    'op': 'task-erred',
                    'key': key,
                    'worker': ident,
                    'exception': err[key]
                })

            elif isinstance(response, KeyError):
                scheduler_queue.put_nowait({
                    'op': 'task-missing-data',
                    'key': key,
                    'worker': ident,
                    'missing': response.args
                })

            else:
                scheduler_queue.put_nowait({
                    'op': 'task-finished',
                    'worker': ident,
                    'key': key
                })

    yield worker.close(close=True)
    worker.close_streams()
    scheduler_queue.put_nowait({'op': 'worker-finished', 'worker': ident})
    logger.debug("Close worker core, %s, %d", ident, i)