Пример #1
0
def execute_with_scope(expr, scope, aggcontext=None, clients=None, **kwargs):
    """Evaluate `expr` using the data bound in `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        The expression to evaluate.
    scope : collections.Mapping
        Maps :class:`~ibis.expr.operations.Node` subclass instances to
        concrete data such as a pandas DataFrame.
    aggcontext : Optional[ibis.pandas.aggcontext.AggregationContext]
        Replaced by ``agg_ctx.Summarize()`` when ``None``.
    clients : Optional[list]
        Replaced by ``list(find_backends(expr))`` when ``None``.
    kwargs : Mapping
        Forwarded to ``pre_execute``, ``post_execute`` and
        ``execute_until_in_scope``.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    root = expr.op()

    backends = list(find_backends(expr)) if clients is None else clients
    context = agg_ctx.Summarize() if aggcontext is None else aggcontext

    # pre_execute runs before anything is computed and before leaf nodes are
    # associated with data, so clients can supply their own data per leaf.
    hook_scope = pre_execute(
        root, *backends, scope=scope, aggcontext=context, **kwargs
    )
    merged_scope = toolz.merge(scope, hook_scope)

    # XXX: post_execute is deliberately bound to the caller's `scope` and not
    # to `merged_scope`, so it sees the scope of execute_with_scope rather
    # than the scope of execute_until_in_scope.
    finish = functools.partial(
        post_execute,
        scope=scope,
        aggcontext=context,
        clients=backends,
        **kwargs,
    )

    return execute_until_in_scope(
        expr,
        merged_scope,
        aggcontext=context,
        clients=backends,
        post_execute_=finish,
        **kwargs,
    )
Пример #2
0
def execute_with_scope(expr, scope, aggcontext=None, clients=None, **kwargs):
    """Run `expr` to completion against the data held in `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        Expression whose result is wanted.
    scope : collections.Mapping
        A dictionary mapping :class:`~ibis.expr.operations.Node` subclass
        instances to concrete data such as a pandas DataFrame.
    aggcontext : Optional[ibis.pandas.aggcontext.AggregationContext]
        When ``None`` a fresh ``agg_ctx.Summarize()`` is used.
    clients : Optional[list]
        When ``None`` the backends are discovered with ``find_backends``.
    kwargs : Mapping
        Extra keyword arguments passed through to every hook.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    op = expr.op()

    if clients is None:
        clients = list(find_backends(expr))
    if aggcontext is None:
        aggcontext = agg_ctx.Summarize()

    # Keyword arguments shared by the post_execute hook and the executor.
    shared = {'aggcontext': aggcontext, 'clients': clients, **kwargs}

    # Give clients the chance to intercept the expression (and provide data
    # for leaves) before any computation happens.
    seeded_scope = toolz.merge(
        scope,
        pre_execute(
            op, *clients, scope=scope, aggcontext=aggcontext, **kwargs
        ),
    )

    # XXX: the hook is bound to `scope`, *not* `seeded_scope`, on purpose:
    # post_execute must see the scope of execute_with_scope, not the scope
    # of execute_until_in_scope.
    post_hook = functools.partial(post_execute, scope=scope, **shared)

    return execute_until_in_scope(
        expr, seeded_scope, post_execute_=post_hook, **shared
    )
Пример #3
0
def execute_until_in_scope(
    expr, scope, aggcontext=None, clients=None, post_execute_=None, **kwargs
):
    """Keep executing until the op of `expr` is present in `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
    scope : Mapping
    aggcontext : Optional[AggregationContext]
        Must not be ``None`` despite the default.
    clients : List[ibis.client.Client]
        Must not be ``None`` despite the default.
    post_execute_ : callable
        Must not be ``None`` despite the default.
    kwargs : Mapping

    Returns
    -------
    The value stored in the scope under ``expr.op()``.
    """
    # these should never be None
    required = (
        ('aggcontext', aggcontext),
        ('clients', clients),
        ('post_execute_', post_execute_),
    )
    for label, value in required:
        assert value is not None, '{} is None'.format(label)

    node = expr.op()

    # base case: the op is already computed (or is a leaf data node)
    if node in scope:
        return scope[node]

    # Everything that is forwarded unchanged to the helpers below.
    forwarded = dict(
        aggcontext=aggcontext,
        clients=clients,
        post_execute_=post_execute_,
        **kwargs,
    )

    computed_scope = execute_bottom_up(expr, scope, **forwarded)
    hook_scope = pre_execute(node, *clients, scope=scope, **kwargs)

    # Recurse with the enlarged scope until the base case is reached.
    return execute_until_in_scope(
        expr, toolz.merge(computed_scope, hook_scope), **forwarded
    )
Пример #4
0
def execute_until_in_scope(expr,
                           scope,
                           aggcontext=None,
                           clients=None,
                           post_execute_=None,
                           **kwargs):
    """Execute repeatedly until the op of `expr` can be read from `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
    scope : Mapping
    aggcontext : Optional[AggregationContext]
    clients : List[ibis.client.Client]
    post_execute_ : callable
    kwargs : Mapping

    Returns
    -------
    The entry of the scope keyed by ``expr.op()``.

    Notes
    -----
    `aggcontext`, `clients` and `post_execute_` default to ``None`` but are
    asserted to be non-``None``.
    """
    # these should never be None
    assert aggcontext is not None, 'aggcontext is None'
    assert clients is not None, 'clients is None'
    assert post_execute_ is not None, 'post_execute_ is None'

    op = expr.op()

    if op not in scope:
        # Not computed yet: evaluate bottom-up, let pre_execute contribute
        # to the scope, then try again with the merged result.
        bottom_up_scope = execute_bottom_up(
            expr,
            scope,
            aggcontext=aggcontext,
            post_execute_=post_execute_,
            clients=clients,
            **kwargs,
        )
        injected_scope = pre_execute(op, *clients, scope=scope, **kwargs)
        refreshed_scope = toolz.merge(bottom_up_scope, injected_scope)
        return execute_until_in_scope(
            expr,
            refreshed_scope,
            aggcontext=aggcontext,
            clients=clients,
            post_execute_=post_execute_,
            **kwargs,
        )

    # base case: our op has been computed (or is a leaf data node)
    return scope[op]
Пример #5
0
def execute_window_op(
    op, data, window, scope=None, aggcontext=None, clients=None, **kwargs
):
    """Evaluate the operand of a window operation over `window`.

    Parameters
    ----------
    op
        The window operation; its operand is read from ``op.expr``.
    data
        Not read; it is replaced by the result of executing the single root
        table of `op`.
    window
        Provides ``following``, ``_order_by`` and ``_group_by``.
    scope : Mapping
        Merged with whatever ``pre_execute`` returns for the operand.
    aggcontext
        Used for the preparatory executions; the operand itself is executed
        with the context returned by ``get_aggcontext``.
    clients
        Unpacked into ``pre_execute`` and forwarded to every ``execute``.
    kwargs : Mapping

    Returns
    -------
    The post-processed result, asserted to have the same length as the
    root table data.

    Raises
    ------
    com.OperationNotDefinedError
        If the window has an order by, ``following`` is not 0 and the
        operand is not an ``ops.ShiftBase``.
    """
    operand = op.expr
    operand_op = operand.op()

    # pre_execute is invoked by hand because the operand is executed
    # manually below, so its scope changes would otherwise be missed.
    scope = toolz.merge(
        scope,
        pre_execute(
            operand_op, *clients, scope=scope, aggcontext=aggcontext, **kwargs
        ),
    )

    (root, ) = op.root_tables()
    data = execute(
        root.to_expr(),
        scope=scope,
        clients=clients,
        aggcontext=aggcontext,
        **kwargs,
    )

    following = window.following
    order_by = window._order_by
    is_shift = isinstance(operand_op, ops.ShiftBase)
    if order_by and following != 0 and not is_shift:
        raise com.OperationNotDefinedError(
            'Window functions affected by following with order_by are not '
            'implemented')

    # Plain table columns are grouped by name; any other key expression is
    # executed to obtain the grouping values.
    group_by = window._group_by
    grouping_keys = []
    for key in group_by:
        key_op = key.op()
        if isinstance(key_op, ops.TableColumn):
            grouping_keys.append(key_op.name)
        else:
            grouping_keys.append(
                execute(
                    key,
                    scope=scope,
                    clients=clients,
                    aggcontext=aggcontext,
                    **kwargs,
                ))

    # Pick the data source and the matching post-processing helper for
    # each of the four (order by, group by) combinations.
    if order_by:
        if group_by:
            sorted_df, grouping_keys, ordering_keys = (
                util.compute_sorted_frame(
                    data, order_by, group_by=group_by, **kwargs))
            source = sorted_df.groupby(grouping_keys, sort=True)
            post_process = _post_process_group_by_order_by
        else:
            source, grouping_keys, ordering_keys = (
                util.compute_sorted_frame(data, order_by, **kwargs))
            post_process = _post_process_order_by
    else:
        ordering_keys = ()
        if group_by:
            source = data.groupby(grouping_keys, sort=False)
            post_process = _post_process_group_by
        else:
            source = data
            post_process = _post_process_empty

    # Point every root table of the operand at the prepared source.
    root_overrides = OrderedDict(
        (table, source) for table in operand_op.root_tables())
    new_scope = toolz.merge(scope, root_overrides, factory=OrderedDict)

    # figure out what the dtype of the operand is
    operand_dtype = operand.type().to_pandas()

    aggcontext = get_aggcontext(
        window,
        operand=operand,
        operand_dtype=operand_dtype,
        parent=source,
        group_by=grouping_keys,
        order_by=ordering_keys,
    )

    result = execute(
        operand,
        scope=new_scope,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    series = post_process(result, data, ordering_keys, grouping_keys)
    assert len(data) == len(
        series
    ), 'input data source and computed column do not have the same length'
    return series
Пример #6
0
def execute_window_op(op,
                      data,
                      window,
                      scope=None,
                      aggcontext=None,
                      clients=None,
                      **kwargs):
    """Evaluate the operand of a window operation over `window`.

    Parameters
    ----------
    op
        The window operation; its operand is read from ``op.expr``.
    data
        Not read; it is replaced by the result of executing the single root
        table of `op`.
    window
        Provides ``following``, ``preceding``, ``max_lookback``,
        ``_order_by`` and ``_group_by``.
    scope : Mapping
        Merged with whatever ``pre_execute`` returns for the operand.
    aggcontext
        Used for the preparatory executions; the operand itself is executed
        with an aggregation context chosen from the window (see below).
    clients
        Unpacked into ``pre_execute`` and forwarded to every ``execute``.
    kwargs : Mapping

    Returns
    -------
    The post-processed result, asserted to have the same length as the
    root table data.

    Raises
    ------
    com.OperationNotDefinedError
        If the window has an order by, ``following`` is not 0 and the
        operand is not an ``ops.ShiftBase``.
    """
    operand = op.expr
    # pre execute "manually" here because otherwise we wouldn't pickup
    # relevant scope changes from the child operand since we're managing
    # execution of that by hand
    operand_op = operand.op()
    pre_executed_scope = pre_execute(operand_op,
                                     *clients,
                                     scope=scope,
                                     aggcontext=aggcontext,
                                     **kwargs)
    scope = toolz.merge(scope, pre_executed_scope)

    root, = op.root_tables()
    root_expr = root.to_expr()
    data = execute(
        root_expr,
        scope=scope,
        clients=clients,
        aggcontext=aggcontext,
        **kwargs,
    )

    following = window.following
    order_by = window._order_by

    if (order_by and following != 0
            and not isinstance(operand_op, ops.ShiftBase)):
        raise com.OperationNotDefinedError(
            'Window functions affected by following with order_by are not '
            'implemented')

    # Plain table columns are grouped by name. Any other key expression is
    # executed, and it must see the same scope and clients as every other
    # execution in this function: otherwise the scope merged in from
    # pre_execute above is ignored for the key.
    group_by = window._group_by
    grouping_keys = [
        key_op.name if isinstance(key_op, ops.TableColumn) else execute(
            key, scope=scope, clients=clients, aggcontext=aggcontext, **kwargs)
        for key, key_op in zip(group_by,
                               map(operator.methodcaller('op'), group_by))
    ]

    if not order_by:
        ordering_keys = ()

    # Pick the data source and the matching post-processing helper for
    # each of the four (group by, order by) combinations.
    if group_by:
        if order_by:
            (
                sorted_df,
                grouping_keys,
                ordering_keys,
            ) = util.compute_sorted_frame(data,
                                          order_by,
                                          group_by=group_by,
                                          **kwargs)
            source = sorted_df.groupby(grouping_keys, sort=True)
            post_process = _post_process_group_by_order_by
        else:
            source = data.groupby(grouping_keys, sort=False)
            post_process = _post_process_group_by
    else:
        if order_by:
            source, grouping_keys, ordering_keys = util.compute_sorted_frame(
                data, order_by, **kwargs)
            post_process = _post_process_order_by
        else:
            source = data
            post_process = _post_process_empty

    # Point every root table of the operand at the prepared source.
    new_scope = toolz.merge(
        scope,
        OrderedDict((t, source) for t in operand_op.root_tables()),
        factory=OrderedDict,
    )

    # figure out what the dtype of the operand is
    operand_type = operand.type()
    operand_dtype = operand_type.to_pandas()

    # no order by or group by: default summarization aggcontext
    #
    # if we're reducing and we have an order by expression then we need to
    # expand or roll.
    #
    # otherwise we're transforming
    if not grouping_keys and not ordering_keys:
        aggcontext = agg_ctx.Summarize()
    elif (isinstance(operand_op,
                     (ops.Reduction, ops.CumulativeOp, ops.Any, ops.All))
          and ordering_keys):
        preceding = window.preceding
        if preceding is not None:
            # moving window
            max_lookback = window.max_lookback
            assert not isinstance(operand_op, ops.CumulativeOp)
            aggcontext = agg_ctx.Moving(
                preceding,
                max_lookback,
                parent=source,
                group_by=grouping_keys,
                order_by=ordering_keys,
                dtype=operand_dtype,
            )
        else:
            # expanding window
            aggcontext = agg_ctx.Cumulative(
                parent=source,
                group_by=grouping_keys,
                order_by=ordering_keys,
                dtype=operand_dtype,
            )
    else:
        # groupby transform (window with a partition by clause in SQL parlance)
        aggcontext = agg_ctx.Transform(
            parent=source,
            group_by=grouping_keys,
            order_by=ordering_keys,
            dtype=operand_dtype,
        )

    result = execute(
        operand,
        scope=new_scope,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    series = post_process(result, data, ordering_keys, grouping_keys)
    assert len(data) == len(
        series
    ), 'input data source and computed column do not have the same length'
    return series
Пример #7
0
def execute_until_in_scope(
    expr, scope, aggcontext=None, clients=None, post_execute_=None, **kwargs
):
    """Execute until the op of `expr` is available in a scope.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
    scope : Mapping
    aggcontext : Optional[AggregationContext]
        Must not be ``None`` despite the default.
    clients : List[ibis.client.Client]
        Must not be ``None`` despite the default.
    post_execute_ : callable
        Called as ``post_execute_(op, result)``; must not be ``None``.
    kwargs : Mapping

    Returns
    -------
    Mapping
        A mapping that contains ``expr.op()``: `scope` itself when the op is
        already present, the scope merged with the ``pre_execute`` result
        when that supplies the op, otherwise a one-entry dict holding the
        freshly computed value.

    Raises
    ------
    com.UnboundExpressionError
        If there are computable arguments but no scopes were produced.
    """
    # these should never be None
    assert aggcontext is not None, 'aggcontext is None'
    assert clients is not None, 'clients is None'
    assert post_execute_ is not None, 'post_execute_ is None'

    op = expr.op()

    # base case: the op is already computed (or is a leaf data node)
    if op in scope:
        return scope

    if isinstance(op, ops.Literal):
        # literals skip the execute_node dispatch overhead
        literal = execute_literal(
            op, op.value, expr.type(), aggcontext=aggcontext, **kwargs
        )
        return {op: literal}

    hook_scope = pre_execute(
        op, *clients, scope=scope, aggcontext=aggcontext, **kwargs
    )
    new_scope = toolz.merge(scope, hook_scope)

    # Short circuit: pre_execute already provided the op, so its arguments
    # do not need to be executed.
    if op in new_scope:
        return new_scope

    # Only some inputs can be computed on: expressions, None and scalar
    # types are computable whereas ``list``s are not.
    computable_args = list(filter(is_computable_input, op.inputs))

    # One scope per computable argument: expressions are executed
    # recursively, anything else maps to itself.
    scopes = []
    for arg in computable_args:
        if hasattr(arg, 'op'):
            arg_scope = execute_until_in_scope(
                arg,
                new_scope,
                aggcontext=aggcontext,
                post_execute_=post_execute_,
                clients=clients,
                **kwargs,
            )
        else:
            arg_scope = {arg: arg}
        scopes.append(arg_scope)

    # if we're unable to find data then raise an exception
    if computable_args and not scopes:
        raise com.UnboundExpressionError(
            'Unable to find data for expression:\n{}'.format(repr(expr)))

    # there should be exactly one dictionary per computable argument
    assert len(computable_args) == len(scopes)

    new_scope = toolz.merge(new_scope, *scopes)

    # Collect the computed value of every argument and hand them to this
    # node's execute_node implementation.
    data = []
    for arg in computable_args:
        data.append(new_scope[arg.op()] if hasattr(arg, 'op') else arg)

    result = execute_node(
        op,
        *data,
        scope=scope,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    return {op: post_execute_(op, result)}
Пример #8
0
def main_execute(
    expr: ir.Expr,
    scope: Optional[Mapping] = None,
    aggcontext: Optional[agg_ctx.AggregationContext] = None,
    clients: Sequence[ibis.client.Client] = (),
    params: Optional[Mapping] = None,
    **kwargs: Any,
):
    """Execute an ibis expression against the pandas backend.

    Parameters
    ----------
    expr
        The expression to execute.
    scope
        Pre-computed values; treated as empty when ``None``.
    aggcontext
        Replaced by ``agg_ctx.Summarize()`` when ``None``.
    clients
        Replaced by ``list(find_backends(expr))`` when empty.
    params
        Extra values to place in scope; keys are passed through
        ``get_node``. Treated as empty when ``None``.
    kwargs
        Forwarded to ``execute_first``, ``pre_execute`` and the node
        executor.

    Returns
    -------
    The value in scope for the last node of the topologically sorted graph.
    """
    toposorted, dependencies = toposort(expr)

    if params is None:
        params = {}
    params = toolz.keymap(get_node, params)

    # Objects without dependencies that are not ibis nodes stand for
    # themselves. Nodes are excluded because cases such as zero argument
    # UDFs have no dependencies yet still need to be evaluated.
    self_valued = {}
    for key, parents in dependencies.items():
        if not (parents or isinstance(key, ops.Node)):
            self_valued[key] = key

    full_scope = toolz.merge(
        {} if scope is None else scope, self_valued, params
    )

    if not clients:
        clients = list(find_backends(expr))

    if aggcontext is None:
        aggcontext = agg_ctx.Summarize()

    # give backends a chance to inject scope if needed
    full_scope = toolz.merge(
        full_scope,
        execute_first(
            expr.op(),
            *clients,
            scope=full_scope,
            aggcontext=aggcontext,
            **kwargs,
        ),
    )

    # Only nodes that are not in scope yet have to be computed; this list
    # is fixed before the loop starts.
    pending = [node for node in toposorted if node not in full_scope]

    for node in pending:
        # allow clients to pre compute nodes as they like, then overlay
        # whatever they returned on the current scope
        execute_scope = toolz.merge(
            full_scope,
            pre_execute(
                node,
                *clients,
                scope=full_scope,
                aggcontext=aggcontext,
                **kwargs,
            ),
        )

        # pre_execute supplied the node: adopt the merged scope, move on
        if node in execute_scope:
            full_scope = execute_scope
            continue

        # Literals take a quicker path than the execute_node dispatch.
        executor = (
            execute_literal if isinstance(node, ops.Literal) else execute_node
        )

        # the already computed inputs this node depends on
        execute_args = [
            full_scope[get_node(arg)] for arg in dependencies[node]
        ]

        raw_result = executor(
            node,
            *execute_args,
            aggcontext=aggcontext,
            scope=execute_scope,
            clients=clients,
            **kwargs,
        )

        # last chance to perform any additional computation on the result
        # before it gets added to scope for the next node
        full_scope[node] = post_execute(
            node,
            raw_result,
            clients=clients,
            aggcontext=aggcontext,
            scope=full_scope,
        )

    # the last node in the toposorted graph is the root and maps to the
    # desired result in scope
    return full_scope[toposorted[-1]]
Пример #9
0
def execute_window_op(
    op, data, window, scope=None, aggcontext=None, clients=None, **kwargs
):
    """Evaluate the operand of a window operation over `window`.

    Parameters
    ----------
    op
        The window operation; its operand is read from ``op.expr``.
    data
        Not read; it is replaced by the result of executing the single root
        table of `op`.
    window
        Provides ``following``, ``preceding``, ``_order_by`` and
        ``_group_by``.
    scope : Mapping
        Merged with whatever ``pre_execute`` returns for the operand.
    aggcontext
        Used for the preparatory executions; the operand itself is executed
        with an aggregation context chosen from the window (see below).
    clients
        Unpacked into ``pre_execute`` and forwarded to every ``execute``.
    kwargs : Mapping

    Returns
    -------
    The post-processed result, asserted to have the same length as the
    root table data.

    Raises
    ------
    com.OperationNotDefinedError
        If the window has an order by, ``following`` is not 0 and the
        operand is not an ``ops.ShiftBase``.
    """
    operand = op.expr
    # pre execute "manually" here because otherwise we wouldn't pickup
    # relevant scope changes from the child operand since we're managing
    # execution of that by hand
    operand_op = operand.op()
    pre_executed_scope = pre_execute(
        operand_op, *clients, scope=scope, aggcontext=aggcontext, **kwargs
    )
    scope = toolz.merge(scope, pre_executed_scope)

    # `clients` is forwarded to every nested execute so the same backends
    # are used throughout instead of being looked up again per call.
    root, = op.root_tables()
    root_expr = root.to_expr()
    data = execute(
        root_expr,
        scope=scope,
        clients=clients,
        aggcontext=aggcontext,
        **kwargs,
    )

    following = window.following
    order_by = window._order_by

    if (
        order_by
        and following != 0
        and not isinstance(operand_op, ops.ShiftBase)
    ):
        raise com.OperationNotDefinedError(
            'Window functions affected by following with order_by are not '
            'implemented'
        )

    # Plain table columns are grouped by name. Any other key expression is
    # executed with the merged scope, so that scope changes coming from
    # pre_execute above also apply to the key.
    group_by = window._group_by
    grouping_keys = [
        key_op.name
        if isinstance(key_op, ops.TableColumn)
        else execute(
            key,
            scope=scope,
            clients=clients,
            aggcontext=aggcontext,
            **kwargs,
        )
        for key, key_op in zip(
            group_by, map(operator.methodcaller('op'), group_by)
        )
    ]

    if not order_by:
        ordering_keys = ()

    # Pick the data source and the matching post-processing helper for
    # each of the four (group by, order by) combinations.
    if group_by:
        if order_by:
            (
                sorted_df,
                grouping_keys,
                ordering_keys,
            ) = util.compute_sorted_frame(
                data, order_by, group_by=group_by, **kwargs
            )
            source = sorted_df.groupby(grouping_keys, sort=True)
            post_process = _post_process_group_by_order_by
        else:
            source = data.groupby(grouping_keys, sort=False)
            post_process = _post_process_group_by
    else:
        if order_by:
            source, grouping_keys, ordering_keys = util.compute_sorted_frame(
                data, order_by, **kwargs
            )
            post_process = _post_process_order_by
        else:
            source = data
            post_process = _post_process_empty

    # Point every root table of the operand at the prepared source.
    new_scope = toolz.merge(
        scope,
        OrderedDict((t, source) for t in operand_op.root_tables()),
        factory=OrderedDict,
    )

    # figure out what the dtype of the operand is; nullable integers are
    # computed as float64
    operand_type = operand.type()
    if isinstance(operand_type, dt.Integer) and operand_type.nullable:
        operand_dtype = np.float64
    else:
        operand_dtype = operand_type.to_pandas()

    # no order by or group by: default summarization aggcontext
    #
    # if we're reducing and we have an order by expression then we need to
    # expand or roll.
    #
    # otherwise we're transforming
    if not grouping_keys and not ordering_keys:
        aggcontext = agg_ctx.Summarize()
    elif isinstance(operand_op, ops.Reduction) and ordering_keys:
        preceding = window.preceding
        if preceding is not None:
            # moving window
            aggcontext = agg_ctx.Moving(
                preceding,
                parent=source,
                group_by=grouping_keys,
                order_by=ordering_keys,
                dtype=operand_dtype,
            )
        else:
            # expanding window
            aggcontext = agg_ctx.Cumulative(
                parent=source,
                group_by=grouping_keys,
                order_by=ordering_keys,
                dtype=operand_dtype,
            )
    else:
        # groupby transform (window with a partition by clause in SQL parlance)
        aggcontext = agg_ctx.Transform(
            parent=source,
            group_by=grouping_keys,
            order_by=ordering_keys,
            dtype=operand_dtype,
        )

    result = execute(
        operand,
        scope=new_scope,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    series = post_process(result, data, ordering_keys, grouping_keys)
    assert len(data) == len(
        series
    ), 'input data source and computed column do not have the same length'
    return series
Пример #10
0
def execute_window_op(
    op,
    data,
    window,
    scope: Scope = None,
    timecontext: Optional[TimeContext] = None,
    aggcontext=None,
    clients=None,
    **kwargs,
):
    """Evaluate the operand of a window operation over `window`.

    Parameters
    ----------
    op
        The window operation; its operand is read from ``op.expr``.
    data
        Not read; it is replaced by the result of executing the single root
        table of `op`.
    window
        Provides ``following``, ``_order_by`` and ``_group_by``.
    scope : Scope
        Merged with whatever ``pre_execute`` returns for the operand.
    timecontext : Optional[TimeContext]
        The time context required by the parent of this window op. When it
        is set, the first entry returned by ``compute_time_context`` is
        used for all executions done here and the result is trimmed back
        to `timecontext` at the end.
    aggcontext
        Used for the preparatory executions; the operand itself is executed
        with the context returned by ``get_aggcontext``.
    clients
        Unpacked into ``pre_execute`` and forwarded to every ``execute``.
    kwargs : Mapping

    Returns
    -------
    The post-processed result passed through ``trim_with_timecontext``.

    Raises
    ------
    com.OperationNotDefinedError
        If the window has an order by, ``following`` is not 0 and the
        operand is not an ``ops.ShiftBase``.
    """
    operand = op.expr
    operand_op = operand.op()

    # `timecontext` is what the parent node asked for; the adjusted context
    # is the one of this window. Execution is managed by hand here, so the
    # adjusted context is used in every later phase.
    if timecontext:
        adjusted_timecontext = compute_time_context(
            op, timecontext=timecontext, clients=clients)[0]
    else:
        adjusted_timecontext = None

    # pre_execute is invoked by hand because the operand is executed
    # manually below, so its scope changes would otherwise be missed.
    scope = scope.merge_scope(
        pre_execute(
            operand_op,
            *clients,
            scope=scope,
            timecontext=adjusted_timecontext,
            aggcontext=aggcontext,
            **kwargs,
        ))

    (root, ) = op.root_tables()
    data = execute(
        root.to_expr(),
        scope=scope,
        timecontext=adjusted_timecontext,
        clients=clients,
        aggcontext=aggcontext,
        **kwargs,
    )

    following = window.following
    order_by = window._order_by
    is_shift = isinstance(operand_op, ops.ShiftBase)
    if order_by and following != 0 and not is_shift:
        raise com.OperationNotDefinedError(
            'Window functions affected by following with order_by are not '
            'implemented')

    # Plain table columns are grouped by name; any other key expression is
    # executed to obtain the grouping values.
    group_by = window._group_by
    grouping_keys = []
    for key in group_by:
        key_op = key.op()
        if isinstance(key_op, ops.TableColumn):
            grouping_keys.append(key_op.name)
        else:
            grouping_keys.append(
                execute(
                    key,
                    scope=scope,
                    clients=clients,
                    timecontext=adjusted_timecontext,
                    aggcontext=aggcontext,
                    **kwargs,
                ))

    # Pick the data source and the matching post-processing helper for
    # each of the four (order by, group by) combinations.
    if order_by:
        if group_by:
            sorted_df, grouping_keys, ordering_keys = (
                util.compute_sorted_frame(
                    data,
                    order_by,
                    group_by=group_by,
                    timecontext=adjusted_timecontext,
                    **kwargs,
                ))
            source = sorted_df.groupby(grouping_keys, sort=True)
            post_process = _post_process_group_by_order_by
        else:
            source, grouping_keys, ordering_keys = (
                util.compute_sorted_frame(
                    data,
                    order_by,
                    timecontext=adjusted_timecontext,
                    **kwargs,
                ))
            post_process = _post_process_order_by
    else:
        ordering_keys = []
        if group_by:
            source = data.groupby(grouping_keys, sort=False)
            post_process = _post_process_group_by
        else:
            source = data
            post_process = _post_process_empty

    # The root tables of the operand must resolve to `source` (possibly a
    # groupby object) during execution, so existing entries are
    # overwritten regardless of time context.
    root_overrides = [
        Scope({table: source}, adjusted_timecontext)
        for table in operand_op.root_tables()
    ]
    new_scope = scope.merge_scopes(root_overrides, overwrite=True)

    # figure out what the dtype of the operand is
    operand_dtype = operand.type().to_pandas()

    aggcontext = get_aggcontext(
        window,
        scope=scope,
        operand=operand,
        operand_dtype=operand_dtype,
        parent=source,
        group_by=grouping_keys,
        order_by=ordering_keys,
        **kwargs,
    )
    result = execute(
        operand,
        scope=new_scope,
        timecontext=adjusted_timecontext,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    series = post_process(result, data, ordering_keys, grouping_keys)
    assert len(data) == len(
        series
    ), 'input data source and computed column do not have the same length'

    # trim data to original time context
    return trim_with_timecontext(series, timecontext)
Пример #11
0
def execute_until_in_scope(
    expr,
    scope: Scope,
    timecontext: Optional[TimeContext] = None,
    aggcontext=None,
    clients=None,
    post_execute_=None,
    **kwargs,
) -> Scope:
    """Execute until our op is in `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
    scope : Scope
    timecontext : Optional[TimeContext]
        Time context under which the op of `expr` is looked up and computed.
    aggcontext : Optional[AggregationContext]
        Must not be ``None`` despite the default.
    clients : List[ibis.client.Client]
        Must not be ``None`` despite the default.
    post_execute_ : callable
        Called as ``post_execute_(op, result, timecontext=timecontext)``;
        must not be ``None``.
    kwargs : Mapping

    Returns
    -------
    Scope
        A scope from which the op of `expr` can be read under `timecontext`:
        `scope` itself when it already holds a value, the scope merged with
        the ``pre_execute`` result when that supplies one, otherwise a
        one-entry scope holding the freshly computed value.

    Raises
    ------
    com.IbisError
        If ``compute_time_context`` does not return one time context per
        computable argument.
    com.UnboundExpressionError
        If there are computable arguments but no scopes were produced.
    """
    # these should never be None
    assert aggcontext is not None, 'aggcontext is None'
    assert clients is not None, 'clients is None'
    assert post_execute_ is not None, 'post_execute_ is None'

    # base case: our op has been computed (or is a leaf data node), so
    # return the corresponding value
    op = expr.op()
    if scope.get_value(op, timecontext) is not None:
        return scope
    if isinstance(op, ops.Literal):
        # special case literals to avoid the overhead of dispatching
        # execute_node
        return Scope(
            {
                op:
                execute_literal(
                    op, op.value, expr.type(), aggcontext=aggcontext, **kwargs)
            },
            timecontext,
        )

    # figure out what arguments we're able to compute on based on the
    # expressions inputs. things like expressions, None, and scalar types are
    # computable whereas ``list``s are not
    computable_args = [arg for arg in op.inputs if is_computable_input(arg)]

    # arg_timecontexts holds one time context per computable argument;
    # without a time context every argument gets None
    if timecontext:
        arg_timecontexts = compute_time_context(
            op,
            num_args=len(computable_args),
            timecontext=timecontext,
            clients=clients,
        )
    else:
        arg_timecontexts = [None] * len(computable_args)

    pre_executed_scope = pre_execute(
        op,
        *clients,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        **kwargs,
    )

    new_scope = scope.merge_scope(pre_executed_scope)

    # Short circuit: if pre_execute puts op in scope, then we don't need to
    # execute its computable_args
    if new_scope.get_value(op, timecontext) is not None:
        return new_scope

    # compute_time_context should return a list with the same length as
    # computable_args, because the two lists are zipped together below
    if len(arg_timecontexts) != len(computable_args):
        raise com.IbisError(
            'arg_timecontexts differ with computable_arg in length '
            f'for type:\n{type(op).__name__}.')

    # Recursively compute each argument under its own time context. The
    # loop variable is deliberately not called `timecontext` so that it
    # cannot be confused with this node's time context.
    scopes = [
        execute_until_in_scope(
            arg,
            new_scope,
            timecontext=arg_timecontext,
            aggcontext=aggcontext,
            post_execute_=post_execute_,
            clients=clients,
            **kwargs,
        ) if hasattr(arg, 'op') else Scope({arg: arg}, arg_timecontext)
        for arg, arg_timecontext in zip(computable_args, arg_timecontexts)
    ]

    # if we're unable to find data then raise an exception
    if not scopes and computable_args:
        raise com.UnboundExpressionError(
            'Unable to find data for expression:\n{}'.format(repr(expr)))

    # there should be exactly one scope per computable argument
    assert len(computable_args) == len(scopes)

    new_scope = new_scope.merge_scopes(scopes)

    # Read every argument back under the time context it was executed with.
    # Looking it up under this node's time context instead could miss the
    # value that was just computed when the two contexts differ.
    data = [
        new_scope.get_value(arg.op(), arg_timecontext)
        if hasattr(arg, 'op') else arg
        for arg, arg_timecontext in zip(computable_args, arg_timecontexts)
    ]

    # pass our computed arguments to this node's execute_node implementation
    result = execute_node(
        op,
        *data,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    computed = post_execute_(op, result, timecontext=timecontext)
    return Scope({op: computed}, timecontext)
Пример #12
0
def execute_with_scope(
    expr,
    scope: Scope,
    timecontext: Optional[TimeContext] = None,
    aggcontext=None,
    clients=None,
    **kwargs,
):
    """Evaluate `expr` using the data bound in `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        The expression to evaluate.
    scope : Scope
        A Scope class, with dictionary mapping
        :class:`~ibis.expr.operations.Node` subclass instances to concrete
        data such as a pandas DataFrame.
    timecontext : Optional[TimeContext]
        A tuple of (begin, end) that is passed from parent Node to children
        see [timecontext.py](ibis/pandas/execution/timecontext.py) for
        detailed usage for this time context.
    aggcontext : Optional[ibis.pandas.aggcontext.AggregationContext]
        Replaced by ``agg_ctx.Summarize()`` when ``None``.
    clients : Optional[list]
        Replaced by ``list(find_backends(expr))`` when ``None``.
    kwargs : Mapping
        Forwarded to ``pre_execute``, ``post_execute`` and
        ``execute_until_in_scope``.

    Returns
    -------
    result : scalar, pd.Series, pd.DataFrame
    """
    op = expr.op()

    if clients is None:
        clients = list(find_backends(expr))

    if aggcontext is None:
        aggcontext = agg_ctx.Summarize()

    # pre_execute runs before anything is computed and before leaf nodes are
    # associated with data, so clients can supply their own data per leaf.
    new_scope = scope.merge_scope(
        pre_execute(
            op,
            *clients,
            scope=scope,
            timecontext=timecontext,
            aggcontext=aggcontext,
            **kwargs,
        )
    )

    # XXX: the hook is bound to `scope`, *not* `new_scope`, on purpose:
    # post_execute must see the scope of execute_with_scope, not the scope
    # of execute_until_in_scope.
    post_hook = functools.partial(
        post_execute,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )

    final_scope = execute_until_in_scope(
        expr,
        new_scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        clients=clients,
        post_execute_=post_hook,
        **kwargs,
    )
    return final_scope.get_value(op, timecontext)