import operator
from typing import Any, Callable, List, Optional

import pandas as pd

import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops

# The remaining names used below (Scope, TimeContext, compute_time_context,
# pre_execute, execute, execute_node, execute_literal, is_computable_input,
# get_aggcontext, trim_window_result, util, and the _post_process_* helpers)
# are assumed to be provided by the surrounding ibis pandas backend modules.


def execute_until_in_scope(
    expr,
    scope: Scope,
    timecontext: Optional[TimeContext] = None,
    aggcontext=None,
    clients=None,
    post_execute_=None,
    **kwargs,
) -> Scope:
    """Execute until our op is in `scope`.

    Parameters
    ----------
    expr : ibis.expr.types.Expr
        The expression to execute
    scope : Scope
        Cache of already-computed values, keyed by operation node
    timecontext : Optional[TimeContext]
        Time context associated with `expr`
    aggcontext : Optional[AggregationContext]
    clients : List[ibis.client.Client]
    kwargs : Mapping
    """
    # these should never be None
    assert aggcontext is not None, 'aggcontext is None'
    assert clients is not None, 'clients is None'
    assert post_execute_ is not None, 'post_execute_ is None'

    # base case: our op has been computed (or is a leaf data node), so
    # return the corresponding value
    op = expr.op()
    if scope.get_value(op, timecontext) is not None:
        return scope
    if isinstance(op, ops.Literal):
        # special case literals to avoid the overhead of dispatching
        # execute_node
        return Scope(
            {
                op: execute_literal(
                    op, op.value, expr.type(), aggcontext=aggcontext, **kwargs
                )
            },
            timecontext,
        )

    # figure out what arguments we're able to compute on based on the
    # expression's inputs. things like expressions, None, and scalar types
    # are computable whereas ``list``s are not
    computable_args = [arg for arg in op.inputs if is_computable_input(arg)]

    # arg_timecontexts is a list of time contexts with the same length as
    # computable_args; each context is passed to the corresponding arg
    if timecontext:
        arg_timecontexts = compute_time_context(
            op,
            num_args=len(computable_args),
            timecontext=timecontext,
            clients=clients,
        )
    else:
        arg_timecontexts = [None] * len(computable_args)

    pre_executed_scope = pre_execute(
        op,
        *clients,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        **kwargs,
    )

    new_scope = scope.merge_scope(pre_executed_scope)

    # short circuit: if pre_execute puts op in scope, then we don't need to
    # execute its computable_args
    if new_scope.get_value(op, timecontext) is not None:
        return new_scope

    # recursively compute each node's arguments until we've changed type.
    # compute_time_context should return a list with the same length as
    # computable_args; the two lists are zipped together for further
    # execution
    if len(arg_timecontexts) != len(computable_args):
        raise com.IbisError(
            'Length of arg_timecontexts does not match length of '
            f'computable_args for type:\n{type(op).__name__}.'
        )

    # note: ``timecontext`` in this comprehension is the per-argument context
    # from ``arg_timecontexts`` and intentionally shadows the outer name
    scopes = [
        execute_until_in_scope(
            arg,
            new_scope,
            timecontext=timecontext,
            aggcontext=aggcontext,
            post_execute_=post_execute_,
            clients=clients,
            **kwargs,
        )
        if hasattr(arg, 'op')
        else Scope({arg: arg}, timecontext)
        for (arg, timecontext) in zip(computable_args, arg_timecontexts)
    ]

    # if we're unable to find data then raise an exception
    if not scopes and computable_args:
        raise com.UnboundExpressionError(
            'Unable to find data for expression:\n{}'.format(repr(expr))
        )

    # there should be exactly one dictionary per computable argument
    assert len(computable_args) == len(scopes)

    new_scope = new_scope.merge_scopes(scopes)

    # pass our computed arguments to this node's execute_node implementation
    data = [
        new_scope.get_value(arg.op(), timecontext)
        if hasattr(arg, 'op')
        else arg
        for arg in computable_args
    ]

    result = execute_node(
        op,
        *data,
        scope=scope,
        timecontext=timecontext,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    computed = post_execute_(op, result, timecontext=timecontext)
    return Scope({op: computed}, timecontext)
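
# Minimal usage sketch (illustrative, not part of the backend): executing an
# expression through the public API drives the recursion above, bottoming out
# in execute_until_in_scope, which memoizes each computed node in a Scope.
# The table name, column names, and data below are invented for the example.
def _example_scope_driven_execution():
    import pandas as pd

    import ibis

    df = pd.DataFrame({'key': ['a', 'a', 'b'], 'value': [1.0, 2.0, 3.0]})
    client = ibis.pandas.connect({'events': df})
    table = client.table('events')
    expr = table.groupby('key').aggregate(total=table.value.sum())
    # walks the operation tree; values computed along the way are cached
    # per (op, timecontext) in a Scope, so shared subexpressions are only
    # executed once
    return expr.execute()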
def execute_window_op(
    op,
    data,
    window,
    scope: Scope = None,
    timecontext: Optional[TimeContext] = None,
    aggcontext=None,
    clients=None,
    **kwargs,
):
    if window.how == "range" and any(
        not isinstance(ob.type(), (dt.Time, dt.Date, dt.Timestamp))
        for ob in window._order_by
    ):
        raise NotImplementedError(
            "The pandas backend only implements range windows with temporal "
            "ordering keys"
        )

    operand = op.expr
    # pre execute "manually" here because otherwise we wouldn't pick up
    # relevant scope changes from the child operand since we're managing
    # execution of that by hand
    operand_op = operand.op()

    adjusted_timecontext = None
    if timecontext:
        arg_timecontexts = compute_time_context(
            op, timecontext=timecontext, clients=clients, scope=scope
        )
        # timecontext is the original time context required by the parent
        # node of this Window, while adjusted_timecontext is the adjusted
        # context of this Window; since we are doing a manual execution
        # here, use adjusted_timecontext in later execution phases
        adjusted_timecontext = arg_timecontexts[0]

    pre_executed_scope = pre_execute(
        operand_op,
        *clients,
        scope=scope,
        timecontext=adjusted_timecontext,
        aggcontext=aggcontext,
        **kwargs,
    )
    if scope is None:
        scope = pre_executed_scope
    else:
        scope = scope.merge_scope(pre_executed_scope)

    (root,) = op.root_tables()
    root_expr = root.to_expr()

    data = execute(
        root_expr,
        scope=scope,
        timecontext=adjusted_timecontext,
        clients=clients,
        aggcontext=aggcontext,
        **kwargs,
    )

    following = window.following
    order_by = window._order_by

    if (
        order_by
        and following != 0
        and not isinstance(operand_op, ops.ShiftBase)
    ):
        raise com.OperationNotDefinedError(
            'Window functions affected by following with order_by are not '
            'implemented'
        )

    group_by = window._group_by
    grouping_keys = [
        key_op.name
        if isinstance(key_op, ops.TableColumn)
        else execute(
            key,
            scope=scope,
            clients=clients,
            timecontext=adjusted_timecontext,
            aggcontext=aggcontext,
            **kwargs,
        )
        for key, key_op in zip(
            group_by, map(operator.methodcaller('op'), group_by)
        )
    ]

    if not order_by:
        ordering_keys = []

    post_process: Callable[
        [Any, pd.DataFrame, List[str], List[str], Optional[TimeContext]],
        pd.Series,
    ]
    if group_by:
        if order_by:
            (
                sorted_df,
                grouping_keys,
                ordering_keys,
            ) = util.compute_sorted_frame(
                data,
                order_by,
                group_by=group_by,
                timecontext=adjusted_timecontext,
                **kwargs,
            )
            source = sorted_df.groupby(grouping_keys, sort=True)
            post_process = _post_process_group_by_order_by
        else:
            source = data.groupby(grouping_keys, sort=False)
            post_process = _post_process_group_by
    else:
        if order_by:
            source, grouping_keys, ordering_keys = util.compute_sorted_frame(
                data, order_by, timecontext=adjusted_timecontext, **kwargs
            )
            post_process = _post_process_order_by
        else:
            source = data
            post_process = _post_process_empty

    # the groupby object should be added to the corresponding node in scope
    # for execution; data will be overwritten by the groupby object, so we
    # force an update regardless of time context
    new_scope = scope.merge_scopes(
        [
            Scope({t: source}, adjusted_timecontext)
            for t in operand.op().root_tables()
        ],
        overwrite=True,
    )

    aggcontext = get_aggcontext(
        window,
        scope=scope,
        operand=operand,
        parent=source,
        group_by=grouping_keys,
        order_by=ordering_keys,
        **kwargs,
    )

    result = execute(
        operand,
        scope=new_scope,
        timecontext=adjusted_timecontext,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    result = post_process(
        result,
        data,
        ordering_keys,
        grouping_keys,
        adjusted_timecontext,
    )
    assert len(data) == len(
        result
    ), 'input data source and computed column do not have the same length'

    # trim data to original time context
    result = trim_window_result(result, timecontext)
    return result
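
# Illustrative sketch of an expression that lands in execute_window_op: a
# grouped, ordered trailing window over invented data. ``ibis.trailing_window``
# produces following=0, so the order_by-with-following restriction above is
# not triggered. Table, column, and data values are made up for the example.
def _example_window_execution():
    import pandas as pd

    import ibis

    df = pd.DataFrame(
        {
            'g': ['a', 'a', 'b', 'b'],
            't': pd.to_datetime(
                ['2020-01-01', '2020-01-02', '2020-01-01', '2020-01-02']
            ),
            'v': [1.0, 2.0, 3.0, 4.0],
        }
    )
    client = ibis.pandas.connect({'d': df})
    table = client.table('d')
    win = ibis.trailing_window(1, group_by=table.g, order_by=table.t)
    expr = table.v.sum().over(win)
    # grouped by ``g`` and ordered by ``t``: execute_window_op sorts the
    # frame, groups it, and evaluates the operand per group before
    # post-processing restores the original row order
    return table.mutate(rolling_sum=expr).execute()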