def get_aggcontext_window(
    window,
    *,
    scope,
    operand,
    operand_dtype,
    parent,
    group_by,
    order_by,
    **kwargs,
) -> AggregationContext:
    """Pick the aggregation context used to evaluate *operand* over *window*.

    Selection rules:

    * no grouping and no ordering keys -> plain summarization
    * a reduction (or cumulative/any/all op) with ordering keys -> a
      moving window when ``window.preceding`` is bounded, otherwise an
      expanding (cumulative) window
    * anything else -> a group-by transform (a window with a PARTITION BY
      clause in SQL parlance)
    """
    operand_op = operand.op()

    # No grouping and no ordering: default summarization context.
    if not group_by and not order_by:
        return agg_ctx.Summarize()

    is_reducing = isinstance(
        operand_op, (ops.Reduction, ops.CumulativeOp, ops.Any, ops.All)
    )

    if is_reducing and order_by:
        # XXX(phillipc): What a horror show
        preceding = window.preceding
        if preceding is not None:
            max_lookback = window.max_lookback
            # Cumulative ops never carry a bounded preceding clause.
            assert not isinstance(operand_op, ops.CumulativeOp)
            return agg_ctx.Moving(
                preceding,
                max_lookback,
                parent=parent,
                group_by=group_by,
                order_by=order_by,
                dtype=operand_dtype,
            )
        # Unbounded preceding: expanding window.
        return agg_ctx.Cumulative(
            parent=parent,
            group_by=group_by,
            order_by=order_by,
            dtype=operand_dtype,
        )

    # groupby transform (window with a partition by clause in SQL parlance)
    return agg_ctx.Transform(
        parent=parent,
        group_by=group_by,
        order_by=order_by,
        dtype=operand_dtype,
    )
def execute_window_op(
    op, data, window, scope=None, aggcontext=None, clients=None, **kwargs
):
    """Execute a window expression over *data* with the pandas backend.

    Pre-executes the operand's scope, materializes the window's root
    table, builds a grouped and/or sorted view of it according to the
    window's partition and ordering clauses, evaluates the operand under
    the appropriate aggregation context, and post-processes the result
    back into a column aligned with ``data``.

    Raises
    ------
    com.OperationNotDefinedError
        If the window combines ``order_by`` with a nonzero ``following``
        for an operation other than a shift.
    """
    operand = op.expr
    # pre execute "manually" here because otherwise we wouldn't pickup
    # relevant scope changes from the child operand since we're managing
    # execution of that by hand
    operand_op = operand.op()
    pre_executed_scope = pre_execute(
        operand_op, *clients, scope=scope, aggcontext=aggcontext, **kwargs
    )
    scope = toolz.merge(scope, pre_executed_scope)

    root, = op.root_tables()
    root_expr = root.to_expr()
    data = execute(
        root_expr,
        scope=scope,
        clients=clients,
        aggcontext=aggcontext,
        **kwargs,
    )

    following = window.following
    order_by = window._order_by

    if (
        order_by
        and following != 0
        and not isinstance(operand_op, ops.ShiftBase)
    ):
        raise com.OperationNotDefinedError(
            'Window functions affected by following with order_by are not '
            'implemented')

    group_by = window._group_by
    # Column keys are referenced by name; computed keys are evaluated.
    grouping_keys = [
        key_op.name
        if isinstance(key_op, ops.TableColumn)
        else execute(key, aggcontext=aggcontext, **kwargs)
        for key, key_op in zip(
            group_by, map(operator.methodcaller('op'), group_by)
        )
    ]

    # Initialize unconditionally so every branch below leaves
    # ``ordering_keys`` defined (the original only set it when there was
    # no order_by, relying on the sorted branches to rebind it).
    ordering_keys = ()

    if group_by:
        if order_by:
            (
                sorted_df,
                grouping_keys,
                ordering_keys,
            ) = util.compute_sorted_frame(
                data, order_by, group_by=group_by, **kwargs
            )
            source = sorted_df.groupby(grouping_keys, sort=True)
            post_process = _post_process_group_by_order_by
        else:
            source = data.groupby(grouping_keys, sort=False)
            post_process = _post_process_group_by
    else:
        if order_by:
            source, grouping_keys, ordering_keys = util.compute_sorted_frame(
                data, order_by, **kwargs
            )
            post_process = _post_process_order_by
        else:
            source = data
            post_process = _post_process_empty

    new_scope = toolz.merge(
        scope,
        OrderedDict((t, source) for t in operand.op().root_tables()),
        factory=OrderedDict,
    )

    # figure out what the dtype of the operand is
    operand_dtype = operand.type().to_pandas()

    # Delegate aggregation-context selection to get_aggcontext_window
    # instead of duplicating its Summarize/Moving/Cumulative/Transform
    # branching inline (the previous inline copy was byte-for-byte the
    # same logic).
    aggcontext = get_aggcontext_window(
        window,
        scope=scope,
        operand=operand,
        operand_dtype=operand_dtype,
        parent=source,
        group_by=grouping_keys,
        order_by=ordering_keys,
        **kwargs,
    )

    result = execute(
        operand,
        scope=new_scope,
        aggcontext=aggcontext,
        clients=clients,
        **kwargs,
    )
    series = post_process(result, data, ordering_keys, grouping_keys)
    assert len(data) == len(
        series
    ), 'input data source and computed column do not have the same length'
    return series
def execute_frame_window_op(op, data, scope=None, context=None, **kwargs):
    """Evaluate a window expression against *data* and return a column.

    Builds a grouped and/or sorted view of ``data`` from the window's
    partition and ordering clauses, picks a matching aggregation
    context, executes the operand under it, and post-processes the
    result back onto ``data``'s index.

    Raises
    ------
    ValueError
        If ``order_by`` is combined with a nonzero ``following`` bound,
        which the pandas backend does not support.
    """
    operand, window = op.args

    following = window.following
    order_by = window._order_by
    if order_by and following != 0:
        raise ValueError(
            'Following with a value other than 0 (current row) with order_by '
            'is not yet implemented in the pandas backend. Use '
            'ibis.trailing_window or ibis.cumulative_window to '
            'construct windows when using the pandas backend.')

    group_by = window._group_by
    # Column keys are referenced by name; anything else is evaluated.
    partition_keys = []
    for key in group_by:
        key_op = key.op()
        if isinstance(key_op, ir.TableColumn):
            partition_keys.append(key_op.name)
        else:
            partition_keys.append(execute(key, context=context, **kwargs))

    order_by = window._order_by

    if partition_keys:
        # Partitioned window; sort within each group when ordered.
        source = data.groupby(
            partition_keys, sort=False, as_index=not order_by)
        if order_by:
            sorted_df = source.apply(
                lambda df, order_by=order_by, kwargs=kwargs: (
                    util.compute_sorted_frame(order_by, df, **kwargs)))
            source = sorted_df.groupby(partition_keys, sort=False)
            post_process = _post_process_group_by_order_by
        else:
            post_process = _post_process_group_by
    elif order_by:
        source = util.compute_sorted_frame(order_by, data, **kwargs)
        post_process = _post_process_order_by
    else:
        source = data
        post_process = _post_process_empty

    new_scope = toolz.merge(
        scope,
        OrderedDict((t, source) for t in operand.op().root_tables()),
        factory=OrderedDict,
    )

    # Pick the aggregation context:
    #   * no partitioning/ordering -> summarize
    #   * reduction with ordering  -> trailing or cumulative window
    #   * otherwise                -> group-by transform
    if not partition_keys and not order_by:
        context = agg_ctx.Summarize()
    elif isinstance(operand.op(), ops.Reduction) and order_by:
        preceding = window.preceding
        context = (
            agg_ctx.Cumulative()
            if preceding is None
            else agg_ctx.Trailing(preceding)
        )
    else:
        context = agg_ctx.Transform()

    result = execute(operand, new_scope, context=context, **kwargs)
    series = post_process(result, data.index)
    assert len(data) == len(series), \
        'input data source and computed column do not have the same length'
    return series