def test_is_aggregate_derived(self):
    """Count columns and aggregates found inside nested Operator trees.

    The first tree holds only column and constant leaves; the second one
    additionally places an aggregate node underneath the top-level operators.
    """
    def tally(c_expr):
        # Reduce the (columns, aggregates) pair to a pair of counts.
        found_columns, found_aggregates = qc.get_columns_and_aggregates(c_expr)
        return (len(found_columns), len(found_aggregates))

    def lineno_is_42():
        # Fresh Equal(LineNoColumn, 42) node for each tree.
        return qc.Operator(qp.Equal, [qe.LineNoColumn(), qc.EvalConstant(42)])

    def date_is_fixed():
        # Fresh Equal(DateColumn, 2014-01-01) node for each tree.
        return qc.Operator(qp.Equal, [
            qe.DateColumn(),
            qc.EvalConstant(datetime.date(2014, 1, 1)),
        ])

    # And(Equal, Or(Not(Equal), False)): two columns, no aggregate.
    left = lineno_is_42()
    negated = qc.Operator(qp.Not, [date_is_fixed()])
    right = qc.Operator(qp.Or, [negated, qc.EvalConstant(False)])
    plain_tree = qc.Operator(qp.And, [left, right])
    self.assertEqual((2, 0), tally(plain_tree))

    # And(Equal, Or(Not(Not(Equal)), SumInt(1))): two columns, one aggregate.
    left = lineno_is_42()
    inner_not = qc.Operator(qp.Not, [date_is_fixed()])
    outer_not = qc.Operator(qp.Not, [inner_not])
    # Aggregation node deep in the tree.
    deep_aggregate = qe.SumInt([qc.EvalConstant(1)])
    right = qc.Operator(qp.Or, [outer_not, deep_aggregate])
    aggregate_tree = qc.Operator(qp.And, [left, right])
    self.assertEqual((2, 1), tally(aggregate_tree))
def test_is_aggregate_derived(self):
    """Count columns and aggregates found inside nested Eval* expressions.

    The first expression holds only column and constant leaves; the second
    one replaces the constant operand of the Or node with an aggregate.
    """
    def tally(c_expr):
        # Reduce the (columns, aggregates) pair to a pair of counts.
        found_columns, found_aggregates = qc.get_columns_and_aggregates(c_expr)
        return (len(found_columns), len(found_aggregates))

    def position_is_42():
        # Fresh EvalEqual(PositionColumn, 42) node for each expression.
        return qc.EvalEqual(qe.PositionColumn(), qc.EvalConstant(42))

    def date_is_not_fixed():
        # Fresh EvalNot(EvalEqual(DateColumn, 2014-01-01)) node.
        return qc.EvalNot(
            qc.EvalEqual(qe.DateColumn(),
                         qc.EvalConstant(datetime.date(2014, 1, 1))))

    # EvalAnd(EvalEqual, EvalOr(EvalNot(EvalEqual), False)): no aggregate.
    left = position_is_42()
    right = qc.EvalOr(date_is_not_fixed(), qc.EvalConstant(False))
    self.assertEqual((2, 0), tally(qc.EvalAnd(left, right)))

    # Same shape, but the Or node's second operand is an aggregate.
    left = position_is_42()
    negated = date_is_not_fixed()
    # Aggregation node deep in the tree.
    deep_aggregate = qe.Sum([qc.EvalConstant(1)])
    right = qc.EvalOr(negated, deep_aggregate)
    self.assertEqual((2, 1), tally(qc.EvalAnd(left, right)))
def test_get_columns_and_aggregates(self):
    """Check get_columns_and_aggregates() and is_aggregate() side by side.

    Each case supplies a compiled expression, the expected
    (number of columns, number of aggregates) pair, and whether
    is_aggregate() is expected to report the expression as an aggregate.
    """
    def verify(c_query, expected_counts, expect_aggregate):
        columns, aggregates = qc.get_columns_and_aggregates(c_query)
        self.assertEqual(expected_counts, (len(columns), len(aggregates)))
        if expect_aggregate:
            self.assertTrue(qc.is_aggregate(c_query))
        else:
            self.assertFalse(qc.is_aggregate(c_query))

    # Simple column.
    verify(qe.PositionColumn(), (1, 0), False)

    # Multiple columns.
    verify(qc.Operator(qp.And, [qe.PositionColumn(), qe.DateColumn()]),
           (2, 0), False)

    # Simple aggregate; the column it wraps is expected not to be counted.
    verify(qe.SumPosition([qe.PositionColumn()]), (0, 1), True)

    # Multiple aggregates.
    verify(
        qc.Operator(qp.And, [
            qe.First([qe.DateColumn()]),
            qe.Last([qe.FlagColumn()]),
        ]),
        (0, 2), True)

    # Simple non-aggregate function.
    verify(qe.Function('length', [qe.AccountColumn()]), (1, 0), False)

    # Mix of column and aggregates (this is used to detect this illegal case).
    verify(
        qc.Operator(qp.And, [
            qe.Function('length', [qe.AccountColumn()]),
            qe.SumPosition([qe.PositionColumn()]),
        ]),
        (1, 1), True)
def execute_select(query, entries, options_map):
    """Given a compiled select statement, execute the query.

    Args:
      query: An instance of a query_compile.Query
      entries: A list of directives.
      options_map: A parser's option_map.
    Returns:
      A pair of:
        result_types: A list of Column(name, data-type) descriptors, one for
          each target whose name is not None.
        result_rows: A list of plain tuples of length and types described by
          'result_types'.
    """
    # Targets whose name is not None are the ones returned to the caller.
    # The same filter feeds both the column descriptors and the row
    # projection so that they always have the same width.
    visible_targets = [(index, c_target)
                       for index, c_target in enumerate(query.c_targets)
                       if c_target.name is not None]
    result_indexes = [index for index, _ in visible_targets]
    result_types = [Column(c_target.name, c_target.c_expr.dtype)
                    for _, c_target in visible_targets]

    # All target expressions, including the unnamed ones, which may still be
    # referenced by index for grouping, ordering and HAVING.
    c_target_exprs = [c_target.c_expr for c_target in query.c_targets]

    # Use a set for the membership tests done on every target below.
    group_indexes = (set(query.group_indexes)
                     if query.group_indexes is not None
                     else None)
    order_spec = query.order_spec
    c_where = query.c_where

    # Figure out if we need to compute balance.
    uses_balance = any(
        uses_balance_column(c_expr)
        for c_expr in itertools.chain(c_target_exprs,
                                      [c_where] if c_where else []))

    context = create_row_context(entries, options_map)

    # Filter the entries using the FROM clause.
    filt_entries = (filter_entries(query.c_from, entries, options_map, context)
                    if query.c_from is not None else
                    entries)

    def matching_postings():
        # Walk the postings of every transaction, pointing the context at
        # each one, and yield those accepted by the WHERE clause. The running
        # balance is updated before the posting is handed to the consumer.
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    if uses_balance:
                        context.balance.add_position(posting)
                    yield posting

    # Dispatch between the non-aggregated queries and aggregated queries.
    if group_indexes is None:
        # This is a non-aggregated query: one row per matching posting.
        rows = [[c_expr(context) for c_expr in c_target_exprs]
                for _ in matching_postings()]
    else:
        # This is an aggregated query.
        rows = []

        # Precompute lists of non-aggregate and aggregate expressions to
        # evaluate. For aggregate targets, we hunt down the aggregate
        # sub-expressions to evaluate, to avoid recursion during iteration.
        c_nonaggregate_exprs = []
        c_aggregate_exprs = []
        for index, c_expr in enumerate(c_target_exprs):
            if index in group_indexes:
                c_nonaggregate_exprs.append(c_expr)
            else:
                _, aggregate_exprs = (
                    query_compile.get_columns_and_aggregates(c_expr))
                c_aggregate_exprs.extend(aggregate_exprs)
        # Note: it is possible that there are no aggregates to compute here.
        # You could have all columns be non-aggregates and group-by the
        # entire list of columns.

        # Pre-allocate handles in aggregation nodes.
        allocator = Allocator()
        for c_expr in c_aggregate_exprs:
            c_expr.allocate(allocator)

        # Iterate over all the postings to evaluate the aggregates.
        agg_store = {}
        for _ in matching_postings():
            # The values of the group-by expressions identify the group.
            row_key = tuple(c_expr(context)
                            for c_expr in c_nonaggregate_exprs)

            # Get an appropriate store for the unique key of this row.
            try:
                store = agg_store[row_key]
            except KeyError:
                # This is a new group; create and initialize its store.
                store = allocator.create_store()
                for c_expr in c_aggregate_exprs:
                    c_expr.initialize(store)
                agg_store[row_key] = store

            # Update the aggregate expressions.
            for c_expr in c_aggregate_exprs:
                c_expr.update(store, context)

        # Iterate over all the aggregations, producing one row per group.
        having_index = query.having_index
        for key, store in agg_store.items():
            # Finalize the store and expose it to the target expressions.
            for c_expr in c_aggregate_exprs:
                c_expr.finalize(store)
            context.store = store

            # Group-by targets take their value from the key, in order;
            # every other target is evaluated against the finalized store.
            key_iter = iter(key)
            values = []
            for index, c_expr in enumerate(c_target_exprs):
                if index in group_indexes:
                    value = next(key_iter)
                else:
                    value = c_expr(context)
                values.append(value)

            # Skip row if HAVING clause expression is false.
            if having_index is not None and not values[having_index]:
                continue

            rows.append(values)

    # Order results if requested.
    if order_spec is not None:
        # Each order_spec item is used as (target index, reverse flag).
        # Process the order-by clauses grouped by their ordering direction,
        # starting from the last group: sorting repeatedly from the least to
        # the most significant group relies on list.sort() being stable.
        for reverse, group in itertools.groupby(reversed(order_spec),
                                                key=operator.itemgetter(1)):
            # Restore the original clause order within the group.
            indexes = reversed([item[0] for item in group])
            # The rows may contain None values: nullitemgetter()
            # replaces these with a special value that compares
            # smaller than anything else.
            rows.sort(key=nullitemgetter(*indexes), reverse=reverse)

    # Convert results into list of tuples, keeping only the named targets.
    rows = [tuple(row[index] for index in result_indexes) for row in rows]

    # Apply distinct.
    if query.distinct:
        rows = list(misc_utils.uniquify(rows))

    # Apply limit.
    if query.limit is not None:
        rows = rows[:query.limit]

    return result_types, rows
def execute_query(query, entries, options_map):
    """Evaluate a compiled query over the postings of a list of entries.

    Args:
      query: An instance of a query_compile.Query
      entries: A list of directives.
      options_map: A parser's option_map.
    Returns:
      A pair of:
        result_types: A list of (name, data-type) item pairs, one for each
          target whose name is not None.
        result_rows: A list of ResultRow namedtuples of length and types
          described by 'result_types'.
      When query.flatten is set, both are replaced by the output of
      flatten_results().
    """
    targets = query.c_targets

    # The result types and the row class both describe the named targets.
    named_targets = [target for target in targets if target.name is not None]
    result_types = [(target.name, target.c_expr.dtype)
                    for target in named_targets]
    row_type = collections.namedtuple(
        'ResultRow', [target.name for target in named_targets])

    # Turn the group-by indexes into a set when there are any.
    group_indexes = query.group_indexes
    if group_indexes is not None:
        group_indexes = set(group_indexes)

    # Positions of the targets copied into the result rows.
    result_indexes = [position
                      for position, target in enumerate(targets)
                      if target.name]
    order_indexes = query.order_indexes

    # Every target expression, named or not.
    c_target_exprs = [target.c_expr for target in targets]

    # The balance is needed if any target, or the WHERE clause, uses it.
    balance_candidates = list(c_target_exprs)
    if query.c_where:
        balance_candidates.append(query.c_where)
    uses_balance = any(map(uses_balance_column, balance_candidates))

    context = create_row_context(entries, options_map)

    # Apply the FROM clause, if there is one.
    if query.c_from is not None:
        filt_entries = filter_entries(query.c_from, entries, options_map,
                                      context)
    else:
        filt_entries = entries

    c_where = query.c_where

    # Rows are accumulated as (sortkey, result) pairs and sorted at the end.
    schwartz_rows = []

    def emit(values):
        # Project the named targets and pair the row with its sort key.
        result = row_type._make([values[position]
                                 for position in result_indexes])
        sortkey = row_sortkey(order_indexes, values, c_target_exprs)
        schwartz_rows.append((sortkey, result))

    if query.group_indexes is None:
        # Non-aggregated query: one row for each posting that passes WHERE.
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is not None and not c_where(context):
                    continue

                # Keep the running balance current before evaluating.
                if uses_balance:
                    context.balance.add_position(posting)

                emit([c_expr(context) for c_expr in c_target_exprs])
    else:
        # Aggregated query. Split the targets into the group-by expressions
        # and the aggregate sub-expressions found inside the other targets,
        # so that no tree walk is needed while iterating over postings.
        key_exprs = []
        aggregate_nodes = []
        for position, c_expr in enumerate(c_target_exprs):
            if position in group_indexes:
                key_exprs.append(c_expr)
                continue
            _, found = query_compile.get_columns_and_aggregates(c_expr)
            aggregate_nodes.extend(found)
        # There may be no aggregate at all, e.g. when every column is a
        # non-aggregate and the query groups by the entire list of columns.

        # Let each aggregation node reserve its handles.
        allocator = Allocator()
        for node in aggregate_nodes:
            node.allocate(allocator)

        # Accumulate the aggregates, one store per distinct group key.
        agg_store = {}
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is not None and not c_where(context):
                    continue

                # Keep the running balance current before evaluating.
                if uses_balance:
                    context.balance.add_position(posting)

                # The values of the group-by expressions identify the group.
                row_key = tuple(c_expr(context) for c_expr in key_exprs)

                if row_key not in agg_store:
                    # First posting of this group: set up a new store.
                    fresh_store = allocator.create_store()
                    for node in aggregate_nodes:
                        node.initialize(fresh_store)
                    agg_store[row_key] = fresh_store
                store = agg_store[row_key]

                for node in aggregate_nodes:
                    node.update(store, context)

        # Produce one row per group.
        for key, store in agg_store.items():
            # Finalize the store and expose it to the target expressions.
            for node in aggregate_nodes:
                node.finalize(store)
            context.store = store

            # Group-by targets consume the key in order; every other target
            # is evaluated against the finalized store.
            key_iter = iter(key)
            emit([next(key_iter) if position in group_indexes
                  else c_expr(context)
                  for position, c_expr in enumerate(c_target_exprs)])

    # Sort on the precomputed keys if an ordering was requested.
    if order_indexes is not None:
        descending = (query.ordering == 'DESC')
        schwartz_rows.sort(key=operator.itemgetter(0), reverse=descending)

    # Drop the sort keys, keeping the rows in their current order.
    result_rows = [row for _, row in schwartz_rows]

    # DISTINCT.
    if query.distinct:
        result_rows = list(misc_utils.uniquify(result_rows))

    # LIMIT.
    if query.limit is not None:
        result_rows = result_rows[:query.limit]

    # FLATTEN.
    if query.flatten:
        result_types, result_rows = flatten_results(result_types, result_rows)

    return result_types, result_rows