Example #1
    def test_is_aggregate_derived(self):
        columns, aggregates = qc.get_columns_and_aggregates(
            qc.Operator(qp.And, [
                qc.Operator(qp.Equal, [
                    qe.LineNoColumn(),
                    qc.EvalConstant(42),
                ]),
                qc.Operator(qp.Or, [
                    qc.Operator(qp.Not, [
                        qc.Operator(qp.Equal, [
                            qe.DateColumn(),
                            qc.EvalConstant(datetime.date(2014, 1, 1)),
                        ]),
                    ]),
                    qc.EvalConstant(False),
                ]),
            ]))
        self.assertEqual((2, 0), (len(columns), len(aggregates)))

        columns, aggregates = qc.get_columns_and_aggregates(
            qc.Operator(
                qp.And,
                [
                    qc.Operator(qp.Equal, [
                        qe.LineNoColumn(),
                        qc.EvalConstant(42),
                    ]),
                    qc.Operator(
                        qp.Or,
                        [
                            qc.Operator(qp.Not, [
                                qc.Operator(qp.Not, [
                                    qc.Operator(qp.Equal, [
                                        qe.DateColumn(),
                                        qc.EvalConstant(
                                            datetime.date(2014, 1, 1)),
                                    ]),
                                ]),
                            ]),
                            # Aggregation node deep in the tree.
                            qe.SumInt([qc.EvalConstant(1)]),
                        ]),
                ]))
        self.assertEqual((2, 1), (len(columns), len(aggregates)))
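
For reference, a minimal self-contained sketch of the kind of tree walk these
assertions pin down. The node classes below are hypothetical stand-ins, not
beancount's real ones, and the actual implementation may differ in details:

class Column:
    """Leaf node that reads a data column."""

class Aggregate:
    """Aggregation node; columns nested inside it are not counted."""
    def __init__(self, operands):
        self.operands = operands

class Operator:
    """Inner node (And, Or, Not, Equal, plain functions, ...)."""
    def __init__(self, operands):
        self.operands = operands

def get_columns_and_aggregates(node):
    """Return ([column nodes], [aggregate nodes]) found in the tree."""
    columns, aggregates = [], []
    _walk(node, columns, aggregates)
    return columns, aggregates

def _walk(node, columns, aggregates):
    if isinstance(node, Aggregate):
        aggregates.append(node)  # Stop here; don't descend into the aggregate.
    elif isinstance(node, Column):
        columns.append(node)
    elif isinstance(node, Operator):
        for child in node.operands:
            _walk(child, columns, aggregates)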
Example #2
    def test_is_aggregate_derived(self):
        columns, aggregates = qc.get_columns_and_aggregates(
            qc.EvalAnd(
                qc.EvalEqual(qe.PositionColumn(), qc.EvalConstant(42)),
                qc.EvalOr(
                    qc.EvalNot(
                        qc.EvalEqual(
                            qe.DateColumn(),
                            qc.EvalConstant(datetime.date(2014, 1, 1)))),
                    qc.EvalConstant(False))))
        self.assertEqual((2, 0), (len(columns), len(aggregates)))

        columns, aggregates = qc.get_columns_and_aggregates(
            qc.EvalAnd(
                qc.EvalEqual(qe.PositionColumn(), qc.EvalConstant(42)),
                qc.EvalOr(
                    qc.EvalNot(
                        qc.EvalEqual(
                            qe.DateColumn(),
                            qc.EvalConstant(datetime.date(2014, 1, 1)))),
                    # Aggregation node deep in the tree.
                    qe.Sum([qc.EvalConstant(1)]))))
        self.assertEqual((2, 1), (len(columns), len(aggregates)))
Example #3
    def test_get_columns_and_aggregates(self):
        # Simple column.
        c_query = qe.PositionColumn()
        columns, aggregates = qc.get_columns_and_aggregates(c_query)
        self.assertEqual((1, 0), (len(columns), len(aggregates)))
        self.assertFalse(qc.is_aggregate(c_query))

        # Multiple columns.
        c_query = qc.Operator(qp.And, [qe.PositionColumn(), qe.DateColumn()])
        columns, aggregates = qc.get_columns_and_aggregates(c_query)
        self.assertEqual((2, 0), (len(columns), len(aggregates)))
        self.assertFalse(qc.is_aggregate(c_query))

        # Simple aggregate.
        c_query = qe.SumPosition([qe.PositionColumn()])
        columns, aggregates = qc.get_columns_and_aggregates(c_query)
        self.assertEqual((0, 1), (len(columns), len(aggregates)))
        self.assertTrue(qc.is_aggregate(c_query))

        # Multiple aggregates.
        c_query = qc.Operator(
            qp.And, [qe.First([qe.DateColumn()]),
                     qe.Last([qe.FlagColumn()])])
        columns, aggregates = qc.get_columns_and_aggregates(c_query)
        self.assertEqual((0, 2), (len(columns), len(aggregates)))
        self.assertTrue(qc.is_aggregate(c_query))

        # Simple non-aggregate function.
        c_query = qe.Function('length', [qe.AccountColumn()])
        columns, aggregates = qc.get_columns_and_aggregates(c_query)
        self.assertEqual((1, 0), (len(columns), len(aggregates)))
        self.assertFalse(qc.is_aggregate(c_query))

        # Mix of columns and aggregates (used to detect this illegal case).
        c_query = qc.Operator(qp.And, [
            qe.Function('length', [qe.AccountColumn()]),
            qe.SumPosition([qe.PositionColumn()]),
        ])
        columns, aggregates = qc.get_columns_and_aggregates(c_query)
        self.assertEqual((1, 1), (len(columns), len(aggregates)))
        self.assertTrue(qc.is_aggregate(c_query))
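
The is_aggregate checks above are consistent with deriving the predicate
straight from the same walk. A plausible definition (a sketch; beancount's
actual implementation may differ):

def is_aggregate(c_query):
    """True if the expression contains at least one aggregation node."""
    _, aggregates = get_columns_and_aggregates(c_query)
    return bool(aggregates)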
Example #4
def execute_select(query, entries, options_map):
    """Given a compiled select statement, execute the query.

    Args:
      query: An instance of a query_compile.Query
      entries: A list of directives.
      options_map: A parser's options map.
    Returns:
      A pair of:
        result_types: A list of (name, data-type) item pairs.
        result_rows: A list of ResultRow tuples of length and types described by
          'result_types'.
    """
    # Figure out the result types that describe what we return.
    result_types = [Column(target.name, target.c_expr.dtype)
                    for target in query.c_targets
                    if target.name is not None]

    # Pre-compute lists of the expressions to evaluate.
    group_indexes = (set(query.group_indexes)
                     if query.group_indexes is not None
                     else query.group_indexes)

    # Indexes of the columns for result rows and order rows.
    result_indexes = [index
                      for index, c_target in enumerate(query.c_targets)
                      if c_target.name]
    order_spec = query.order_spec

    # Figure out if we need to compute balance.
    uses_balance = any(uses_balance_column(c_expr)
                       for c_expr in itertools.chain(
                               [c_target.c_expr for c_target in query.c_targets],
                               [query.c_where] if query.c_where else []))

    context = create_row_context(entries, options_map)

    # Filter the entries using the FROM clause.
    filt_entries = (filter_entries(query.c_from, entries, options_map, context)
                    if query.c_from is not None else
                    entries)

    # Dispatch between the non-aggregated queries and aggregated queries.
    c_where = query.c_where
    rows = []

    # Precompute a list of expressions to be evaluated.
    c_target_exprs = [c_target.c_expr for c_target in query.c_targets]

    if query.group_indexes is None:
        # This is a non-aggregated query.

        # Iterate over all the postings once.
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    # Compute the balance.
                    if uses_balance:
                        context.balance.add_position(posting)

                    # Evaluate all the values.
                    values = [c_expr(context) for c_expr in c_target_exprs]

                    rows.append(values)
    else:
        # This is an aggregated query.

        # Precompute lists of non-aggregate and aggregate expressions to
        # evaluate. For aggregate targets, we hunt down the aggregate
        # sub-expressions to evaluate, to avoid recursion during iteration.
        c_nonaggregate_exprs = []
        c_aggregate_exprs = []
        for index, c_expr in enumerate(c_target_exprs):
            if index in group_indexes:
                c_nonaggregate_exprs.append(c_expr)
            else:
                _, aggregate_exprs = query_compile.get_columns_and_aggregates(c_expr)
                c_aggregate_exprs.extend(aggregate_exprs)
        # Note: it is possible that there are no aggregates to compute here. You could
        # have all columns be non-aggregates and group-by the entire list of columns.

        # Pre-allocate handles in aggregation nodes.
        allocator = Allocator()
        for c_expr in c_aggregate_exprs:
            c_expr.allocate(allocator)

        # Iterate over all the postings to evaluate the aggregates.
        agg_store = {}
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    # Compute the balance.
                    if uses_balance:
                        context.balance.add_position(posting)

                    # Compute the non-aggregate expressions.
                    row_key = tuple(c_expr(context)
                                    for c_expr in c_nonaggregate_exprs)

                    # Get an appropriate store for the unique key of this row.
                    try:
                        store = agg_store[row_key]
                    except KeyError:
                        # This is a new row; create a new store.
                        store = allocator.create_store()
                        for c_expr in c_aggregate_exprs:
                            c_expr.initialize(store)
                        agg_store[row_key] = store

                    # Update the aggregate expressions.
                    for c_expr in c_aggregate_exprs:
                        c_expr.update(store, context)

        # Iterate over all the aggregations.
        for key, store in agg_store.items():
            key_iter = iter(key)
            values = []

            # Finalize the store.
            for c_expr in c_aggregate_exprs:
                c_expr.finalize(store)
            context.store = store

            for index, c_expr in enumerate(c_target_exprs):
                if index in group_indexes:
                    value = next(key_iter)
                else:
                    value = c_expr(context)
                values.append(value)

            # Skip row if HAVING clause expression is false.
            if query.having_index is not None:
                if not values[query.having_index]:
                    continue

            rows.append(values)

    # Order results if requested.
    if order_spec is not None:
        # Process the order-by clauses grouped by their ordering direction.
        for reverse, spec in itertools.groupby(reversed(order_spec), key=operator.itemgetter(1)):
            indexes = reversed([i[0] for i in spec])
            # The rows may contain None values: nullitemgetter()
            # replaces these with a special value that compares
            # smaller than anything else.
            rows.sort(key=nullitemgetter(*indexes), reverse=reverse)

    # Convert results into list of tuples.
    rows = [tuple(row[i] for i in result_indexes) for row in rows]

    # Apply distinct.
    if query.distinct:
        rows = list(misc_utils.uniquify(rows))

    # Apply limit.
    if query.limit is not None:
        rows = rows[:query.limit]

    return result_types, rows
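
The allocate/initialize/update/finalize calls above follow a small aggregator
protocol: each aggregation node reserves a slot in a per-group store, then
folds rows into that slot as postings stream by. A simplified sketch of that
protocol (the method names mirror the calls in the code above; the internals
here are assumptions, not beancount's actual classes):

class Allocator:
    """Hands out slot indexes so each aggregator owns one cell per store."""
    def __init__(self):
        self.size = 0

    def allocate(self):
        handle = self.size
        self.size += 1
        return handle

    def create_store(self):
        return [None] * self.size

class SumAggregator:
    """Sums a child expression over all the rows of one group."""
    def __init__(self, c_expr):
        self.c_expr = c_expr
        self.handle = None

    def allocate(self, allocator):
        self.handle = allocator.allocate()

    def initialize(self, store):
        store[self.handle] = 0

    def update(self, store, context):
        store[self.handle] += self.c_expr(context)

    def finalize(self, store):
        pass  # A running sum needs no post-processing.

    def __call__(self, context):
        # After finalize(), evaluation reads the result out of context.store.
        return context.store[self.handle]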
Example #5
def execute_query(query, entries, options_map):
    """Given a compiled select statement, execute the query.

    Args:
      query: An instance of a query_compile.Query
      entries: A list of directives.
      options_map: A parser's options map.
    Returns:
      A pair of:
        result_types: A list of (name, data-type) item pairs.
        result_rows: A list of ResultRow tuples of length and types described by
          'result_types'.
    """
    # Figure out the result types that describe what we return.
    result_types = [(target.name, target.c_expr.dtype)
                    for target in query.c_targets
                    if target.name is not None]

    # Create a class for each final result.
    # pylint: disable=invalid-name
    ResultRow = collections.namedtuple('ResultRow',
                                       [target.name
                                        for target in query.c_targets
                                        if target.name is not None])

    # Pre-compute lists of the expressions to evaluate.
    group_indexes = (set(query.group_indexes)
                     if query.group_indexes is not None
                     else query.group_indexes)

    # Indexes of the columns for result rows and order rows.
    result_indexes = [index
                      for index, c_target in enumerate(query.c_targets)
                      if c_target.name]
    order_indexes = query.order_indexes

    # Figure out if we need to compute balance.
    uses_balance = any(uses_balance_column(c_expr)
                       for c_expr in itertools.chain(
                               [c_target.c_expr for c_target in query.c_targets],
                               [query.c_where] if query.c_where else []))

    context = create_row_context(entries, options_map)

    # Filter the entries using the FROM clause.
    filt_entries = (filter_entries(query.c_from, entries, options_map, context)
                    if query.c_from is not None else
                    entries)

    # Dispatch between the non-aggregated queries and aggregated queries.
    c_where = query.c_where
    schwartz_rows = []

    # Precompute a list of expressions to be evaluated.
    c_target_exprs = [c_target.c_expr for c_target in query.c_targets]

    if query.group_indexes is None:
        # This is a non-aggregated query.

        # Iterate over all the postings once and produce schwartzian rows.
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    # Compute the balance.
                    if uses_balance:
                        context.balance.add_position(posting)

                    # Evaluate all the values.
                    values = [c_expr(context) for c_expr in c_target_exprs]

                    # Compute result and sort-key objects.
                    result = ResultRow._make(values[index]
                                             for index in result_indexes)
                    sortkey = row_sortkey(order_indexes, values, c_target_exprs)
                    schwartz_rows.append((sortkey, result))
    else:
        # This is an aggregated query.

        # Precompute lists of non-aggregate and aggregate expressions to
        # evaluate. For aggregate targets, we hunt down the aggregate
        # sub-expressions to evaluate, to avoid recursion during iteration.
        c_nonaggregate_exprs = []
        c_aggregate_exprs = []
        for index, c_expr in enumerate(c_target_exprs):
            if index in group_indexes:
                c_nonaggregate_exprs.append(c_expr)
            else:
                _, aggregate_exprs = query_compile.get_columns_and_aggregates(c_expr)
                c_aggregate_exprs.extend(aggregate_exprs)
        # Note: it is possible that there are no aggregates to compute here. You could
        # have all columns be non-aggregates and group-by the entire list of columns.

        # Pre-allocate handles in aggregation nodes.
        allocator = Allocator()
        for c_expr in c_aggregate_exprs:
            c_expr.allocate(allocator)

        # Iterate over all the postings to evaluate the aggregates.
        agg_store = {}
        for entry in misc_utils.filter_type(filt_entries, data.Transaction):
            context.entry = entry
            for posting in entry.postings:
                context.posting = posting
                if c_where is None or c_where(context):
                    # Compute the balance.
                    if uses_balance:
                        context.balance.add_position(posting)

                    # Compute the non-aggregate expressions.
                    row_key = tuple(c_expr(context)
                                    for c_expr in c_nonaggregate_exprs)

                    # Get an appropriate store for the unique key of this row.
                    try:
                        store = agg_store[row_key]
                    except KeyError:
                        # This is a new row; create a new store.
                        store = allocator.create_store()
                        for c_expr in c_aggregate_exprs:
                            c_expr.initialize(store)
                        agg_store[row_key] = store

                    # Update the aggregate expressions.
                    for c_expr in c_aggregate_exprs:
                        c_expr.update(store, context)

        # Iterate over all the aggregations to produce the schwartzian rows.
        for key, store in agg_store.items():
            key_iter = iter(key)
            values = []

            # Finalize the store.
            for c_expr in c_aggregate_exprs:
                c_expr.finalize(store)
            context.store = store

            for index, c_expr in enumerate(c_target_exprs):
                if index in group_indexes:
                    value = next(key_iter)
                else:
                    value = c_expr(context)
                values.append(value)

            # Compute result and sort-key objects.
            result = ResultRow._make(values[index]
                                     for index in result_indexes)
            sortkey = row_sortkey(order_indexes, values, c_target_exprs)
            schwartz_rows.append((sortkey, result))

    # Order results if requested.
    if order_indexes is not None:
        schwartz_rows.sort(key=operator.itemgetter(0),
                           reverse=(query.ordering == 'DESC'))

    # Extract final results, in sorted order at this point.
    result_rows = [x[1] for x in schwartz_rows]

    # Apply distinct.
    if query.distinct:
        result_rows = list(misc_utils.uniquify(result_rows))

    # Apply limit.
    if query.limit is not None:
        result_rows = result_rows[:query.limit]

    # Flatten inventories if requested.
    if query.flatten:
        result_types, result_rows = flatten_results(result_types, result_rows)

    return (result_types, result_rows)
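
The schwartz_rows name refers to the Schwartzian transform
(decorate-sort-undecorate) used above: the sort key is computed once per row,
the (key, row) pairs are sorted on the key alone, and the keys are then
discarded. A minimal standalone illustration of the pattern:

import operator

def sort_rows(rows, keyfunc, descending=False):
    decorated = [(keyfunc(row), row) for row in rows]  # decorate with the key
    decorated.sort(key=operator.itemgetter(0), reverse=descending)
    return [row for _, row in decorated]               # undecorate

# sort_rows([('b', 2), ('a', 1)], keyfunc=lambda r: r[1]) -> [('a', 1), ('b', 2)]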