Exemplo n.º 1
0
    def eval_table_Join(self, table_expr):
        """Evaluate a join expression and return the joined context.

        The base table is evaluated first, then every right-hand table, and
        the joins are folded left to right: the result of each join becomes
        the left side of the next one.

        Arguments:
            table_expr: The join node. This method reads its ``base``,
                ``tables`` (pairs of table expression and join type) and
                ``conditions`` (one list of conditions per joined table).

        Returns:
            The context produced by applying every join in order.
        """
        accumulated = self.evaluate_table_expr(table_expr.base)
        rhs_tables, join_types = zip(*table_expr.tables)
        evaluated_rhs = [self.evaluate_table_expr(t) for t in rhs_tables]

        join_steps = zip(evaluated_rhs, join_types, table_expr.conditions)
        for right, join_type, conditions in join_steps:
            if join_type is tq_ast.JoinType.CROSS:
                accumulated = context.cross_join_contexts(accumulated, right)
                continue

            # The compilation step reordered the join conditions, so column1
            # of every condition refers to the left side of this join.
            left_refs = [cond.column1 for cond in conditions]
            right_refs = [cond.column2 for cond in conditions]

            # Bucket the right-hand rows by their join key.
            right_buckets = {}
            for row in xrange(right.num_rows):
                key = self.get_join_key(right, right_refs, row)
                if key in right_buckets:
                    bucket = right_buckets[key]
                else:
                    bucket = context.empty_context_from_template(right)
                    right_buckets[key] = bucket
                context.append_row_to_context(
                    src_context=right, index=row, dest_context=bucket)

            # Start from an empty context built from both sides' templates.
            joined = context.cross_join_contexts(
                context.empty_context_from_template(accumulated),
                context.empty_context_from_template(right))

            for row in xrange(accumulated.num_rows):
                key = self.get_join_key(accumulated, left_refs, row)
                left_row = context.row_context_from_context(accumulated, row)
                if key not in right_buckets:
                    if join_type is tq_ast.JoinType.LEFT_OUTER:
                        # For a left outer join, we still want to put in a
                        # row with nulls on the right.
                        context.append_context_to_context(left_row, joined)
                    continue
                matched = context.cross_join_contexts(
                    left_row, right_buckets[key])
                context.append_context_to_context(matched, joined)

            accumulated = joined

        return accumulated
Exemplo n.º 2
0
    def eval_table_Join(self, table_expr):
        """Fold the joins described by ``table_expr`` into a single context.

        ``table_expr.base`` is evaluated as the initial left side. Each entry
        of ``table_expr.tables`` (a table expression paired with a join type)
        is evaluated and joined onto the running result using the matching
        entry of ``table_expr.conditions``.

        Arguments:
            table_expr: The join node to evaluate.

        Returns:
            The context holding the rows of the fully joined tables.
        """
        left = self.evaluate_table_expr(table_expr.base)
        rhs_tables, join_types = zip(*table_expr.tables)
        rights = map(self.evaluate_table_expr, rhs_tables)

        for right, join_type, conditions in zip(
                rights, join_types, table_expr.conditions):
            if join_type is tq_ast.JoinType.CROSS:
                left = context.cross_join_contexts(left, right)
                continue

            # Join conditions were reordered during compilation, which is
            # why column1 can be taken as the left-hand key here.
            left_key_refs = [cond.column1 for cond in conditions]
            right_key_refs = [cond.column2 for cond in conditions]

            # Map each right-hand join key to a context of its rows.
            groups = {}
            for idx in xrange(right.num_rows):
                key = self.get_join_key(right, right_key_refs, idx)
                group = groups.get(key)
                if group is None:
                    group = context.empty_context_from_template(right)
                    groups[key] = group
                context.append_row_to_context(
                    src_context=right, index=idx, dest_context=group)

            empty_left = context.empty_context_from_template(left)
            empty_right = context.empty_context_from_template(right)
            output = context.cross_join_contexts(empty_left, empty_right)

            for idx in xrange(left.num_rows):
                key = self.get_join_key(left, left_key_refs, idx)
                row = context.row_context_from_context(left, idx)
                group = groups.get(key)
                if group is not None:
                    context.append_context_to_context(
                        context.cross_join_contexts(row, group), output)
                elif join_type is tq_ast.JoinType.LEFT_OUTER:
                    # A left outer join keeps unmatched left rows, with
                    # nulls on the right.
                    context.append_context_to_context(row, output)
            left = output

        return left
Exemplo n.º 3
0
    def eval_table_Join(self, table_expr):
        """Join the two tables of ``table_expr`` and return the result.

        Rows of ``table_expr.table2`` are bucketed by join key; every row of
        ``table_expr.table1`` is then combined with the bucket sharing its
        key. When ``table_expr.is_left_outer`` is true, left rows without a
        match are kept as well.

        Arguments:
            table_expr: The join node; ``table1``, ``table2``, ``conditions``
                and ``is_left_outer`` are read from it.

        Returns:
            A context containing the joined rows.
        """
        left = self.evaluate_table_expr(table_expr.table1)
        right = self.evaluate_table_expr(table_expr.table2)

        left_refs = [cond.column1 for cond in table_expr.conditions]
        right_refs = [cond.column2 for cond in table_expr.conditions]

        # Bucket the rows of table 2 by join key.
        right_groups = {}
        for row in xrange(right.num_rows):
            key = self.get_join_key(right, right_refs, row)
            if key in right_groups:
                group = right_groups[key]
            else:
                group = context.empty_context_from_template(right)
                right_groups[key] = group
            context.append_row_to_context(
                src_context=right, index=row, dest_context=group)

        empty_left = context.empty_context_from_template(left)
        empty_right = context.empty_context_from_template(right)
        joined = context.cross_join_contexts(empty_left, empty_right)

        for row in xrange(left.num_rows):
            key = self.get_join_key(left, left_refs, row)
            if key in right_groups:
                left_row = context.row_context_from_context(left, row)
                matched = context.cross_join_contexts(
                    left_row, right_groups[key])
                context.append_context_to_context(matched, joined)
            elif table_expr.is_left_outer:
                # Left outer join: no match was found, but we still put in a
                # row with nulls on the right.
                left_row = context.row_context_from_context(left, row)
                context.append_context_to_context(left_row, joined)

        return joined
Exemplo n.º 4
0
    def eval_table_Join(self, table_expr):
        """Evaluate a two-table join and return the combined context.

        Arguments:
            table_expr: The join node. ``table1`` and ``table2`` are
                evaluated as the left and right inputs, ``conditions``
                supplies the key columns (``column1`` for the left side,
                ``column2`` for the right side), and ``is_left_outer``
                selects whether unmatched left rows are kept.

        Returns:
            A context with one row per matching (left row, right row) pair,
            plus the unmatched left rows when ``is_left_outer`` is set.
        """
        lhs = self.evaluate_table_expr(table_expr.table1)
        rhs = self.evaluate_table_expr(table_expr.table2)

        lhs_key_refs = [cond.column1 for cond in table_expr.conditions]
        rhs_key_refs = [cond.column2 for cond in table_expr.conditions]

        # Index the right-hand rows: join key -> context of rows with it.
        rhs_by_key = {}
        for n in xrange(rhs.num_rows):
            rhs_key = self.get_join_key(rhs, rhs_key_refs, n)
            rows_for_key = rhs_by_key.get(rhs_key)
            if rows_for_key is None:
                rows_for_key = context.empty_context_from_template(rhs)
                rhs_by_key[rhs_key] = rows_for_key
            context.append_row_to_context(
                src_context=rhs, index=n, dest_context=rows_for_key)

        output = context.cross_join_contexts(
            context.empty_context_from_template(lhs),
            context.empty_context_from_template(rhs))

        for n in xrange(lhs.num_rows):
            lhs_key = self.get_join_key(lhs, lhs_key_refs, n)
            rows_for_key = rhs_by_key.get(lhs_key)
            if rows_for_key is not None:
                lhs_row = context.row_context_from_context(lhs, n)
                context.append_context_to_context(
                    context.cross_join_contexts(lhs_row, rows_for_key),
                    output)
            elif table_expr.is_left_outer:
                # Nothing matched; a left outer join still emits the left
                # row, with nulls on the right.
                lhs_row = context.row_context_from_context(lhs, n)
                context.append_context_to_context(lhs_row, output)

        return output
Exemplo n.º 5
0
    def evaluate_within(self, select_fields, group_set, ctx, within_clause):
        """Evaluate a list of select fields, one of which has a WITHIN or
        WITHIN RECORD clause and/or grouping by some of the values.

        Only WITHIN RECORD is implemented. It is evaluated by adding a
        synthetic row-number column to a copy of ``ctx`` and grouping by that
        column, so that every source row forms its own group.

        Arguments:
            select_fields: A list of SelectField instances to evaluate.
            group_set: The groups (either fields in select_context or aliases
                referring to an element of select_fields) to group by. This
                object is modified in place: the row-number column is
                appended to its ``field_groups`` and the aliases of fields
                without a WITHIN clause are added to its ``alias_groups``.
            ctx: The "source" context that the expressions can access when
                being evaluated.
            within_clause: The scope of the WITHIN clause. Only the string
                "RECORD" is supported.

        Returns:
            A context with the results.

        Raises:
            NotImplementedError: If ``within_clause`` is not "RECORD", or if
                more than one field is selected and a field without a WITHIN
                clause has mode REPEATED.
        """
        if within_clause != "RECORD":
            # TODO: Implement for WITHIN clause
            # Fail explicitly: the context used for grouping below is only
            # built for WITHIN RECORD.
            raise NotImplementedError(
                'WITHIN clauses other than WITHIN RECORD are not supported')

        # Add an extra column of row number over which the grouping
        # will be done.
        ctx_with_primary_key = context.empty_context_from_template(ctx)
        context.append_context_to_context(ctx, ctx_with_primary_key)

        # Attach the synthetic column to the table of the first column.
        (table_name, _), _ = ctx_with_primary_key.columns.items()[0]
        row_nums = range(1, ctx_with_primary_key.num_rows + 1)
        row_nums_col = context.Column(type=tq_types.INT,
                                      mode=tq_modes.NULLABLE,
                                      values=row_nums)
        ctx_with_primary_key.columns[(
            table_name, 'row_numbers_column_primary_key')] = row_nums_col
        group_set.field_groups.append(
            typed_ast.ColumnRef(table_name,
                                'row_numbers_column_primary_key',
                                tq_types.INT))
        if len(select_fields) > 1:
            # TODO: Implement WITHIN RECORD when one or more of the
            # selected fields (except the one in the WITHIN RECORD
            # clause) has mode = REPEATED.
            for select_field in select_fields:
                if select_field.within_clause is None:
                    if select_field.expr.mode != tq_modes.REPEATED:
                        group_set.alias_groups.add(select_field.alias)
                    else:
                        raise NotImplementedError(
                            'Cannot select fields having mode=REPEATED '
                            'for queries involving WITHIN RECORD')

        # Rebind the module-level trivial group set to a fresh, empty one.
        # NOTE(review): presumably this undoes the in-place changes above
        # when the caller passed TRIVIAL_GROUP_SET itself as group_set --
        # confirm against the callers.
        typed_ast.TRIVIAL_GROUP_SET = typed_ast.GroupSet(set(), [])
        return self.evaluate_groups(select_fields, group_set,
                                    ctx_with_primary_key)
Exemplo n.º 6
0
    def evaluate_groups(self, select_fields, group_set, select_context):
        """Evaluate a list of select fields, grouping by some of the values.

        Rows of select_context are partitioned by group key, the non-key
        select fields are evaluated once per group, and one result row is
        produced per group. Groups are kept in an ordered mapping, so result
        rows come out in the order in which their group was first inserted.

        Arguments:
            select_fields: A list of SelectField instances to evaluate.
            group_set: The groups (either fields in select_context or aliases
                referring to an element of select_fields) to group by.
            select_context: A context with the data that the select statement
                has access to.

        Returns:
            A context with the results.
        """
        # TODO: Implement GROUP BY for repeated fields.
        field_groups = group_set.field_groups
        alias_groups = group_set.alias_groups
        alias_group_list = sorted(alias_groups)

        # Fields whose alias is grouped on act as keys; every other field is
        # evaluated per group.
        group_key_select_fields = [
            f for f in select_fields if f.alias in alias_groups
        ]
        aggregate_select_fields = [
            f for f in select_fields if f.alias not in alias_groups
        ]

        alias_group_result_context = self.evaluate_select_fields(
            group_key_select_fields, select_context)

        # Dictionary mapping (singleton) group key context to the context of
        # values for that key. Ordered, so iteration below follows the order
        # in which keys were inserted.
        group_contexts = collections.OrderedDict()

        # As a special case, we check if we are grouping by nothing (in other
        # words, if the query had an aggregate without any explicit GROUP BY).
        # Normally, it's fine to just use the trivial group set: every row maps
        # to the empty tuple, so we have a single aggregation over the entire
        # table. However, if the table is empty, we still want to aggregate
        # over the empty table and return a single row, so this is the one case
        # where it's possible to have a group with no rows in it. To make this
        # case work, we ensure that the trivial group key (the empty tuple)
        # always shows up for the TRIVIAL_GROUP_SET case.
        # In the long run, it might be cleaner to view TRIVIAL_GROUP_SET as a
        # completely separate case, but this approach should work.
        if group_set == typed_ast.TRIVIAL_GROUP_SET:
            trivial_ctx = context.Context(1, collections.OrderedDict(), None)
            group_contexts[trivial_ctx] = (
                context.empty_context_from_template(select_context))

        # TODO: Seems pretty ugly and wasteful to use a whole context as a
        # group key.
        for i in xrange(select_context.num_rows):
            key = self.get_group_key(field_groups, alias_group_list,
                                     select_context,
                                     alias_group_result_context, i)
            if key not in group_contexts:
                new_group_context = context.empty_context_from_template(
                    select_context)
                group_contexts[key] = new_group_context
            group_context = group_contexts[key]
            context.append_row_to_context(src_context=select_context,
                                          index=i,
                                          dest_context=group_context)

        result_context = self.empty_context_from_select_fields(select_fields)
        result_col_names = [field.alias for field in select_fields]
        for context_key, group_context in group_contexts.iteritems():
            # Evaluate the non-key fields in a one-row context that carries
            # the key's columns and the group's rows.
            group_eval_context = context.Context(1, context_key.columns,
                                                 group_context)
            group_aggregate_result_context = self.evaluate_select_fields(
                aggregate_select_fields, group_eval_context)
            full_result_row_context = self.merge_contexts_for_select_fields(
                result_col_names, group_aggregate_result_context, context_key)
            context.append_row_to_context(full_result_row_context, 0,
                                          result_context)
        return result_context
Exemplo n.º 7
0
    def evaluate_groups(self, select_fields, group_set, select_context):
        """Evaluate a list of select fields, grouping by some of the values.

        Rows of select_context are partitioned by group key, the non-key
        select fields are evaluated once per group, and one result row is
        produced per group. Groups are stored in an ordered mapping so that
        result rows are emitted in the order in which each group was first
        inserted, rather than in hash order.

        Arguments:
            select_fields: A list of SelectField instances to evaluate.
            group_set: The groups (either fields in select_context or aliases
                referring to an element of select_fields) to group by.
            select_context: A context with the data that the select statement
                has access to.

        Returns:
            A context with the results.
        """
        field_groups = group_set.field_groups
        alias_groups = group_set.alias_groups
        alias_group_list = sorted(alias_groups)

        # Fields whose alias is grouped on act as keys; every other field is
        # evaluated per group.
        group_key_select_fields = [
            f for f in select_fields if f.alias in alias_groups]
        aggregate_select_fields = [
            f for f in select_fields if f.alias not in alias_groups]

        alias_group_result_context = self.evaluate_select_fields(
            group_key_select_fields, select_context)

        # Dictionary mapping (singleton) group key context to the context of
        # values for that key. An OrderedDict keeps the output row order
        # deterministic (insertion order of the group keys).
        group_contexts = collections.OrderedDict()

        # As a special case, we check if we are grouping by nothing (in other
        # words, if the query had an aggregate without any explicit GROUP BY).
        # Normally, it's fine to just use the trivial group set: every row maps
        # to the empty tuple, so we have a single aggregation over the entire
        # table. However, if the table is empty, we still want to aggregate
        # over the empty table and return a single row, so this is the one case
        # where it's possible to have a group with no rows in it. To make this
        # case work, we ensure that the trivial group key (the empty tuple)
        # always shows up for the TRIVIAL_GROUP_SET case.
        # In the long run, it might be cleaner to view TRIVIAL_GROUP_SET as a
        # completely separate case, but this approach should work.
        if group_set == typed_ast.TRIVIAL_GROUP_SET:
            trivial_ctx = context.Context(1, collections.OrderedDict(), None)
            group_contexts[trivial_ctx] = (
                context.empty_context_from_template(select_context))

        # TODO: Seems pretty ugly and wasteful to use a whole context as a
        # group key.
        for i in xrange(select_context.num_rows):
            key = self.get_group_key(
                field_groups, alias_group_list, select_context,
                alias_group_result_context, i)
            if key not in group_contexts:
                new_group_context = context.empty_context_from_template(
                    select_context)
                group_contexts[key] = new_group_context
            group_context = group_contexts[key]
            context.append_row_to_context(src_context=select_context, index=i,
                                          dest_context=group_context)

        result_context = self.empty_context_from_select_fields(select_fields)
        result_col_names = [field.alias for field in select_fields]
        for context_key, group_context in group_contexts.iteritems():
            # Evaluate the non-key fields in a one-row context that carries
            # the key's columns and the group's rows.
            group_eval_context = context.Context(
                1, context_key.columns, group_context)
            group_aggregate_result_context = self.evaluate_select_fields(
                aggregate_select_fields, group_eval_context)
            full_result_row_context = self.merge_contexts_for_select_fields(
                result_col_names, group_aggregate_result_context, context_key)
            context.append_row_to_context(full_result_row_context, 0,
                                          result_context)
        return result_context