Пример #1
0
    def get_group_key(self, field_groups, alias_groups, select_context,
                      alias_group_result_context, index):
        """Build a one-row context holding the group-key values of one row.

        Nothing is evaluated here; the values are simply picked out of
        contexts that were computed earlier.

        Arguments:
            field_groups: A list of ColumnRefs for the field groups to use.
            alias_groups: A list of strings of alias groups to use.
            select_context: A context with the data for the table expression
                being selected from.
            alias_group_result_context: A context with the data for the
                grouped-by select fields.
            index: The row index to use from each context.

        Returns:
            A context with one row, containing first the field-group columns
            (keyed by (table, column)) and then the alias-group columns
            (keyed by (None, alias)).
        """
        def single_row(source):
            # Copy only the value at `index` into a fresh one-value column.
            # TODO(Samantha): This shouldn't just be nullable.
            return context.Column(
                type=source.type,
                mode=tq_modes.NULLABLE,
                values=[source.values[index]])

        key_columns = collections.OrderedDict()
        for ref in field_groups:
            ref_key = (ref.table, ref.column)
            key_columns[ref_key] = single_row(select_context.columns[ref_key])
        for alias in alias_groups:
            alias_key = (None, alias)
            key_columns[alias_key] = single_row(
                alias_group_result_context.columns[alias_key])
        return context.Context(1, key_columns, None)
Пример #2
0
 def evaluate_Literal(self, literal, context_object):
     """Return a NULLABLE column repeating the literal once per context row."""
     # One copy of the literal's value for each row of the context.
     repeated = [literal.value] * context_object.num_rows
     return context.Column(
         type=literal.type, mode=tq_modes.NULLABLE, values=repeated)
Пример #3
0
 def evaluate_select_field(self, select_field, ctx):
     """Evaluate a typed select field into a (column key, column) pair.

     The key is (None, alias) and the column carries the type, mode and
     values produced by evaluating the field's expression against ctx.
     """
     assert isinstance(select_field, typed_ast.SelectField)
     evaluated = self.evaluate_expr(select_field.expr, ctx)
     column_key = (None, select_field.alias)
     column = context.Column(
         type=evaluated.type,
         mode=evaluated.mode,
         values=evaluated.values)
     return column_key, column
Пример #4
0
 def load_empty_table_from_template(self, table_name, template_table):
     """Load a zero-row table whose columns mirror those of a template.

     Each column keeps the template column's name and type, has no values,
     and is marked NULLABLE.
     """
     empty_columns = collections.OrderedDict()
     for col_name, template_col in template_table.columns.items():
         # TODO(Samantha): This shouldn't just be nullable.
         empty_columns[col_name] = context.Column(
             type=template_col.type, mode=tq_modes.NULLABLE, values=[])
     self.load_table_or_view(Table(table_name, 0, empty_columns))
Пример #5
0
 def empty_context_from_select_fields(self, select_fields):
     """Return a zero-row context with one empty column per select field.

     Columns are keyed by (None, alias) and take the type of the field's
     expression.
     """
     empty_columns = collections.OrderedDict()
     for select_field in select_fields:
         column_key = (None, select_field.alias)
         # TODO(Samantha): This shouldn't just be nullable
         empty_columns[column_key] = context.Column(
             type=select_field.expr.type,
             mode=tq_modes.NULLABLE,
             values=[])
     return context.Context(0, empty_columns, None)
Пример #6
0
    def evaluate_within(self, select_fields, group_set, ctx, within_clause):
        """Evaluate a list of select fields, one of which has a WITHIN or
        WITHIN RECORD clause and/or grouping by some of the values.

        Only WITHIN RECORD is implemented.  It is evaluated by adding a
        synthetic row-number column to a copy of ctx and grouping by it.

        Arguments:
            select_fields: A list of SelectField instances to evaluate.
            group_set: The groups (either fields in select_context or aliases
                referring to an element of select_fields) to group by.  It is
                mutated: the row-number column and the aliases of the
                non-WITHIN select fields are added to it.
            ctx: The "source" context that the expressions can access when
                being evaluated.
            within_clause: The target of the WITHIN clause; "RECORD" for
                WITHIN RECORD.

        Returns:
            A context with the results.

        Raises:
            NotImplementedError: If within_clause is not "RECORD", or if a
                select field without a WITHIN clause has mode REPEATED.
        """
        if within_clause != "RECORD":
            # TODO: Implement for WITHIN clause
            # Fail explicitly; otherwise the code below would reference a
            # context that is only built for WITHIN RECORD.
            raise NotImplementedError(
                'WITHIN is only implemented for WITHIN RECORD')

        # Add an extra column of row number over which the grouping
        # will be done.
        ctx_with_primary_key = context.empty_context_from_template(ctx)
        context.append_context_to_context(ctx, ctx_with_primary_key)

        # NOTE(review): iterating `columns` yields its keys; other code in
        # this file keys columns by (table, column) pairs, so `table_name`
        # may be a whole key rather than a bare table name -- confirm.
        table_name = next(iter(ctx_with_primary_key.columns))
        row_nums = list(
            six.moves.xrange(1, ctx_with_primary_key.num_rows + 1))
        row_nums_col = context.Column(type=tq_types.INT,
                                      mode=tq_modes.NULLABLE,
                                      values=row_nums)
        ctx_with_primary_key.columns[(
            table_name, 'row_numbers_column_primary_key')] = row_nums_col
        group_set.field_groups.append(
            typed_ast.ColumnRef(table_name,
                                'row_numbers_column_primary_key',
                                tq_types.INT))
        if len(select_fields) > 1:
            # TODO: Implement WITHIN RECORD when one or more of the
            # selected fields (except the one in the WITHIN RECORD
            # clause) has mode = REPEATED.
            for select_field in select_fields:
                if select_field.within_clause is None:
                    if select_field.expr.mode != tq_modes.REPEATED:
                        group_set.alias_groups.add(select_field.alias)
                    else:
                        raise NotImplementedError(
                            'Cannot select fields having mode=REPEATED '
                            'for queries involving WITHIN RECORD')
        # NOTE(review): group_set was mutated above; presumably it can be the
        # shared typed_ast.TRIVIAL_GROUP_SET, which is why the module-level
        # name is rebound to a fresh empty group set here -- confirm.
        typed_ast.TRIVIAL_GROUP_SET = typed_ast.GroupSet(set(), [])
        return self.evaluate_groups(select_fields, group_set,
                                    ctx_with_primary_key)
Пример #7
0
 def make_columns(schema, name_prefix='', ever_repeated=False):
     """Flatten a schema into empty columns stored in `columns`.

     `columns` is a mapping defined outside this function; one entry is
     added per leaf (non-RECORD) field, keyed by its dotted name.

     Arguments:
         schema: A dict with a 'fields' list.  Each field is a dict with
             'name', 'type' and optionally 'mode'; RECORD fields carry
             their own nested 'fields' list.
         name_prefix: Dotted prefix built from the enclosing RECORD names.
         ever_repeated: True if any enclosing RECORD was REPEATED, in which
             case every leaf below it is stored as REPEATED.

     Raises:
         ValueError: If a leaf field's type or mode is not recognised.
     """
     for field in schema['fields']:
         prefixed_name = name_prefix + field['name']
         value_type = field['type'].upper()
         # 'mode' is optional in a BigQuery schema and defaults to NULLABLE,
         # so a field that omits it must not fail with a KeyError.
         mode = field.get('mode', 'NULLABLE').upper()
         if value_type == 'RECORD':
             # Recurse into the nested fields, extending the dotted prefix.
             make_columns(
                 field, name_prefix=(prefixed_name + '.'),
                 ever_repeated=(ever_repeated or mode == 'REPEATED'))
         # Type and Mode validation
         elif (value_type not in tq_types.TYPE_SET
               or mode not in tq_modes.MODE_SET):
             raise ValueError("Type or Mode given was invalid.")
         else:
             final_mode = 'REPEATED' if ever_repeated else mode
             columns[prefixed_name] = context.Column(
                 type=value_type, mode=final_mode, values=[])
Пример #8
0
    def evaluate_orderings(self, overall_context, select_context, ordering_col,
                           select_fields):
        """
        Evaluate a context and order it by a list of given columns.

        The rows of overall_context are sorted by the order-by columns, and
        every column of select_context that can be matched to a column of
        overall_context is replaced with that column's reordered values.

        Arguments:
            overall_context: A context with the data that the select statement
                has access to.
            select_context: A context with the data remaining after earlier
                evaluations.  Its aggregate_context must be None.
            ordering_col: A list of order-by column objects having two
                properties: column_id containing the name of the column and
                is_ascending which is a boolean for the order in which the
                column has to be arranged (True for ascending and False for
                descending).
            select_fields: A list of select fields that can be used to map
                aliases back to the overall context

        Returns:
            select_context itself, with its matched columns replaced by the
            reordered values.
        """
        # A dict of aliases for select fields since an order by field
        # might be an alias
        # NOTE(review): this reads expr.table and expr.column on every select
        # field, so each expr is presumably a plain column reference --
        # confirm against callers.
        select_aliases = collections.OrderedDict(
            (select_field.alias, (select_field.expr.table,
                                  select_field.expr.column))
            for select_field in select_fields)

        assert select_context.aggregate_context is None
        # Column-major data: one list of values per overall_context column,
        # in the context's column order.
        all_values = []
        # Maps the position of a column in overall_context to whether it is
        # sorted ascending, in the order the order-by columns were given.
        sort_by_indexes = collections.OrderedDict()

        for ((_, column_name), column) in overall_context.columns.items():
            all_values.append(column.values)

        # Resolve each order-by column to the position of the first
        # overall_context column that it matches.
        for order_by_column in ordering_col:
            order_column_name = order_by_column.column_id.name

            for count, (column_identifier_pair,
                        column) in enumerate(overall_context.columns.items()):
                if (
                        # order by column is of the form `table_name.col`
                        '%s.%s' % column_identifier_pair == order_column_name
                        # order by column is an alias
                        or (select_aliases.get(order_column_name)
                            == column_identifier_pair) or
                    (
                        # order by column is just the field name
                        # but not if that field name is also an alias
                        # to avoid mixing up duplicate field names across joins
                        order_column_name not in select_aliases
                        and order_column_name == column_identifier_pair[1])):
                    sort_by_indexes[count] = order_by_column.is_ascending
                    break
        # Sort on the least significant key first: list.sort is stable, so
        # after the last pass the first order-by column is the primary key.
        reversed_sort_by_indexes = collections.OrderedDict(
            reversed(list(sort_by_indexes.items())))

        # Transpose to row-major so whole rows move together while sorting.
        t_all_values = [list(z) for z in zip(*all_values)]
        # NOTE(review): values are compared directly, so a column mixing None
        # with other values may not be sortable on Python 3 -- confirm.
        for index, is_ascending in reversed_sort_by_indexes.items():
            t_all_values.sort(key=lambda x: (x[index]),
                              reverse=not is_ascending)
        # Transpose back to column-major.
        ordered_values = [list(z) for z in zip(*t_all_values)]
        # If we started evaluating an ordering over 0 rows,
        # all_values was originally [[], [], [], ...], i.e. the empty list for
        # each column, but now ordered_values is just the empty list, since
        # when going to a list of rows, we lost any notion of how many columns
        # there were.  In that case, we just set back to all_values, since
        # there isn't any data to order by anyway.
        # TODO(colin): can we exit early if there's no data to order?
        if len(t_all_values) == 0:
            ordered_values = all_values

        # Copy the reordered values into select_context.  A select column
        # matches an overall column when the keys are equal, or when the
        # select key has no table part and its name is either
        # 'table.column' or an alias of that overall column (`and` binds
        # tighter than `or` in the condition below).  Select columns with no
        # match are left unchanged.
        for key in select_context.columns:
            for count, overall_column_identifier_pair in (enumerate(
                    overall_context.columns)):
                overall_context_loop_break = False
                if (key == overall_column_identifier_pair or not key[0] and
                    (key[1] == '%s.%s' % overall_column_identifier_pair or
                     (select_aliases.get(key[1])
                      == overall_column_identifier_pair))):
                    select_context.columns[key] = context.Column(
                        type=select_context.columns[key].type,
                        mode=select_context.columns[key].mode,
                        values=ordered_values[count])
                    overall_context_loop_break = True
                # Only the first matching overall column is used.
                if overall_context_loop_break:
                    break

        return select_context
Пример #9
0
 def _evaluate(self, num_rows, column):
     """Collapse a column into a single REPEATED row holding all its values.

     num_rows is not used.
     """
     # A one-element outer list: the single row is a copy of every value.
     collapsed = [list(column.values)]
     return context.Column(
         type=column.type, mode=tq_modes.REPEATED, values=collapsed)
Пример #10
0
    def setUp(self):
        """Create the fixture tables, their type contexts and a name index.

        For every table `name`, `self.<name>` holds the table and
        `self.<name>_type_ctx` the matching type context.  All columns are
        NULLABLE.
        """
        def install(name, num_rows, column_specs):
            # column_specs is a list of (column name, type, values) triples.
            columns = collections.OrderedDict()
            for col_name, col_type, col_values in column_specs:
                columns[col_name] = context.Column(
                    type=col_type, mode=tq_modes.NULLABLE, values=col_values)
            setattr(self, name, tinyquery.Table(name, num_rows, columns))
            setattr(self, name + '_type_ctx', self.make_type_context(
                [(name, col_name, col_type)
                 for col_name, col_type, _ in column_specs]))

        install('table1', 0, [
            ('value', tq_types.INT, []),
            ('value2', tq_types.INT, []),
        ])
        install('table2', 0, [
            ('value', tq_types.INT, []),
            ('value3', tq_types.INT, []),
        ])
        install('table3', 0, [
            ('value', tq_types.INT, []),
        ])
        install('rainbow_table', 3, [
            ('ints', tq_types.INT, [-2147483649, -0, 2147483648]),
            ('floats', tq_types.FLOAT, [1.41, 2.72, float('infinity')]),
            ('bools', tq_types.BOOL, [True, False, True]),
            ('strings', tq_types.STRING,
             ["infrared", "indigo", "ultraviolet"]),
            ('times', tq_types.TIMESTAMP, [
                datetime.datetime(1969, 12, 31, 23, 59, 59),
                datetime.datetime(1999, 12, 31, 23, 59, 59),
                datetime.datetime(2038, 1, 19, 3, 14, 8),
            ]),
        ])
        install('record_table', 0, [
            ('r1.i', tq_types.INT, []),
            ('r1.s', tq_types.STRING, []),
            ('r2.i', tq_types.INT, []),
        ])

        table_names = ('table1', 'table2', 'table3', 'rainbow_table',
                       'record_table')
        self.tables_by_name = {
            name: getattr(self, name) for name in table_names
        }