def get_group_key(self, field_groups, alias_groups, select_context,
                  alias_group_result_context, index):
    """Build a one-row context holding the group-key values for one row.

    Nothing is evaluated here; values are only picked out of contexts that
    were computed earlier.

    Arguments:
        field_groups: A list of ColumnRefs for the field groups to use.
        alias_groups: A list of strings of alias groups to use.
        select_context: A context with the data for the table expression
            being selected from.
        alias_group_result_context: A context with the data for the
            grouped-by select fields.
        index: The row index to use from each context.

    Returns:
        A context.Context constructed with a row count of 1, the selected
        single-value columns, and None as its third argument.
    """
    def single_value_column(source_column):
        # TODO(Samantha): This shouldn't just be nullable.
        return context.Column(
            type=source_column.type,
            mode=tq_modes.NULLABLE,
            values=[source_column.values[index]])

    key_columns = collections.OrderedDict()

    # Field groups are looked up by (table, column) in the source context.
    for column_ref in field_groups:
        key = (column_ref.table, column_ref.column)
        key_columns[key] = single_value_column(select_context.columns[key])

    # Alias groups carry no table, so their key is (None, alias).
    for alias in alias_groups:
        key = (None, alias)
        key_columns[key] = single_value_column(
            alias_group_result_context.columns[key])

    return context.Context(1, key_columns, None)
def evaluate_Literal(self, literal, context_object):
    """Return a NULLABLE column repeating the literal once per context row.

    Arguments:
        literal: The literal node; its `value` and `type` are used.
        context_object: The context whose `num_rows` sets the column length.
    """
    repeated_values = [literal.value] * context_object.num_rows
    return context.Column(
        type=literal.type,
        mode=tq_modes.NULLABLE,
        values=repeated_values)
def evaluate_select_field(self, select_field, ctx):
    """Evaluate a typed select field against ctx.

    Returns:
        A pair of ((None, alias), column): the key under which the result
        is stored and a new column copying the type, mode and values of
        the evaluated expression.
    """
    assert isinstance(select_field, typed_ast.SelectField)
    evaluated = self.evaluate_expr(select_field.expr, ctx)
    column_key = (None, select_field.alias)
    column = context.Column(
        type=evaluated.type,
        mode=evaluated.mode,
        values=evaluated.values)
    return column_key, column
def load_empty_table_from_template(self, table_name, template_table):
    """Load a zero-row table whose columns mirror those of template_table.

    Each column keeps the template column's name and type, is marked
    NULLABLE, and starts with no values. The resulting table is passed to
    self.load_table_or_view.
    """
    empty_columns = collections.OrderedDict()
    for col_name, template_col in template_table.columns.items():
        # TODO(Samantha): This shouldn't just be nullable.
        empty_columns[col_name] = context.Column(
            type=template_col.type, mode=tq_modes.NULLABLE, values=[])
    self.load_table_or_view(Table(table_name, 0, empty_columns))
def empty_context_from_select_fields(self, select_fields):
    """Return a zero-row context with one empty column per select field.

    Columns are keyed by (None, alias) and take the type of the select
    field's expression; every column is marked NULLABLE.
    """
    empty_columns = collections.OrderedDict()
    for select_field in select_fields:
        # TODO(Samantha): This shouldn't just be nullable
        empty_columns[(None, select_field.alias)] = context.Column(
            type=select_field.expr.type,
            mode=tq_modes.NULLABLE,
            values=[])
    return context.Context(0, empty_columns, None)
def evaluate_within(self, select_fields, group_set, ctx, within_clause):
    """Evaluate select fields where one has a WITHIN RECORD clause.

    WITHIN RECORD is implemented by adding a synthetic row-number column to
    a copy of the source context and grouping by it, so each source row
    becomes its own group.

    Arguments:
        select_fields: A list of SelectField instances to evaluate.
        group_set: The groups (either fields in select_context or aliases
            referring to an element of select_fields) to group by. It is
            modified in place: the row-number column is appended to its
            field groups and the aliases of the other select fields are
            added to its alias groups.
        ctx: The "source" context that the expressions can access when
            being evaluated.
        within_clause: The WITHIN target. Only "RECORD" is supported.

    Returns:
        A context with the results, as produced by self.evaluate_groups.

    Raises:
        NotImplementedError: If within_clause is not "RECORD", or if a
            field selected alongside the WITHIN RECORD field has
            mode=REPEATED.
    """
    if within_clause != "RECORD":
        # TODO: Implement for WITHIN clause
        # Fail explicitly: the context with the row-number column is only
        # built for WITHIN RECORD, so there is nothing to group over here.
        raise NotImplementedError(
            'WITHIN %s is not supported; only WITHIN RECORD is implemented'
            % (within_clause,))

    # Add an extra column of row number over which the grouping
    # will be done. Work on a copy so the caller's ctx is left alone.
    ctx_with_primary_key = context.empty_context_from_template(ctx)
    context.append_context_to_context(ctx, ctx_with_primary_key)

    # NOTE(review): this is the first key of the columns mapping. Other
    # code in this file keys columns by (table, column) pairs, so this may
    # be a pair rather than a bare table name -- confirm. The same value is
    # used for both the column key and the ColumnRef below, so the two
    # stay consistent either way.
    table_name = next(iter(ctx_with_primary_key.columns))
    row_nums = list(
        six.moves.xrange(1, ctx_with_primary_key.num_rows + 1))
    row_nums_col = context.Column(
        type=tq_types.INT, mode=tq_modes.NULLABLE, values=row_nums)
    ctx_with_primary_key.columns[
        (table_name, 'row_numbers_column_primary_key')] = row_nums_col
    group_set.field_groups.append(
        typed_ast.ColumnRef(table_name,
                            'row_numbers_column_primary_key',
                            tq_types.INT))

    if len(select_fields) > 1:
        # TODO: Implement WITHIN RECORD when one or more of the
        # selected fields (except the one in the WITHIN RECORD
        # clause) has mode = REPEATED.
        for select_field in select_fields:
            if select_field.within_clause is not None:
                continue
            if select_field.expr.mode == tq_modes.REPEATED:
                raise NotImplementedError(
                    'Cannot select fields having mode=REPEATED '
                    'for queries involving WITHIN RECORD')
            # Fields without a WITHIN clause are grouped by their alias.
            group_set.alias_groups.add(select_field.alias)

    # Rebind the module-level trivial group set to a fresh, empty GroupSet.
    # NOTE(review): presumably group_set can be that shared object, which
    # was mutated above -- confirm.
    typed_ast.TRIVIAL_GROUP_SET = typed_ast.GroupSet(set(), [])
    return self.evaluate_groups(select_fields, group_set,
                                ctx_with_primary_key)
def make_columns(schema, name_prefix='', ever_repeated=False):
    """Flatten a (possibly nested) schema into empty columns.

    Walks schema['fields'] and stores one empty context.Column per
    non-RECORD field into `columns`, a name that is not defined in this
    block (presumably a mapping owned by the enclosing scope). RECORD
    fields are not stored themselves; their sub-fields are processed
    recursively with the record's name and a '.' prepended.

    Arguments:
        schema: A dict with a 'fields' list. Each field is a dict with
            'name' and 'type', an optional 'mode', and, for RECORD fields,
            its own 'fields' list.
        name_prefix: Prefix prepended to every field name at this level.
        ever_repeated: True if any enclosing record has mode REPEATED, in
            which case every column below it is stored as REPEATED.

    Raises:
        ValueError: If a non-RECORD field has a type or mode that is not
            in tq_types.TYPE_SET / tq_modes.MODE_SET.
    """
    for field in schema['fields']:
        prefixed_name = name_prefix + field['name']
        value_type = field['type'].upper()
        # BigQuery schemas may omit "mode"; the documented default is
        # NULLABLE, so treat a missing key that way instead of failing
        # with a KeyError.
        mode = field.get('mode', 'NULLABLE').upper()

        if value_type == 'RECORD':
            make_columns(
                field,
                name_prefix=(prefixed_name + '.'),
                ever_repeated=(ever_repeated or mode == 'REPEATED'))
            continue

        # Type and Mode validation
        if (value_type not in tq_types.TYPE_SET
                or mode not in tq_modes.MODE_SET):
            raise ValueError("Type or Mode given was invalid.")

        # A field nested anywhere under a repeated record is repeated too.
        final_mode = 'REPEATED' if ever_repeated else mode
        columns[prefixed_name] = context.Column(
            type=value_type, mode=final_mode, values=[])
def evaluate_orderings(self, overall_context, select_context,
                       ordering_col, select_fields):
    """Reorder the columns of select_context according to ORDER BY.

    The rows of overall_context are sorted, and every column of
    select_context that can be matched to a column of overall_context is
    replaced by a column holding the sorted values.

    Arguments:
        overall_context: A context with the data that the select
            statement has access to.
        select_context: A context with the data remaining after earlier
            evaluations. Its aggregate_context must be None.
        ordering_col: A list of order-by column objects having two
            properties: column_id, whose name is the column to order by,
            and is_ascending, a boolean (True for ascending and False for
            descending).
        select_fields: A list of select fields that can be used to map
            aliases back to the overall context.

    Returns:
        select_context, with its matched columns replaced.
    """
    # An order-by field might be an alias, so map each alias to the
    # (table, column) pair it selects.
    alias_targets = collections.OrderedDict()
    for field in select_fields:
        alias = field.alias
        alias_targets[alias] = (field.expr.table, field.expr.column)

    assert select_context.aggregate_context is None

    overall_keys = list(overall_context.columns)
    column_values = [
        column.values for column in overall_context.columns.values()]

    # Map the position of each ordered-by column to its direction, in the
    # order the orderings were given.
    direction_by_position = collections.OrderedDict()
    for ordering in ordering_col:
        wanted = ordering.column_id.name
        aliased_pair = alias_targets.get(wanted)
        is_not_alias = wanted not in alias_targets
        for position, pair in enumerate(overall_keys):
            matches = (
                # order by column is of the form `table_name.col`
                '%s.%s' % pair == wanted
                # order by column is an alias
                or aliased_pair == pair
                # order by column is just the field name, but not if that
                # field name is also an alias, to avoid mixing up
                # duplicate field names across joins
                or (is_not_alias and wanted == pair[1]))
            if matches:
                direction_by_position[position] = ordering.is_ascending
                break

    # Transpose columns into rows, then sort once per ordering starting
    # with the last one; the sort is stable, so the first ordering ends up
    # with the highest priority.
    rows = [list(row) for row in zip(*column_values)]
    for position, ascending in reversed(
            list(direction_by_position.items())):
        rows.sort(key=lambda row: row[position], reverse=not ascending)

    if rows:
        sorted_columns = [list(values) for values in zip(*rows)]
    else:
        # With zero rows the transpose loses track of how many columns
        # there were, so fall back to the original (all empty) columns.
        # TODO(colin): can we exit early if there's no data to order?
        sorted_columns = column_values

    for key in select_context.columns:
        for position, pair in enumerate(overall_keys):
            if key == pair:
                same_column = True
            elif key[0]:
                same_column = False
            else:
                # A key without a table may name the column as
                # `table.col` or through a select alias.
                same_column = (
                    key[1] == '%s.%s' % pair
                    or alias_targets.get(key[1]) == pair)
            if same_column:
                current = select_context.columns[key]
                select_context.columns[key] = context.Column(
                    type=current.type,
                    mode=current.mode,
                    values=sorted_columns[position])
                break
    return select_context
def _evaluate(self, num_rows, column):
    """Collect every value of column into a single REPEATED row.

    The result has the input column's type and exactly one row, whose
    value is a list of all the input values. num_rows is not used.
    """
    collected = list(column.values)
    return context.Column(
        type=column.type,
        mode=tq_modes.REPEATED,
        values=[collected])
def setUp(self):
    """Create the fixture tables, their type contexts and a name lookup.

    table1, table2, table3 and record_table are empty; rainbow_table has
    three rows with one column per basic type.
    """
    def nullable(column_type, values):
        return context.Column(
            type=column_type, mode=tq_modes.NULLABLE, values=values)

    def empty_table(table_name, schema):
        # Every column gets its own fresh, empty value list.
        return tinyquery.Table(
            table_name, 0,
            collections.OrderedDict(
                (column_name, nullable(column_type, []))
                for column_name, column_type in schema))

    def type_context_for(table_name, schema):
        return self.make_type_context([
            (table_name, column_name, column_type)
            for column_name, column_type in schema])

    table1_schema = [('value', tq_types.INT), ('value2', tq_types.INT)]
    self.table1 = empty_table('table1', table1_schema)
    self.table1_type_ctx = type_context_for('table1', table1_schema)

    table2_schema = [('value', tq_types.INT), ('value3', tq_types.INT)]
    self.table2 = empty_table('table2', table2_schema)
    self.table2_type_ctx = type_context_for('table2', table2_schema)

    table3_schema = [('value', tq_types.INT)]
    self.table3 = empty_table('table3', table3_schema)
    self.table3_type_ctx = type_context_for('table3', table3_schema)

    rainbow_data = [
        ('ints', tq_types.INT, [-2147483649, -0, 2147483648]),
        ('floats', tq_types.FLOAT, [1.41, 2.72, float('infinity')]),
        ('bools', tq_types.BOOL, [True, False, True]),
        ('strings', tq_types.STRING,
         ["infrared", "indigo", "ultraviolet"]),
        ('times', tq_types.TIMESTAMP, [
            datetime.datetime(1969, 12, 31, 23, 59, 59),
            datetime.datetime(1999, 12, 31, 23, 59, 59),
            datetime.datetime(2038, 1, 19, 3, 14, 8),
        ]),
    ]
    self.rainbow_table = tinyquery.Table(
        'rainbow_table', 3,
        collections.OrderedDict(
            (column_name, nullable(column_type, values))
            for column_name, column_type, values in rainbow_data))
    self.rainbow_table_type_ctx = type_context_for(
        'rainbow_table',
        [(column_name, column_type)
         for column_name, column_type, _ in rainbow_data])

    record_schema = [
        ('r1.i', tq_types.INT),
        ('r1.s', tq_types.STRING),
        ('r2.i', tq_types.INT),
    ]
    self.record_table = empty_table('record_table', record_schema)
    self.record_table_type_ctx = type_context_for(
        'record_table', record_schema)

    self.tables_by_name = {
        'table1': self.table1,
        'table2': self.table2,
        'table3': self.table3,
        'rainbow_table': self.rainbow_table,
        'record_table': self.record_table,
    }