def compile_FunctionCall(self, expr, type_ctx):
    """Compile a function call expression into a typed AST node.

    Args:
        expr: The function call node; its ``name`` and ``args`` are read.
        type_ctx: The type context the call appears in.

    Returns:
        A ``typed_ast.AggregateFunctionCall`` when ``expr`` is an innermost
        aggregate, otherwise a ``typed_ast.FunctionCall``.

    Raises:
        exceptions.CompileError: If an innermost aggregate is used where no
            aggregate context exists, or if the argument types are rejected
            by the function's type check.
    """
    # The arguments of an innermost aggregate are compiled against the
    # aggregate context rather than the surrounding one, and a distinct
    # node type is emitted so the evaluator knows to switch contexts too.
    if not self.is_innermost_aggregate(expr):
        arg_ctx = type_ctx
        node_type = typed_ast.FunctionCall
    elif type_ctx.aggregate_context is None:
        raise exceptions.CompileError('Unexpected aggregate function.')
    else:
        arg_ctx = type_ctx.aggregate_context
        node_type = typed_ast.AggregateFunctionCall

    func = runtime.get_func(expr.name)
    compiled_args = []
    for arg_expr in expr.args:
        compiled_args.append(self.compile_expr(arg_expr, arg_ctx))

    try:
        result_type = func.check_types(
            *[compiled.type for compiled in compiled_args])
    except TypeError:
        message = 'Invalid types for function {}: {}'.format(
            expr.name, [compiled.type for compiled in compiled_args])
        raise exceptions.CompileError(message)
    return node_type(func, compiled_args, result_type)
def column_ref_for_name(self, name):
    """Gets the full identifier for a column from any possible alias.

    Resolution order:
      1. ``name`` is itself a key of ``self.columns``.
      2. Every way of splitting ``name`` at a dot into a
         ``(left, right)`` key of ``self.columns``, plus the entry for
         ``name`` in ``self.aliases`` if there is one.
      3. ``self.implicit_column_context``, when set and nothing above
         matched.

    Args:
        name: The column name as written in the query.

    Returns:
        A ``typed_ast.ColumnRef`` for the single matching column.

    Raises:
        exceptions.CompileError: If more than one candidate matches
            ("Ambiguous field") or none does ("Field not found").
    """
    if name in self.columns:
        # NOTE(review): this passes two arguments to ColumnRef while every
        # other call below passes three -- confirm this is intended.
        return typed_ast.ColumnRef(name, self.columns[name])

    possible_results = []

    # Try all possible ways of splitting a dot-separated string.
    # The pattern is a raw string: a plain '\.' is an invalid escape
    # sequence and triggers a warning on newer Python versions.
    for match in re.finditer(r'\.', name):
        left_side = name[:match.start()]
        right_side = name[match.end():]
        result_type = self.columns.get((left_side, right_side))
        if result_type is not None:
            possible_results.append(
                typed_ast.ColumnRef(left_side, right_side, result_type))

    if name in self.aliases:
        table, column = self.aliases[name]
        result_type = self.columns[(table, column)]
        possible_results.append(
            typed_ast.ColumnRef(table, column, result_type))

    if len(possible_results) == 1:
        return possible_results[0]
    elif len(possible_results) > 1:
        raise exceptions.CompileError('Ambiguous field: {}'.format(name))
    else:
        # Nothing matched here; defer to the implicit context if any.
        if self.implicit_column_context is not None:
            return self.implicit_column_context.column_ref_for_name(name)
        else:
            raise exceptions.CompileError(
                'Field not found: {}'.format(name))
def compile_select_field(self, expr, alias, within_clause, type_ctx):
    """Compile one SELECT field, validating its WITHIN clause if present.

    Args:
        expr: The field's expression.
        alias: The alias to attach to the compiled field.
        within_clause: ``None``, ``'RECORD'``, or a name that must equal
            the part of the first argument's name before its first dot.
        type_ctx: The type context used to compile ``expr``.

    Returns:
        A ``typed_ast.SelectField`` wrapping the compiled expression.

    Raises:
        exceptions.CompileError: If ``within_clause`` names something other
            than the leading name component of ``expr``'s first argument.
    """
    # Only a WITHIN clause that names something needs checking; None and
    # 'RECORD' are accepted as-is.
    names_scope = within_clause is not None and within_clause != 'RECORD'
    if names_scope and (
            expr.args[0].name.split('.')[0] != within_clause):
        raise exceptions.CompileError('WITHIN clause syntax error')

    compiled_expr = self.compile_expr(expr, type_ctx)
    return typed_ast.SelectField(compiled_expr, alias, within_clause)
def union_contexts(cls, contexts):
    """Creates a type context from the union of others.

    This follows the semantics of the comma operator:
    -Columns keep the position at which they were first seen; a column
     that shows up again in a later context is not moved.
    -The table part of every column key is dropped (replaced by None), so
     columns can only be referenced by their direct names.

    Args:
        contexts: Iterable of type contexts; each must have no aggregate
            context.

    Returns:
        The result of ``cls.from_full_columns`` on the merged columns.

    Raises:
        exceptions.CompileError: If the same column name appears with two
            different types.
    """
    merged = collections.OrderedDict()
    for context in contexts:
        assert context.aggregate_context is None
        for (_, column_name), col_type in context.columns.items():
            key = (None, column_name)
            if key not in merged:
                merged[key] = col_type
                continue
            # Seeing the column again is fine only if the types agree.
            if merged[key] == col_type:
                continue
            raise exceptions.CompileError(
                'Incompatible types when performing union on field '
                '{}: {} vs. {}'.format(key, merged[key], col_type))
    return cls.from_full_columns(merged)
def compile_UnaryOperator(self, expr, type_ctx):
    """Compile a unary operator application into a typed function call.

    Args:
        expr: The unary operator node; ``operator`` and ``expr`` are read.
        type_ctx: The type context used to compile the operand.

    Returns:
        A ``typed_ast.FunctionCall`` applying the operator's runtime
        function to the compiled operand.

    Raises:
        exceptions.CompileError: If the operator's type check rejects the
            operand type.
    """
    op_func = runtime.get_unary_op(expr.operator)
    operand = self.compile_expr(expr.expr, type_ctx)
    try:
        result_type = op_func.check_types(operand.type)
    except TypeError:
        message = 'Invalid type for operator {}: {}'.format(
            expr.operator, [operand.type])
        raise exceptions.CompileError(message)
    return typed_ast.FunctionCall(op_func, [operand], result_type)
def compile_join_field(expr, join_type):
    """Compile a single part of the join.

    This results in a list of one or more join fields, depending on
    whether or not multiple are ANDed together.

    This function reads ``self``, ``aliases`` and ``type_contexts`` from
    the enclosing scope; the index of the alias that prefixes a column
    name is used to pick the entry of ``type_contexts`` to compile it in.

    Args:
        expr: The join condition, or None for a cross join.
        join_type: The ``tq_ast.JoinType`` of the join.

    Returns:
        ``[None]`` for a cross join, otherwise a list of
        ``typed_ast.JoinFields``.

    Raises:
        exceptions.CompileError: If a column in the condition is not
            prefixed by any known alias, or if the condition is not an AND
            of equality comparisons between fields of distinct tables.
    """
    def alias_index_for(column):
        """Return the index of the first alias prefixing the column name.

        Raises CompileError instead of leaking a bare StopIteration when
        no alias matches.
        """
        alias_idx = next(
            (idx for idx, alias in enumerate(aliases)
             if column.name.startswith(alias + ".")),
            None)
        if alias_idx is None:
            raise exceptions.CompileError(
                'JOIN condition field {} does not belong to any of the '
                'joined tables.'.format(column.name))
        return alias_idx

    if join_type is tq_ast.JoinType.CROSS:
        assert expr is None, (
            "Cross joins do not allow join conditions.")
        return [None]
    if isinstance(expr, tq_ast.BinaryOperator):
        if expr.operator == 'and':
            return list(
                itertools.chain(
                    compile_join_field(expr.left, join_type),
                    compile_join_field(expr.right, join_type)))
        elif (expr.operator in ('=', '==') and
                isinstance(expr.left, tq_ast.ColumnId) and
                isinstance(expr.right, tq_ast.ColumnId)):
            # For evaluation, we want the ordering of the columns in
            # the JoinField to match the ordering of the join, left to
            # right, but bigquery allows either order.  Thus we need to
            # reorder them if they're reversed.
            lhs_alias_idx = alias_index_for(expr.left)
            rhs_alias_idx = alias_index_for(expr.right)
            left_column_id = self.compile_ColumnId(
                expr.left, type_contexts[lhs_alias_idx])
            right_column_id = self.compile_ColumnId(
                expr.right, type_contexts[rhs_alias_idx])

            if lhs_alias_idx < rhs_alias_idx:
                return [
                    typed_ast.JoinFields(left_column_id, right_column_id)
                ]
            elif rhs_alias_idx < lhs_alias_idx:
                return [
                    typed_ast.JoinFields(right_column_id, left_column_id)
                ]
            # Fall through to the error case if the aliases are the
            # same for both sides.
    # Wrap expr in a one-element tuple so that the formatting still works
    # if expr itself happens to be a tuple (e.g. a namedtuple-based node).
    raise exceptions.CompileError(
        'JOIN conditions must consist of an AND of = '
        'comparisons between two field on distinct '
        'tables. Got expression %s' % (expr,))
def compile_joined_table(self, table_expr):
    """Given one side of a JOIN, get its table expression and alias.

    The alias is the expression's explicit alias when it has one, or the
    table name when the expression is a plain ``tq_ast.TableId``.

    Args:
        table_expr: The table expression on one side of the join.

    Returns:
        A ``(compiled_table, alias)`` pair, where the compiled table's
        type context has been replaced by
        ``context_with_full_alias(alias)`` of its original context.

    Raises:
        exceptions.CompileError: If the expression has no alias and is not
            a ``tq_ast.TableId``.
    """
    compiled = self.compile_table_expr(table_expr)

    alias = table_expr.alias
    if alias is None:
        if not isinstance(table_expr, tq_ast.TableId):
            raise exceptions.CompileError(
                'Table expression must have an alias name.')
        alias = table_expr.name

    aliased_ctx = compiled.type_ctx.context_with_full_alias(alias)
    return compiled.with_type_ctx(aliased_ctx), alias
def compile_BinaryOperator(self, expr, type_ctx):
    """Compile a binary operator application into a typed function call.

    Args:
        expr: The binary operator node; ``operator``, ``left`` and
            ``right`` are read.
        type_ctx: The type context used to compile both operands.

    Returns:
        A ``typed_ast.FunctionCall`` applying the operator's runtime
        function to the compiled left and right operands.

    Raises:
        exceptions.CompileError: If the operator's type check rejects the
            operand types.
    """
    op_func = runtime.get_binary_op(expr.operator)
    # Left is compiled before right, matching source order.
    lhs = self.compile_expr(expr.left, type_ctx)
    rhs = self.compile_expr(expr.right, type_ctx)
    operands = [lhs, rhs]
    try:
        result_type = op_func.check_types(lhs.type, rhs.type)
    except TypeError:
        message = 'Invalid types for operator {}: {}'.format(
            expr.operator, [operand.type for operand in operands])
        raise exceptions.CompileError(message)
    return typed_ast.FunctionCall(op_func, operands, result_type)
def get_aliases(cls, select_field_list):
    """Given a list of tq_ast.SelectField, return the aliases to use.

    Each field's alias comes from ``cls.field_alias``. Fields for which
    that returns None get a generated name of the form ``f<N>_``, with N
    counting upward and skipping any name already taken by another field.

    Args:
        select_field_list: The select fields, in output order.

    Returns:
        A list of alias strings, one per field, in the same order.

    Raises:
        exceptions.CompileError: If two fields propose the same alias.
    """
    proposed = [cls.field_alias(field) for field in select_field_list]

    # First pass: collect the explicit aliases and reject duplicates.
    taken = set()
    for alias in proposed:
        if alias is None:
            continue
        if alias in taken:
            raise exceptions.CompileError(
                'Ambiguous column name {}.'.format(alias))
        taken.add(alias)

    # Second pass: fill the gaps with generated names.
    next_num = 0
    aliases = []
    for alias in proposed:
        if alias is None:
            alias = 'f%s_' % next_num
            while alias in taken:
                next_num += 1
                alias = 'f%s_' % next_num
            next_num += 1
        aliases.append(alias)
    return aliases