def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, fusing with the parent op if possible.

    Predicate pushdown is only attempted in the cases where it is easy and
    safe, which gives both cleaner SQL and fewer referential errors for
    users.

    Parameters
    ----------
    expr : table expression
        Expression whose ``op()`` decides which strategy is used.
    predicates : list
        Predicate expressions to apply.

    Returns
    -------
    result : ir.TableExpr, or whatever ``_filter_selection`` returns for a
        Selection
    """
    node = expr.op()

    # Selections have their own fusion logic
    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity: express the predicates in terms of
        # the table being aggregated and fold them into the aggregation
        source = node.table
        rewritten = [sub_for(pred, [(expr, source)]) for pred in predicates]

        if source._is_valid(rewritten):
            fused = ops.Aggregation(
                source,
                node.agg_exprs,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)
    elif isinstance(node, ops.Join):
        # Joins are materialized before being filtered
        expr = expr.materialize()

    # No fusion was possible: filter on top of the (possibly materialized)
    # expression with the predicates exactly as given
    return ir.TableExpr(ops.Selection(expr, [], predicates))
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, fusing with the parent op if possible.

    Predicate pushdown is only attempted in the cases where it is easy and
    safe, which gives both cleaner SQL and fewer referential errors for
    users.

    Parameters
    ----------
    expr : table expression
        Expression whose ``op()`` decides which strategy is used.
    predicates : list
        Predicate expressions to apply.

    Returns
    -------
    result : ir.TableExpr, or whatever ``_filter_selection`` returns for a
        Selection
    """
    node = expr.op()

    # Selections have their own fusion logic
    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity
        source = node.table

        rewritten = []
        for pred in predicates:
            if has_reduction(pred):
                # GH1344: We can't sub in things with correlated subqueries
                rewritten.append(pred)
            else:
                rewritten.append(sub_for(pred, [(expr, source)]))

        if source._is_valid(rewritten):
            fused = ops.Aggregation(
                source,
                node.metrics,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)
    elif isinstance(node, ops.Join):
        # Joins are materialized before being filtered
        expr = expr.materialize()

    # No fusion was possible: filter on top of the (possibly materialized)
    # expression with the predicates exactly as given
    return ir.TableExpr(ops.Selection(expr, [], predicates))
def _lift_Selection(self, expr, block=None): if block is None: block = self.block_projection op = expr.op() if block and op.blocks(): # GH #549: dig no further return expr else: lifted_table, unch = self._lift_arg(op.table, block=True) lifted_selections, unch_sel = self._lift_arg(op.selections, block=True) unchanged = unch and unch_sel lifted_predicates, unch_sel = self._lift_arg(op.predicates, block=True) unchanged = unch and unch_sel lifted_sort_keys, unch_sel = self._lift_arg(op.sort_keys, block=True) unchanged = unch and unch_sel if not unchanged: lifted_projection = ops.Selection(lifted_table, lifted_selections, lifted_predicates, lifted_sort_keys) result = ir.TableExpr(lifted_projection) else: result = expr return result
def _lift_Aggregation(self, expr, block=None):
    """Lift the arguments of an Aggregation, rebuilding it if any changed.

    Parameters
    ----------
    expr : table expression
        Expression whose ``op()`` exposes ``table``, ``metrics``, ``by``
        and ``having``.
    block : bool, optional
        When None, ``self.block_projection`` is used.

    Returns
    -------
    result : table expression
        ``expr`` itself when nothing changed, otherwise a new
        ``ir.TableExpr`` wrapping the rebuilt ``ops.Aggregation``.
    """
    if block is None:
        block = self.block_projection

    op = expr.op()
    source = op.table

    # as exposed in #544, do not lift the table inside (which may be
    # filtered or otherwise altered in some way) if blocking
    lifted_table = source if block else self.lift(source, block=True)
    table_same = lifted_table is source

    lifted_aggs, aggs_same = self._lift_arg(op.metrics, block=True)
    lifted_by, by_same = self._lift_arg(op.by, block=True)
    lifted_having, having_same = self._lift_arg(op.having, block=True)

    if table_same and aggs_same and by_same and having_same:
        return expr

    # NOTE(review): only table, metrics, by and having are forwarded to the
    # rebuilt node -- confirm no other Aggregation fields need carrying over
    lifted_op = ops.Aggregation(
        lifted_table, lifted_aggs, by=lifted_by, having=lifted_having
    )
    return ir.TableExpr(lifted_op)
def _inject_table(self, name, schema):
    """Return a table expression for ``name`` backed by ``self.meta``.

    The table object is taken from ``self.meta.tables`` when ``name`` is
    already present there; otherwise it is built with
    ``alch.table_from_schema`` from the given ``schema``.

    Parameters
    ----------
    name : key looked up in ``self.meta.tables``
    schema : schema passed to ``alch.table_from_schema`` for unknown names

    Returns
    -------
    table : ir.TableExpr
    """
    known_tables = self.meta.tables
    table = (
        known_tables[name]
        if name in known_tables
        else alch.table_from_schema(name, self.meta, schema)
    )
    return ir.TableExpr(alch.AlchemyTable(table, self))
def _filter_selection(expr, predicates):
    """Filter a Selection expression, fusing the predicates where possible.

    If any of the filter predicates have the parent expression among their
    roots, then pushdown (at least of that predicate) is not possible.

    It's not unusual for the filter to reference the projection itself. If
    a predicate can be pushed down, it must be rewritten so that its table
    refs are replaced with the roots internal to the projection being
    referenced. Assuming that the fields referenced by the filter predicate
    originate below the projection, the predicate is rewritten to reference
    the parent tables in the join being projected.

    Parameters
    ----------
    expr : table expression whose ``op()`` is the Selection being filtered
    predicates : list of predicate expressions

    Returns
    -------
    result : ir.TableExpr
    """
    op = expr.op()

    if not op.blocks():
        # Potential fusion opportunity. The predicates may need to be
        # rewritten in terms of the child table. This prevents the broken
        # ref issue (described in more detail in #59)
        child = op.table
        rewritten = [sub_for(pred, [(expr, child)]) for pred in predicates]

        if child._is_valid(rewritten):
            fused = ops.Selection(
                child,
                [],
                predicates=op.predicates + rewritten,
                sort_keys=op.sort_keys,
            )
            return ir.TableExpr(fused)

    if not _can_pushdown(op, predicates):
        # No pushdown: filter on top of the existing expression
        stacked = ops.Selection(expr, proj_exprs=[], predicates=predicates)
        return ir.TableExpr(stacked)

    # Pushdown: merge the rewritten predicates into the existing selection
    pushed = [substitute_parents(pred) for pred in predicates]
    merged = ops.Selection(
        op.table,
        proj_exprs=op.selections,
        predicates=op.predicates + pushed,
        sort_keys=op.sort_keys,
    )
    return ir.TableExpr(merged)
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, fusing with the parent op if possible.

    Predicate pushdown is only attempted in the cases where it is easy and
    safe, which gives both cleaner SQL and fewer referential errors for
    users.

    Parameters
    ----------
    expr : table expression
        Expression whose ``op()`` decides which strategy is used.
    predicates : list
        Predicate expressions to apply.

    Returns
    -------
    result : ir.TableExpr, or whatever ``_filter_selection`` returns for a
        Selection
    """
    node = expr.op()

    # Selections have their own fusion logic
    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity
        source = node.table

        def _rewrite(predicate):
            # GH1344: We can't sub in things with correlated subqueries
            if has_reduction(predicate):
                return predicate
            # Originally this tried substituting op.table in for expr, but
            # that is too aggressive in the presence of filters that occur
            # after aggregations.
            #
            # See https://github.com/ibis-project/ibis/pull/3341 for details
            return sub_for(predicate, [(source, expr)])

        rewritten = [_rewrite(predicate) for predicate in predicates]

        if source._is_valid(rewritten):
            fused = ops.Aggregation(
                source,
                node.metrics,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)

    # No fusion was possible: filter on top of the expression with the
    # predicates exactly as given
    return ir.TableExpr(ops.Selection(expr, [], predicates))
def _wrap_new_table(self, qualified_name, persist):
    """Wrap a newly created table in an expression and record its row count.

    Parameters
    ----------
    qualified_name : string
        Name interpolated into the ``alter table`` statement and used to
        look up the table.
    persist : bool
        When truthy the expression comes from ``self.table``; otherwise an
        ``ImpalaTemporaryTable`` node is built from the table's schema.

    Returns
    -------
    table : table expression
    """
    if not persist:
        schema = self._get_table_schema(qualified_name)
        t = ir.TableExpr(ImpalaTemporaryTable(qualified_name, schema, self))
    else:
        t = self.table(qualified_name)

    # Compute number of rows in table for better default query planning
    row_count = t.count().execute()
    statement = (
        "alter table {0} set tblproperties('numRows'='{1}', "
        "'STATS_GENERATED_VIA_STATS_TASK' = 'true')"
    ).format(qualified_name, row_count)
    self._execute(statement)

    return t
def sql(self, query):
    """
    Convert a SQL query to an Ibis table expression

    Parameters
    ----------
    query : string
        SQL text. It is embedded verbatim as a subquery in order to
        discover the result schema.

    Returns
    -------
    table : TableExpr
    """
    # The schema is obtained by wrapping the query in an outer SELECT with
    # LIMIT 0; the query text itself is left untouched.
    schema_probe = 'SELECT * FROM ({}) t0 LIMIT 0'.format(query)
    result_schema = self._get_schema_using_query(schema_probe)
    return ir.TableExpr(ops.SQLQueryResult(query, result_schema, self))
def table(self, name, database=None):
    """
    Create a table expression that references a particular table in the
    database

    Parameters
    ----------
    name : string
    database : string, optional
        Passed together with ``name`` to ``self._fully_qualified_name``.

    Returns
    -------
    table : TableExpr
    """
    full_name = self._fully_qualified_name(name, database)
    table_schema = self._get_table_schema(full_name)
    return ir.TableExpr(ops.DatabaseTable(full_name, table_schema, self))
def _lift_Projection(self, expr, block=None):
    """Lift the arguments of a Projection, rebuilding it if any changed.

    Parameters
    ----------
    expr : table expression
        Expression whose ``op()`` exposes ``table`` and ``selections``.
    block : bool, optional
        When None, ``self.block_projection`` is used. When truthy the
        projected table is kept as-is and only the selections are lifted.

    Returns
    -------
    result : table expression
        ``expr`` itself when nothing changed, otherwise a new
        ``ir.TableExpr`` wrapping the rebuilt ``ops.Projection``.
    """
    if block is None:
        block = self.block_projection

    op = expr.op()

    if block:
        lifted_table, table_same = op.table, True
    else:
        lifted_table, table_same = self._lift_arg(op.table, block=True)

    lifted_selections, selections_same = self._lift_arg(
        op.selections, block=True
    )

    if table_same and selections_same:
        return expr

    return ir.TableExpr(ops.Projection(lifted_table, lifted_selections))
def _lift_Join(self, expr, block=None):
    """Lift both sides of a join and its predicates, rebuilding if needed.

    Parameters
    ----------
    expr : table expression
        Expression whose ``op()`` exposes ``left``, ``right`` and
        ``predicates``.
    block : bool, optional
        Forwarded unchanged to ``self.lift`` for both sides.

    Returns
    -------
    result : table expression
        ``expr`` itself when neither side nor any predicate changed,
        otherwise a new ``ir.TableExpr`` wrapping a join of the same type.
    """
    op = expr.op()

    left_lifted = self.lift(op.left, block=block)
    right_lifted = self.lift(op.right, block=block)
    sides_same = left_lifted is op.left and right_lifted is op.right

    # Fix predicates, remembering each original next to its substitution
    pairs = [(pred, self._sub(pred, block=True)) for pred in op.predicates]
    lifted_preds = [subbed for _, subbed in pairs]

    if sides_same and all(subbed is pred for pred, subbed in pairs):
        return expr

    lifted_join = type(op)(left_lifted, right_lifted, lifted_preds)
    return ir.TableExpr(lifted_join)
def _lift_Aggregation(self, expr, block=None): if block is None: block = self.block_projection op = expr.op() lifted_table = self.lift(op.table, block=True) unch = lifted_table is op.table lifted_aggs, unch1 = self._lift_arg(op.agg_exprs, block=True) lifted_by, unch2 = self._lift_arg(op.by, block=True) lifted_having, unch3 = self._lift_arg(op.having, block=True) unchanged = unch and unch1 and unch2 and unch3 if not unchanged: lifted_op = ops.Aggregation(lifted_table, lifted_aggs, by=lifted_by, having=lifted_having) result = ir.TableExpr(lifted_op) else: result = expr return result