def _filter_selection(expr, predicates):
    """Filter an expression whose operation is a selection.

    The strategies below are tried in order; the first one that applies
    produces the result.

    1. Fusion.  When the selection does not block (``op.blocks()`` is
       false), every predicate for which ``has_reduction`` is false is
       passed through ``sub_for`` with the pair ``(expr, op.table)`` so it
       is expressed in terms of the child table; predicates with a
       reduction are kept as they are.  If the child table reports the
       rewritten predicates as valid, a new selection on the child table is
       built with an empty selection list, the existing predicates followed
       by the rewritten ones, and the existing sort keys.
    2. Pushdown.  When ``_can_pushdown`` approves, every predicate goes
       through ``substitute_parents`` and is appended to the existing
       predicates of a new selection that keeps the original selections and
       sort keys.
    3. Fallback.  ``expr`` is wrapped in a new selection carrying only the
       given predicates.

    Parameters
    ----------
    expr
        Table expression whose ``op()`` is the selection being filtered.
    predicates
        Iterable of filter predicates.

    Returns
    -------
    The new selection converted back to an expression via ``to_expr()``.
    """
    selection = expr.op()

    if not selection.blocks():
        # Potential fusion opportunity.  The predicates may need to be
        # rewritten in terms of the child table, which prevents the broken
        # reference issue described in #59.
        rewritten = []
        for pred in predicates:
            if has_reduction(pred):
                rewritten.append(pred)
            else:
                rewritten.append(sub_for(pred, [(expr, selection.table)]))
        rewritten = tuple(rewritten)

        if selection.table._is_valid(rewritten):
            fused = ops.Selection(
                selection.table,
                [],
                predicates=selection.predicates + rewritten,
                sort_keys=selection.sort_keys,
            )
            return fused.to_expr()

    if not _can_pushdown(selection, predicates):
        # Pushdown is not possible: filter on top of the projection itself.
        wrapped = ops.Selection(expr, selections=[], predicates=predicates)
        return wrapped.to_expr()

    # The filter references the projection; rewrite the predicates against
    # the tables internal to the projection before merging them in.
    lifted = tuple(substitute_parents(pred) for pred in predicates)
    combined = selection.predicates + lifted
    pushed = ops.Selection(
        selection.table,
        selections=selection.selections,
        predicates=combined,
        sort_keys=selection.sort_keys,
    )
    return pushed.to_expr()
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, pushing the filter down when easy.

    Pushdown is attempted only in the cases where it can be done easily and
    safely, which gives cleaner SQL and fewer referential errors for users.

    * Selection: delegated to ``_filter_selection``.
    * Aggregation: predicates without a reduction are passed through
      ``sub_for`` with the pair ``(expr, op.table)``; if the aggregated
      table reports the result as valid, the predicates are merged into a
      new aggregation.  Otherwise the generic path below is used.
    * Join: the expression is materialized first, then filtered generically.
    * Anything else: filtered generically.

    Parameters
    ----------
    expr
        Table expression to filter.
    predicates
        Iterable of filter predicates.

    Returns
    -------
    The result of ``_filter_selection`` for selections, otherwise an
    ``ir.TableExpr`` wrapping the new aggregation or selection.
    """
    node = expr.op()

    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity.
        # GH1344: we can't substitute into correlated subqueries, so
        # predicates containing a reduction are passed through untouched.
        rewritten = []
        for pred in predicates:
            if has_reduction(pred):
                rewritten.append(pred)
            else:
                rewritten.append(sub_for(pred, [(expr, node.table)]))

        if node.table._is_valid(rewritten):
            fused = ops.Aggregation(
                node.table,
                node.metrics,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)
    elif isinstance(node, ops.Join):
        expr = expr.materialize()

    return ir.TableExpr(ops.Selection(expr, [], predicates))
def _lift_Selection(self, expr, block=None):
    """Lift the arguments of a selection and rebuild it if any changed.

    Parameters
    ----------
    expr
        Table expression whose ``op()`` is the selection to process.
    block
        Whether a blocking selection stops the traversal.  ``None`` (the
        default) means "use ``self.block_projection``".

    Returns
    -------
    ``expr`` itself when traversal is blocked or when none of the four
    arguments (table, selections, predicates, sort keys) changed; otherwise
    a new ``ir.TableExpr`` wrapping a selection built from the lifted
    arguments.
    """
    if block is None:
        block = self.block_projection

    op = expr.op()

    if block and op.blocks():
        # GH #549: dig no further
        return expr

    # Lift every argument, in this order, and remember whether *all* of them
    # came back unchanged.  The flag must be accumulated: previously each
    # step recomputed it from the table's flag alone, so a change in the
    # selections or predicates was lost whenever the sort keys were
    # unchanged, and the stale expression was returned.
    lifted_args = []
    unchanged = True
    for arg in (op.table, op.selections, op.predicates, op.sort_keys):
        lifted, arg_unchanged = self._lift_arg(arg, block=True)
        lifted_args.append(lifted)
        unchanged = unchanged and arg_unchanged

    if unchanged:
        return expr

    table, selections, predicates, sort_keys = lifted_args
    lifted_projection = ops.Selection(
        table, selections, predicates, sort_keys
    )
    return ir.TableExpr(lifted_projection)
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, pushing the filter down when easy.

    Pushdown is attempted only in the cases where it can be done easily and
    safely, which gives cleaner SQL and fewer referential errors for users.

    * Selection: delegated to ``_filter_selection``.
    * Aggregation: every predicate is passed through ``sub_for`` with the
      pair ``(expr, op.table)``; if the aggregated table reports the result
      as valid, the predicates are merged into a new aggregation.  Otherwise
      the generic path below is used.
    * Join: the expression is materialized first, then filtered generically.
    * Anything else: filtered generically.

    Parameters
    ----------
    expr
        Table expression to filter.
    predicates
        Iterable of filter predicates.

    Returns
    -------
    The result of ``_filter_selection`` for selections, otherwise an
    ``ir.TableExpr`` wrapping the new aggregation or selection.
    """
    node = expr.op()

    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity
        rewritten = []
        for pred in predicates:
            rewritten.append(sub_for(pred, [(expr, node.table)]))

        if node.table._is_valid(rewritten):
            fused = ops.Aggregation(
                node.table,
                node.agg_exprs,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)
    elif isinstance(node, ops.Join):
        expr = expr.materialize()

    return ir.TableExpr(ops.Selection(expr, [], predicates))
def get_result(self):
    """Build the selection for this projection, fusing when possible.

    Fusion is attempted only when the parent has exactly one root and that
    root is itself a selection; ``_check_fusion`` then decides whether the
    two can be merged and returns ``None`` when they cannot.

    Returns
    -------
    The fused operation when fusion succeeds, otherwise a new
    ``ops.Selection`` of ``self.clean_exprs`` over ``self.parent``.
    """
    parent_roots = self.parent_roots

    if len(parent_roots) == 1:
        if isinstance(parent_roots[0], ops.Selection):
            fused = self._check_fusion(parent_roots[0])
            if fused is not None:
                return fused

    return ops.Selection(self.parent, self.clean_exprs)
def get_result(self):
    """Build the selection for this projection, fusing when possible.

    Fusion is attempted only when the parent has exactly one root and that
    root is itself a selection; ``try_fusion`` then decides whether the two
    can be merged and returns ``None`` when they cannot.

    The root is indexed only after the length check, so an empty
    ``parent_roots`` falls through to the plain selection instead of raising
    ``IndexError``.

    Returns
    -------
    The fused operation when fusion succeeds, otherwise a new
    ``ops.Selection`` of ``self.clean_exprs`` over ``self.parent``.
    """
    roots = self.parent_roots

    if len(roots) == 1:
        first_root = roots[0]
        if isinstance(first_root, ops.Selection):
            fused_op = self.try_fusion(first_root)
            if fused_op is not None:
                return fused_op

    return ops.Selection(self.parent, self.clean_exprs)
def _check_fusion(self, root):
    """Try to merge this projection into the selection ``root``.

    The projection's input expressions are resolved against ``root.table``
    and examined one by one:

    * A table expression that is compatible with the parent operation, or
      that shares the single root table of ``root.table``, counts as a
      ``*`` projection: the selections of ``root`` are carried over (or
      ``root.table`` itself when ``root`` has no selections).
    * An expression whose parent-substituted form validates against
      ``root.table`` is added in that substituted form.
    * An expression that validates only in its original form is added as
      is; this alone does not make the projection fusable.
    * An expression that validates in neither form aborts the fusion.

    Parameters
    ----------
    root
        The selection operation the projection sits on.

    Returns
    -------
    A new ``ops.Selection`` on ``root.table`` that keeps the predicates and
    sort keys of ``root``, or ``None`` when fusion is not possible.
    """
    base_table = root.table
    base_roots = base_table._root_tables()
    validator = ExprValidator([base_table])

    candidates = _maybe_resolve_exprs(base_table, self.input_exprs)
    if not candidates:
        return None

    merged = []
    fusable = False

    for candidate in candidates:
        # XXX
        lifted = substitute_parents(candidate)

        # Is this a * projection?
        star_projection = False
        if isinstance(candidate, ir.TableExpr):
            if self.parent.op().compatible_with(candidate.op()):
                star_projection = True
            elif len(base_roots) == 1:
                # gross: we share the same table root.  Better way to
                # detect?
                star_projection = (
                    candidate._root_tables()[0] is base_roots[0]
                )

        if star_projection:
            fusable = True
            saw_base = False
            for selected in root.selections:
                if selected.equals(base_table):
                    # Don't add the * projection twice
                    merged.append(base_table)
                    saw_base = True
                else:
                    merged.append(selected)

            # This was a filter, so implicitly a select *
            if not saw_base and len(root.selections) == 0:
                merged = [base_table] + merged
        elif validator.validate(lifted):
            fusable = True
            merged.append(lifted)
        elif validator.validate(candidate):
            merged.append(candidate)
        else:
            # Neither form is valid against the root table: give up.
            return None

    if not fusable:
        return None

    return ops.Selection(
        base_table,
        merged,
        predicates=root.predicates,
        sort_keys=root.sort_keys,
    )
def apply_filter(expr, predicates):
    """Filter ``expr`` by ``predicates``, pushing the filter down when easy.

    Pushdown is attempted only in the cases where it can be done easily and
    safely, which gives cleaner SQL and fewer referential errors for users.

    * Selection: delegated to ``_filter_selection``.
    * Aggregation: predicates without a reduction are passed through
      ``sub_for`` with the pair ``(op.table, expr)``; if the aggregated
      table reports the result as valid, the predicates are merged into a
      new aggregation.  Otherwise the generic path below is used.
    * Anything else: ``expr`` is wrapped in a new selection carrying the
      predicates.

    Parameters
    ----------
    expr
        Table expression to filter.
    predicates
        Iterable of filter predicates.

    Returns
    -------
    The result of ``_filter_selection`` for selections, otherwise an
    ``ir.TableExpr`` wrapping the new aggregation or selection.
    """
    node = expr.op()

    if isinstance(node, ops.Selection):
        return _filter_selection(expr, predicates)

    if isinstance(node, ops.Aggregation):
        # Potential fusion opportunity
        # GH1344: We can't sub in things with correlated subqueries
        rewritten = []
        for pred in predicates:
            if has_reduction(pred):
                rewritten.append(pred)
                continue
            # Originally this substituted op.table in for expr, but that is
            # too aggressive in the presence of filters that occur after
            # aggregations, so the pair is given the other way around.
            #
            # See https://github.com/ibis-project/ibis/pull/3341 for details
            rewritten.append(sub_for(pred, [(node.table, expr)]))

        if node.table._is_valid(rewritten):
            fused = ops.Aggregation(
                node.table,
                node.metrics,
                by=node.by,
                having=node.having,
                predicates=node.predicates + rewritten,
                sort_keys=node.sort_keys,
            )
            return ir.TableExpr(fused)

    return ir.TableExpr(ops.Selection(expr, [], predicates))
def try_fusion(self, root):
    """Try to merge this projection into the selection ``root``.

    ``root`` must be the operation of ``self.parent`` (checked with an
    ``assert``).

    The expressions to fuse are determined first:

    * When ``root.table`` is not a join, the projection's input expressions
      are resolved against it with ``_ensure_expr``.  If resolution raises
      ``AttributeError`` or ``IbisTypeError`` the already cleaned
      expressions are used instead; if it succeeds but any resolved
      expression differs from its cleaned counterpart, fusion is abandoned.
    * When ``root.table`` is a join, the cleaned expressions are used
      directly.

    Each expression is then either a ``*`` projection (a table equal to the
    parent operation, or sharing the single root table of ``root.table``),
    in which case the selections of ``root`` are carried over, or it must
    share all of its roots with ``root.table``.  Anything else aborts.

    Parameters
    ----------
    root
        The selection operation the projection sits on.

    Returns
    -------
    A new ``ops.Selection`` on ``root.table`` that keeps the predicates and
    sort keys of ``root``, or ``None`` when fusion is not possible.
    """
    assert self.parent.op() == root

    base_table = root.table
    base_roots = base_table.op().root_tables()
    merged = []
    cleaned = self.clean_exprs

    if isinstance(base_table.op(), ops.Join):
        # joins cannot be used to resolve expressions, but we still may be
        # able to fuse columns from a projection off of a join. In that
        # case, use the projection's input expressions as the columns with
        # which to attempt fusion
        candidates = cleaned
    else:
        try:
            candidates = []
            for raw in util.promote_list(self.input_exprs):
                candidates.append(base_table._ensure_expr(raw))
        except (AttributeError, IbisTypeError):
            candidates = cleaned
        else:
            # if any expressions aren't exactly equivalent then don't try
            # to fuse them
            all_equal = all(
                resolved.equals(clean)
                for resolved, clean in zip(candidates, cleaned)
            )
            if not all_equal:
                return None

    inherited = root.selections
    parent_op = self.parent.op()

    for candidate in candidates:
        # Is this a * projection?
        star_projection = False
        if isinstance(candidate, ir.Table):
            if parent_op.equals(candidate.op()):
                star_projection = True
            elif len(base_roots) == 1:
                # gross: we share the same table root.  Better way to
                # detect?
                star_projection = (
                    candidate.op().root_tables()[0] is base_roots[0]
                )

        if star_projection:
            saw_base = False
            for selected in inherited:
                if selected.equals(base_table):
                    # Don't add the * projection twice
                    merged.append(base_table)
                    saw_base = True
                else:
                    merged.append(selected)

            # This was a filter, so implicitly a select *
            if not saw_base and not inherited:
                merged = [base_table, *merged]
        elif shares_all_roots(candidate, base_table):
            merged.append(candidate)
        else:
            return None

    return ops.Selection(
        base_table,
        merged,
        predicates=root.predicates,
        sort_keys=root.sort_keys,
    )
def try_fusion(self, root):
    """Try to merge this projection into the selection ``root``.

    The expressions to fuse come from ``_maybe_resolve_exprs`` when
    ``root.table`` is not a join, and from ``self.clean_exprs`` when it is.
    With nothing to fuse the method gives up immediately.  Otherwise each
    expression is examined:

    * A table expression that is compatible with the parent operation, or
      that shares the single root table of ``root.table``, counts as a
      ``*`` projection: the selections of ``root`` are carried over (or
      ``root.table`` itself when ``root`` has no selections).
    * An expression whose parent-substituted form validates against
      ``root.table`` is added in that substituted form.
    * An expression that validates only in its original form is added as
      is; this alone does not make the projection fusable.
    * An expression that validates in neither form aborts the fusion.

    Parameters
    ----------
    root
        The selection operation the projection sits on.

    Returns
    -------
    A new ``ops.Selection`` on ``root.table`` that keeps the predicates and
    sort keys of ``root``, or ``None`` when fusion is not possible.
    """
    base_table = root.table
    base_roots = base_table._root_tables()
    validator = ExprValidator([base_table])

    if isinstance(base_table.op(), ops.Join):
        # joins cannot be used to resolve expressions, but we still may be
        # able to fuse columns from a projection off of a join. In that
        # case, use the projection's input expressions as the columns with
        # which to attempt fusion
        candidates = self.clean_exprs
    else:
        candidates = _maybe_resolve_exprs(base_table, self.input_exprs)

    if not candidates:
        return None

    inherited = root.selections
    parent_op = self.parent.op()

    merged = []
    fusable = False

    for candidate in candidates:
        # XXX
        lifted = substitute_parents(candidate)

        # Is this a * projection?
        star_projection = False
        if isinstance(candidate, ir.TableExpr):
            if parent_op.compatible_with(candidate.op()):
                star_projection = True
            elif len(base_roots) == 1:
                # gross: we share the same table root.  Better way to
                # detect?
                star_projection = (
                    candidate._root_tables()[0] is base_roots[0]
                )

        if star_projection:
            fusable = True
            saw_base = False
            for selected in inherited:
                if selected.equals(base_table):
                    # Don't add the * projection twice
                    merged.append(base_table)
                    saw_base = True
                else:
                    merged.append(selected)

            # This was a filter, so implicitly a select *
            if not saw_base and not inherited:
                merged = [base_table] + merged
        elif validator.validate(lifted):
            fusable = True
            merged.append(lifted)
        elif validator.validate(candidate):
            merged.append(candidate)
        else:
            # Neither form is valid against the root table: give up.
            return None

    if not fusable:
        return None

    return ops.Selection(
        base_table,
        merged,
        predicates=root.predicates,
        sort_keys=root.sort_keys,
    )