def test_rewrite_join_projection_without_other_ops(self):
    """Projection over a filtered join is not rewritten onto the base join.

    See #790, predicate pushdown in joins not supported.
    """
    # Star schema with fact table
    fact = self.con.table('star1')
    dim_foo = self.con.table('star2')
    dim_bar = self.con.table('star3')

    fact_filtered = fact[fact['f'] > 0]

    foo_pred = fact['foo_id'] == dim_foo['foo_id']
    bar_pred = fact_filtered['bar_id'] == dim_bar['bar_id']

    joined = (fact_filtered.left_join(dim_foo, [foo_pred])
              .inner_join(dim_bar, [bar_pred]))

    # Project out the desired fields
    projection = joined[[fact_filtered,
                         dim_foo['value1'],
                         dim_bar['value2']]]

    # The join as it would look if it were built on the unfiltered table
    base_bar_pred = fact['bar_id'] == dim_bar['bar_id']
    base_join = (fact.left_join(dim_foo, [foo_pred])
                 .inner_join(dim_bar, [base_bar_pred]))

    rewritten_op = L.substitute_parents(projection).op()
    assert not rewritten_op.table.equals(base_join)
def test_rewrite_past_projection(self):
    """Exercise ``L.substitute_parents`` on predicates over projections."""
    base = self.con.table('test1')

    # Rewrite past a projection
    plain_proj = base[['c', 'f']]
    rewritten = L.substitute_parents(plain_proj['c'] == 2)
    assert_equal(rewritten, base['c'] == 2)

    # Unsafe to rewrite past projection: here 'c' is a derived column, so
    # the very same expression object is expected back
    derived_proj = base[(base.f * 2).name('c'), base.f]
    predicate = derived_proj['c'] == 2
    assert L.substitute_parents(predicate) is predicate
def test_rewrite_past_projection(con):
    """Exercise ``L.substitute_parents`` on predicates over projections."""
    source = con.table('test1')

    # Rewrite past a projection
    column_subset = source[['c', 'f']]
    subset_pred = column_subset['c'] == 2
    rewritten = L.substitute_parents(subset_pred)
    assert_equal(rewritten, source['c'] == 2)

    # Unsafe to rewrite past projection
    renamed = source[(source.f * 2).name('c'), source.f]
    renamed_pred = renamed['c'] == 2
    untouched = L.substitute_parents(renamed_pred)
    assert untouched is renamed_pred
def test_rewrite_join_projection_without_other_ops(con):
    """Projection over a filtered join is not rewritten onto the base join.

    See #790, predicate pushdown in joins not supported.
    """
    # Star schema with fact table
    fact, dim_foo, dim_bar = (
        con.table(name) for name in ('star1', 'star2', 'star3')
    )

    fact_filtered = fact[fact['f'] > 0]

    foo_pred = fact['foo_id'] == dim_foo['foo_id']
    bar_pred = fact_filtered['bar_id'] == dim_bar['bar_id']

    left_joined = fact_filtered.left_join(dim_foo, [foo_pred])
    fully_joined = left_joined.inner_join(dim_bar, [bar_pred])

    # Project out the desired fields
    selections = [fact_filtered, dim_foo['value1'], dim_bar['value2']]
    projection = fully_joined[selections]

    # The join as it would look if it were built on the unfiltered table
    base_bar_pred = fact['bar_id'] == dim_bar['bar_id']
    base_join = fact.left_join(dim_foo, [foo_pred])
    base_join = base_join.inner_join(dim_bar, [base_bar_pred])

    rewritten = L.substitute_parents(projection)
    assert not rewritten.op().table.equals(base_join)
def test_no_rewrite(con):
    """A predicate using a projection-derived column comes back unchanged."""
    base = con.table('test1')

    doubled = (base['c'] * 2).name('foo')
    projected = base[['c', doubled]]

    predicate = projected['c'] == projected['foo']
    rewritten = L.substitute_parents(predicate)

    # The expected expression is the input expression itself
    assert rewritten.equals(predicate)
def test_rewrite_join_projection_without_other_ops(self):
    """Projection over a filtered join is rewritten onto the base join.

    Drop out filters and other commutative table operations. Join
    predicates are "lifted" to reference the base, unmodified join roots.
    """
    # Star schema with fact table
    fact = self.con.table('star1')
    dim_foo = self.con.table('star2')
    dim_bar = self.con.table('star3')

    fact_filtered = fact[fact['f'] > 0]

    foo_pred = fact['foo_id'] == dim_foo['foo_id']
    bar_pred = fact_filtered['bar_id'] == dim_bar['bar_id']

    joined = (fact_filtered.left_join(dim_foo, [foo_pred])
              .inner_join(dim_bar, [bar_pred]))

    # Project out the desired fields
    projection = joined[[fact_filtered,
                         dim_foo['value1'],
                         dim_bar['value2']]]

    # Construct the thing we expect to obtain
    base_bar_pred = fact['bar_id'] == dim_bar['bar_id']
    expected_join = (fact.left_join(dim_foo, [foo_pred])
                     .inner_join(dim_bar, [base_bar_pred]))

    rewritten_op = L.substitute_parents(projection).op()
    assert_equal(rewritten_op.table, expected_join)

    # Ensure that filtered table has been substituted with the base table
    assert rewritten_op.selections[0] is fact
def test_rewrite_join_projection_without_other_ops(self):
    """Projection over a filtered join is rewritten onto the base join.

    Drop out filters and other commutative table operations. Join
    predicates are "lifted" to reference the base, unmodified join roots.
    """
    connection = self.con

    # Star schema with fact table
    fact = connection.table('star1')
    dim_foo = connection.table('star2')
    dim_bar = connection.table('star3')

    fact_filtered = fact[fact['f'] > 0]

    foo_pred = fact['foo_id'] == dim_foo['foo_id']
    bar_pred = fact_filtered['bar_id'] == dim_bar['bar_id']

    left_joined = fact_filtered.left_join(dim_foo, [foo_pred])
    fully_joined = left_joined.inner_join(dim_bar, [bar_pred])

    # Project out the desired fields
    selections = [fact_filtered, dim_foo['value1'], dim_bar['value2']]
    projection = fully_joined[selections]

    # Construct the thing we expect to obtain
    base_bar_pred = fact['bar_id'] == dim_bar['bar_id']
    expected_join = fact.left_join(dim_foo, [foo_pred])
    expected_join = expected_join.inner_join(dim_bar, [base_bar_pred])

    rewritten = L.substitute_parents(projection)
    rewritten_op = rewritten.op()
    assert_equal(rewritten_op.table, expected_join)

    # Ensure that filtered table has been substituted with the base table
    first_selection = rewritten_op.selections[0]
    assert first_selection is fact
def _clean_predicates(self, predicates):
    """Normalize join predicates into a flat list.

    ``predicates`` may be a single predicate or a list/tuple of them. Each
    entry is handled as follows:

    * a tuple must have exactly two items; they are passed through
      ``self.left._ensure_expr`` and ``self.right._ensure_expr``
      respectively and compared with ``==``;
    * anything else is passed through ``L.substitute_parents`` with
      ``past_projection=False``.

    The outcome must be an ``ir.BooleanArray``; it is then expanded with
    ``L.unwrap_ands`` and the pieces are collected in order.

    Raises
    ------
    com.ExpressionError
        If a tuple entry does not have length 2, or if an entry does not
        resolve to an ``ir.BooleanArray``.
    """
    import ibis.expr.analysis as L

    if not isinstance(predicates, (list, tuple)):
        predicates = [predicates]

    cleaned = []
    for item in predicates:
        if not isinstance(item, tuple):
            item = L.substitute_parents(item, past_projection=False)
        elif len(item) == 2:
            left_key, right_key = item
            left_key = self.left._ensure_expr(left_key)
            right_key = self.right._ensure_expr(right_key)
            item = left_key == right_key
        else:
            raise com.ExpressionError('Join key tuple must be length 2')

        if not isinstance(item, ir.BooleanArray):
            raise com.ExpressionError('Join predicate must be comparison')

        cleaned.extend(L.unwrap_ands(item))

    return cleaned
def test_no_rewrite(con):
    """Only the pass-through column of a projection is rewritten."""
    base = con.table('test1')

    # Substitution not fully possible if we depend on a new expr in a
    # projection
    doubled = (base['c'] * 2).name('foo')
    projected = base[['c', doubled]]

    predicate = projected['c'] == projected['foo']
    rewritten = L.substitute_parents(predicate)

    # 'c' is expected on the base table, 'foo' still on the projection
    assert_equal(rewritten, base['c'] == projected['foo'])
def test_rewrite_expr_with_parent(self):
    """Check ``L.substitute_parents`` over a filter and over a projection."""
    base = self.con.table('test1')

    # Predicate on a filtered table is expected on the base table
    filtered = base[base['f'] > 0]
    rewritten = L.substitute_parents(filtered['c'] == 2)
    assert_equal(rewritten, base['c'] == 2)

    # Substitution not fully possible if we depend on a new expr in a
    # projection
    doubled = (base['c'] * 2).name('foo')
    projected = base[['c', doubled]]
    rewritten = L.substitute_parents(projected['c'] == projected['foo'])
    assert_equal(rewritten, base['c'] == projected['foo'])
def test_rewrite_distinct_but_equal_objects(self):
    """Rewrite works when the filter uses a second handle to the table."""
    first = self.con.table('test1')
    second = self.con.table('test1')

    # Filter `first` with a predicate built from the separately obtained
    # `second` table expression
    filtered = first[second['f'] > 0]

    rewritten = L.substitute_parents(filtered['c'] == 2)
    assert_equal(rewritten, first['c'] == 2)
def _rewrite_exprs(self, what):
    """Flatten ``what`` and pass every expression through substitution.

    ``what`` is first normalized with ``util.promote_list``. Any
    ``ir.ExprList`` entry contributes the items of its ``exprs()``; every
    other entry is kept as is. Each collected expression is then passed to
    ``substitute_parents`` with ``past_projection=False``.
    """
    from ibis.expr.analysis import substitute_parents

    flattened = []
    for item in util.promote_list(what):
        members = item.exprs() if isinstance(item, ir.ExprList) else [item]
        flattened.extend(members)

    rewritten = []
    for member in flattened:
        rewritten.append(substitute_parents(member, past_projection=False))
    return rewritten
def _rewrite_exprs(self, what):
    """Flatten ``what`` and pass every expression through substitution.

    ``what`` is first normalized with ``util.promote_list``. Any
    ``ir.ExprList`` entry contributes the items of its ``exprs()``; every
    other entry is kept as is. Each collected expression is then passed to
    ``substitute_parents`` with ``past_projection=False``.
    """
    from ibis.expr.analysis import substitute_parents

    candidates = util.promote_list(what)

    # First pass: expand expression lists into their members
    expanded = []
    for candidate in candidates:
        if not isinstance(candidate, ir.ExprList):
            expanded.append(candidate)
            continue
        expanded.extend(candidate.exprs())

    # Second pass: rewrite each member
    return [
        substitute_parents(member, past_projection=False)
        for member in expanded
    ]
def _visit_filter_SummaryFilter(self, expr):
    """Replace ``self.table_set`` with a semi join against a rank set.

    Top K is rewritten as an
    - aggregation
    - sort by
    - limit
    - left semi join with table set

    Always returns ``None``; the effect is the update of ``self.table_set``.
    """
    summary_expr = expr.op().args[0]
    summary_op = summary_expr.op()

    rank_set = summary_expr.to_aggregation(
        backup_metric_name='__tmp__',
        parent_table=self.table_set,
    )

    # GH #667; this may reference a filtered version of self.table_set
    join_key = L.substitute_parents(summary_op.arg)
    key_name = summary_op.arg.get_name()
    join_pred = join_key == getattr(rank_set, key_name)

    self.table_set = self.table_set.semi_join(rank_set, [join_pred])
    return None
def _visit_filter_SummaryFilter(self, expr):
    """Replace ``self.table_set`` with a semi join against a rank set.

    Top K is rewritten as an
    - aggregation
    - sort by
    - limit
    - left semi join with table set

    Always returns ``None``; the effect is the update of ``self.table_set``.
    """
    filter_op = expr.op()
    summary_expr = filter_op.args[0]
    topk_op = summary_expr.op()

    current_tables = self.table_set
    rank_set = summary_expr.to_aggregation(backup_metric_name='__tmp__',
                                           parent_table=current_tables)

    # GH #667; this may reference a filtered version of self.table_set
    lifted_arg = L.substitute_parents(topk_op.arg)
    rank_column = getattr(rank_set, topk_op.arg.get_name())

    self.table_set = self.table_set.semi_join(
        rank_set, [lifted_arg == rank_column])
    return None
def _sub(self, what):
    """Return ``what`` after passing it through ``L.substitute_parents``."""
    substituted = L.substitute_parents(what)
    return substituted
def _sub(self, what):
    """Apply ``L.substitute_parents`` to ``what`` using ``self.sub_memo``.

    A ``list`` is processed element by element and a new list is returned;
    any other value is passed to ``L.substitute_parents`` directly.
    """
    if not isinstance(what, list):
        return L.substitute_parents(what, self.sub_memo)

    return [L.substitute_parents(item, self.sub_memo) for item in what]