def convert(statement_list):
    """Build a logical plan tree from a single parsed statement.

    The tree produced is ``projection -> [selection ->] table``; the
    selection node is inserted only when the statement has a WHERE clause.

    :param statement_list: sequence expected to hold exactly one parsed
        statement exposing ``from_table``, ``target_list`` and
        ``where_clause``.
    :return: the root ``LogicalProjectionPlan``, or ``None`` (after printing
        a message) when ``statement_list`` does not hold exactly one
        statement.
    """
    if len(statement_list) != 1:
        print('statement list must be length 1 and it was len: {}'.format(
            len(statement_list)))
        return None

    statement = statement_list[0]

    # Leaf of the plan: the table named in the FROM clause. The container
    # format and fps are fixed values here, not read from the statement.
    table_name = statement.from_table.table_info.table_name
    meta1 = VideoMetaInfo(file=table_name, c_format=VideoFormat.MOV, fps=30)
    video1 = SimpleVideoLoader(video_metadata=meta1)
    t1 = TableRef(video=video1, table_info=TableInfo(table_name=table_name))

    # Root of the plan: projection over the statement's target columns.
    projection_output = [target.col_name for target in statement.target_list]
    root = LogicalProjectionPlan(videos=[video1],
                                 column_ids=projection_output,
                                 foreign_column_ids=[])

    where_clause = statement.where_clause
    if where_clause is None:
        # No predicate: the projection reads the table directly.
        root.set_children([t1])
        t1.parent = root
        return root

    # Predicate present: insert a selection between projection and table.
    select_node = SeqScanPlan(predicate=where_clause,
                              column_ids=projection_output,
                              videos=[video1],
                              foreign_column_ids=[])
    # set_children is given a list everywhere else in this code base, so the
    # selection node is wrapped in one here as well.
    root.set_children([select_node])
    select_node.parent = root
    select_node.set_children([t1])
    t1.parent = select_node
    return root
def test_simple_predicate_pushdown(self, verbose=False):
    """Check that PREDICATE_PUSHDOWN moves a one-table selection under a join.

    Initial tree:  projection -> select(v1.1 == 4) -> join(v1, v2)
    Asserted tree: projection -> join(select(v1.1 == 4) -> v1, v2)

    :param verbose: when True, print the plan tree before and after the
        optimizer runs.
    """
    # Creating the videos
    meta1 = VideoMetaInfo(file='v1', c_format=VideoFormat.MOV, fps=30)
    video1 = SimpleVideoLoader(video_metadata=meta1)

    meta2 = VideoMetaInfo(file='v2', c_format=VideoFormat.MOV, fps=30)
    video2 = SimpleVideoLoader(video_metadata=meta2)

    projection_output = ['v1.1', 'v2.2']
    root = LogicalProjectionPlan(videos=[video1, video2],
                                 column_ids=projection_output,
                                 foreign_column_ids=[])

    # Creating Expression for Select: Expression is basically where v1.1 == 4
    const = ConstantValueExpression(value=4)
    tup = TupleValueExpression(
        col_idx=int(projection_output[0].split('.')[1]))
    expression = ComparisonExpression(
        exp_type=ExpressionType.COMPARE_EQUAL, left=tup, right=const)

    # The selection starts ABOVE the join, so it is given both videos.
    s1 = SeqScanPlan(predicate=expression, column_ids=['v1.1'],
                     videos=[video1, video2], foreign_column_ids=[])
    s1.parent = root

    j1 = LogicalInnerJoinPlan(videos=[video1, video2],
                              join_ids=['v1.3', 'v2.3'])
    j1.parent = s1

    t1 = TableRef(video=video1, table_info=TableInfo(table_name='v1'))
    t2 = TableRef(video=video2, table_info=TableInfo(table_name='v2'))

    s1.set_children([j1])
    t1.parent = j1
    t2.parent = j1

    j1.set_children([t1, t2])
    root.set_children([s1])

    rule_list = [Rules.PREDICATE_PUSHDOWN]
    if verbose:
        print('Original Plan Tree')
        print(root)
    qo = RuleQueryOptimizer()
    new_tree = qo.run(root, rule_list)
    if verbose:
        print('New Plan Tree')
        print(new_tree)

    # assertIsNone takes (obj, msg=None); the former second positional
    # argument was being passed as the failure message.
    self.assertIsNone(root.parent)
    self.assertEqual(root.children, [j1])
    self.assertEqual(j1.parent, root)
    self.assertEqual(j1.children, [s1, t2])
    self.assertEqual(s1.parent, j1)
    # After pushdown the selection only covers the table it filters.
    self.assertEqual(s1.videos, [video1])
    self.assertEqual(t2.parent, j1)
    self.assertEqual(s1.children, [t1])
    self.assertEqual(t1.parent, s1)
def test_join_elimination(self, verbose=False):
    """Check the tree produced by the JOIN_ELIMINATION rule.

    Initial tree:  projection -> select(col 1 == col 2) -> join(v1, v2)
    with 'v2.2' registered as a foreign column on both the projection and
    the selection. The assertions require that the join is gone, the
    selection sits directly on v1, and no foreign columns remain.

    :param verbose: when True, print the plan tree before and after the
        optimizer runs.
    """
    v1_meta = VideoMetaInfo(file='v1', c_format=VideoFormat.MOV, fps=30)
    v1_video = SimpleVideoLoader(video_metadata=v1_meta)

    v2_meta = VideoMetaInfo(file='v2', c_format=VideoFormat.MOV, fps=30)
    v2_video = SimpleVideoLoader(video_metadata=v2_meta)
    both_videos = [v1_video, v2_video]

    projected_cols = ['v1.1', 'v2.2']
    root = LogicalProjectionPlan(videos=[v1_video, v2_video],
                                 column_ids=projected_cols,
                                 foreign_column_ids=['v2.2'])

    # Predicate: tuple column 1 == tuple column 2 (i.e. v1.1 == v2.2).
    # v2.2 is also declared as a foreign column, which is what makes the
    # join a candidate for elimination.
    lhs = TupleValueExpression(col_idx=1)
    rhs = TupleValueExpression(col_idx=2)
    predicate = ComparisonExpression(
        exp_type=ExpressionType.COMPARE_EQUAL, left=lhs, right=rhs)

    # The selection is placed above the join, hence both videos.
    select = SeqScanPlan(predicate=predicate,
                         column_ids=['v1.1', 'v2.2'],
                         videos=list(both_videos),
                         foreign_column_ids=['v2.2'])
    select.parent = root

    join = LogicalInnerJoinPlan(videos=list(both_videos),
                                join_ids=['v1.1', 'v2.2'])
    join.parent = select

    v1_table = TableRef(video=v1_video,
                        table_info=TableInfo(table_name='v1'))
    v2_table = TableRef(video=v2_video,
                        table_info=TableInfo(table_name='v2'))
    v1_table.parent = join
    v2_table.parent = join

    root.set_children([select])
    select.set_children([join])
    join.set_children([v1_table, v2_table])

    rules = [Rules.JOIN_ELIMINATION]
    if verbose:
        print('Original Plan Tree')
        print(root)
    optimizer = RuleQueryOptimizer()
    optimized = optimizer.run(root, rules)
    if verbose:
        print('New Plan Tree')
        print(optimized)

    self.assertIsNone(root.parent)
    self.assertIs(type(v1_table.parent), SeqScanPlan)
    self.assertIs(type(select.children[0]), TableRef)
    self.assertEqual(len(select.children), 1)
    self.assertEqual(len(select.foreign_column_ids), 0)
    self.assertIn('v1.2', root.column_ids)
    self.assertEqual(len(root.column_ids), 2)
    self.assertEqual(len(root.foreign_column_ids), 0)
    self.assertIs(type(root.children[0]), SeqScanPlan)
def test_shouldnot_simply_predicate(verbose=False):
    """Run SIMPLIFY_PREDICATE on a tree whose predicate is ``col 7 == 4``.

    Tree: projection(v1.3, v1.4) -> select(v1.7 == 4) -> v1

    This function only runs the optimizer (and optionally prints the tree);
    it makes no assertions about the result.

    :param verbose: when True, print the plan tree before and after the
        optimizer runs.
    """
    v1_meta = VideoMetaInfo(file='v1', c_format=VideoFormat.MOV, fps=30)
    v1_video = SimpleVideoLoader(video_metadata=v1_meta)

    # Predicate: tuple column 7 == 4 (column id 'v1.7').
    four = ConstantValueExpression(value=4)
    col7 = TupleValueExpression(col_idx=7)
    predicate = ComparisonExpression(exp_type=ExpressionType.COMPARE_EQUAL,
                                     left=col7, right=four)
    select = SeqScanPlan(predicate=predicate, column_ids=['v1.7'],
                         videos=[v1_video], foreign_column_ids=[])

    projected_cols = ['v1.3', 'v1.4']
    root = LogicalProjectionPlan(videos=[v1_video],
                                 column_ids=projected_cols,
                                 foreign_column_ids=[])
    v1_table = TableRef(video=v1_video,
                        table_info=TableInfo(table_name='v1'))

    # Wire up projection -> select -> table.
    root.set_children([select])
    select.parent = root
    select.set_children([v1_table])
    v1_table.parent = select

    rules = [Rules.SIMPLIFY_PREDICATE]
    if verbose:
        print('Original Plan Tree')
        print(root)
    optimizer = RuleQueryOptimizer()
    optimized = optimizer.run(root, rules)
    if verbose:
        print('New Plan Tree')
        print(optimized)
def test_simple_projection_pushdown_select(verbose=False):
    """Check the tree produced by the PROJECTION_PUSHDOWN_SELECT rule.

    Initial tree:  projection(v1.3, v1.4) -> select(v1.7 == 4) -> v1
    Asserted tree: projection -> select -> projection(v1.7, v1.3, v1.4) -> v1

    :param verbose: when True, print the plan tree before and after the
        optimizer runs.
    """
    v1_meta = VideoMetaInfo(file='v1', c_format=VideoFormat.MOV, fps=30)
    v1_video = SimpleVideoLoader(video_metadata=v1_meta)

    # Predicate: tuple column 7 == 4 (column id 'v1.7').
    four = ConstantValueExpression(value=4)
    col7 = TupleValueExpression(col_idx=7)
    predicate = ComparisonExpression(exp_type=ExpressionType.COMPARE_EQUAL,
                                     left=col7, right=four)
    select = SeqScanPlan(predicate=predicate, column_ids=['v1.7'],
                         videos=[v1_video], foreign_column_ids=[])

    projected_cols = ['v1.3', 'v1.4']
    root = LogicalProjectionPlan(videos=[v1_video],
                                 column_ids=projected_cols,
                                 foreign_column_ids=[])
    v1_table = TableRef(video=v1_video,
                        table_info=TableInfo(table_name='v1'))

    # Wire up projection -> select -> table.
    root.set_children([select])
    select.parent = root
    select.set_children([v1_table])
    v1_table.parent = select

    rules = [Rules.PROJECTION_PUSHDOWN_SELECT]
    if verbose:
        print('Original Plan Tree')
        print(root)
    optimizer = RuleQueryOptimizer()
    optimized = optimizer.run(root, rules)
    if verbose:
        print('New Plan Tree')
        print(optimized)

    # The top of the tree is untouched ...
    assert root.parent is None
    assert root.children == [select]
    assert select.parent == root
    assert len(select.children) == 1

    # ... and a new projection now sits between the selection and the
    # table, carrying the predicate column plus the projected columns.
    assert type(select.children[0]) is LogicalProjectionPlan
    for column in ('v1.7', 'v1.3', 'v1.4'):
        assert column in select.children[0].column_ids
    assert type(v1_table.parent) is LogicalProjectionPlan
    assert select.children[0].children == [v1_table]
    print('Simple Projection Pushdown Select Test Successful!')
def test_should_simply_predicate(self, verbose=False):
    """Check the tree produced by SIMPLIFY_PREDICATE for a constant predicate.

    Initial tree: projection(v1.3, v1.4) -> select(0 == 1) -> v1
    The assertions require the selection to be removed so that the
    projection's only child is the table.

    :param verbose: when True, print the plan tree before and after the
        optimizer runs.
    """
    v1_meta = VideoMetaInfo(file='v1', c_format=VideoFormat.MOV, fps=30)
    v1_video = SimpleVideoLoader(video_metadata=v1_meta)

    # Predicate compares two constants: 0 == 1.
    zero = ConstantValueExpression(value=0)
    one = ConstantValueExpression(value=1)
    predicate = ComparisonExpression(
        exp_type=ExpressionType.COMPARE_EQUAL, left=zero, right=one)
    select = SeqScanPlan(predicate=predicate, column_ids=[], videos=[],
                         foreign_column_ids=[])

    projected_cols = ['v1.3', 'v1.4']
    root = LogicalProjectionPlan(videos=[v1_video],
                                 column_ids=projected_cols,
                                 foreign_column_ids=[])
    v1_table = TableRef(video=v1_video,
                        table_info=TableInfo(table_name='v1'))

    # Wire up projection -> select -> table.
    root.set_children([select])
    select.parent = root
    select.set_children([v1_table])
    v1_table.parent = select

    rules = [Rules.SIMPLIFY_PREDICATE]
    if verbose:
        print('Original Plan Tree')
        print(root)
    optimizer = RuleQueryOptimizer()
    optimized = optimizer.run(root, rules)
    if verbose:
        print('New Plan Tree')
        print(optimized)

    self.assertIsNone(root.parent)
    self.assertEqual(root.children, [v1_table])
    self.assertEqual(v1_table.parent, root)
    self.assertEqual(len(root.children), 1)
    # NOTE(review): root.children[0] is pinned to the TableRef above, so the
    # checks below read column_ids/children from the table itself and the
    # last one requires the table to be its own child. They look copied
    # from a projection-pushdown test -- confirm they are intended.
    self.assertIn('v1.3', root.children[0].column_ids)
    self.assertIn('v1.4', root.children[0].column_ids)
    self.assertIs(type(v1_table.parent), LogicalProjectionPlan)
    self.assertEqual(root.children[0].children, [v1_table])
def transitive_closure(self, curnode, child_ix):
    """
    Replicate a ``column <op> constant`` selection onto the inputs of a join.

    When curnode's selection column is one of the child's join columns, a
    new equality selection (join column == same constant) is inserted above
    every input of the join, and curnode itself is spliced out so that the
    join hangs directly off curnode's parent.

    Keyword Arguments:
    curnode -- The current node visited in the plan tree; a selection node
               exposing ``predicate``, ``column_ids``, ``children`` and
               ``parent``
    child_ix -- An integer that represents the index of the child (a join
                node exposing ``join_ids``) in curnode's child list

    :return: void. The tree is modified in place. Nothing is changed when
             the predicate is not a comparison against a constant or the
             selection column is not a join column.
    """
    child = curnode.children[child_ix]
    predicate = curnode.predicate

    # Only comparison predicates are handled.
    if not isinstance(predicate, ComparisonExpression):
        return

    # Find the constant operand; the right-hand side is checked first.
    if isinstance(predicate.get_child(1), ConstantValueExpression):
        const_idx = 1
    elif isinstance(predicate.get_child(0), ConstantValueExpression):
        const_idx = 0
    else:
        # Neither operand is a constant, so there is no value to propagate.
        return
    const_val = predicate.get_child(const_idx).evaluate()

    # The selection column must be one of the join columns.
    join_cols = child.join_ids
    if curnode.column_ids[0] not in join_cols:
        print("Not possible")
        return

    for gc_idx, gc in enumerate(child.children):
        # Work out which tables (and videos) this input of the join covers.
        if isinstance(gc, TableRef):
            jc_tabnames = {gc.table_info.table_name}
            vids = [gc.video]
        elif isinstance(gc, SeqScanPlan):
            jc_tabnames = {attr.split('.')[0] for attr in gc.column_ids}
            vids = gc.videos
        elif isinstance(gc, LogicalInnerJoinPlan):
            jc_tabnames = {attr.split('.')[0] for attr in gc.join_ids}
            vids = gc.videos
        else:
            print("Grand child type is not suported")
            return

        # Join columns that belong to the tables under this grand child.
        select_cols = [c for c in join_cols
                       if c.split(".")[0] in jc_tabnames]

        # Adding more than one column to the new selection is not
        # supported yet, so stop here in that case.
        if len(select_cols) > 1:
            return
        if not select_cols:
            continue

        selected_col = select_cols[0]
        const = ConstantValueExpression(value=const_val)
        tup = TupleValueExpression(col_idx=int(selected_col.split('.')[1]))
        expression = ComparisonExpression(
            exp_type=ExpressionType.COMPARE_EQUAL, left=tup, right=const)
        s1 = SeqScanPlan(predicate=expression, column_ids=[selected_col],
                         videos=vids, foreign_column_ids=[])

        # Insert the new selection between the join and this grand child.
        s1.parent = child
        child.children[gc_idx] = s1
        s1.set_children([gc])
        gc.parent = s1

    # Splice curnode out: the join now hangs off curnode's parent.
    child.parent = curnode.parent
    curnode.parent.set_children([child])
def test_transitive_closure(self, verbose=False):
    """Check the tree produced by the TRANSITIVE_CLOSURE rule.

    Initial tree:  projection -> select(v1.1 == 43) -> join(v1.1, v2.1)
    Asserted tree: projection -> join(select -> v1, select -> v2)

    :param verbose: when True, print the plan tree before and after the
        optimizer runs.
    """
    meta1 = VideoMetaInfo(file='v1', c_format=VideoFormat.MOV, fps=30)
    video1 = SimpleVideoLoader(video_metadata=meta1)

    meta2 = VideoMetaInfo(file='v2', c_format=VideoFormat.MOV, fps=30)
    video2 = SimpleVideoLoader(video_metadata=meta2)

    projection_output = ['v1.1', 'v1.2', 'v2.2']
    root = LogicalProjectionPlan(videos=[video1, video2],
                                 column_ids=projection_output,
                                 foreign_column_ids=[])

    # Creating Expression for Select: Expression is basically where v1.1 == 43
    const = ConstantValueExpression(value=43)
    tup = TupleValueExpression(
        col_idx=int(projection_output[0].split('.')[1]))
    expression = ComparisonExpression(
        exp_type=ExpressionType.COMPARE_EQUAL, left=tup, right=const)

    # For later: check if videos should also contain video2
    s1 = SeqScanPlan(predicate=expression, column_ids=['v1.1'],
                     videos=[video1], foreign_column_ids=[])
    s1.parent = root
    root.set_children([s1])

    # Declaring the join node
    j1 = LogicalInnerJoinPlan(videos=[video1, video2],
                              join_ids=['v1.1', 'v2.1'])
    j1.parent = s1
    s1.set_children([j1])

    t1 = TableRef(video=video1, table_info=TableInfo(table_name='v1'))
    t2 = TableRef(video=video2, table_info=TableInfo(table_name='v2'))

    # adding parent and children pointers
    t1.parent = j1
    t2.parent = j1
    j1.set_children([t1, t2])

    rule_list = [Rules.TRANSITIVE_CLOSURE]
    if verbose:
        print('Original Plan Tree')
        print(root)
    qo = RuleQueryOptimizer()
    new_tree = qo.run(root, rule_list)
    if verbose:
        print('New Plan Tree')
        print(new_tree)

    self.assertIsNone(root.parent)
    # These were assertTrue(a, b), which only checks that `a` is truthy and
    # uses `b` as the failure message; they now actually compare.
    self.assertEqual(root.children, [j1])
    self.assertEqual(j1.parent, root)
    self.assertIs(type(j1.parent), LogicalProjectionPlan)
    self.assertEqual(len(j1.children), 2)
    self.assertIs(type(j1.children[0]), SeqScanPlan)
    self.assertIs(type(j1.children[1]), SeqScanPlan)
    # Each table must now sit under one of the new selections.
    self.assertIn(t1.parent, j1.children)
    self.assertIn(t2.parent, j1.children)
def test_combined_projection_pushdown(self, verbose=False):
    """Check PROJECTION_PUSHDOWN_JOIN followed by PROJECTION_PUSHDOWN_SELECT.

    Initial tree:
        projection(v1.3, v2.4) -> join(v1.1, v2.1)
                                    |- v1
                                    `- select(v2.7 == 4) -> v2
    The assertions require a projection above each input of the join and
    another projection between the selection and v2.

    :param verbose: when True, print the plan tree before and after the
        optimizer runs.
    """
    # Videos
    v1_meta = VideoMetaInfo(file='v1', c_format=VideoFormat.MOV, fps=30)
    v1_video = SimpleVideoLoader(video_metadata=v1_meta)

    v2_meta = VideoMetaInfo(file='v2', c_format=VideoFormat.MOV, fps=30)
    v2_video = SimpleVideoLoader(video_metadata=v2_meta)

    projected_cols = ['v1.3', 'v2.4']
    root = LogicalProjectionPlan(videos=[v1_video, v2_video],
                                 column_ids=projected_cols,
                                 foreign_column_ids=[])

    join = LogicalInnerJoinPlan(videos=[v1_video, v2_video],
                                join_ids=['v1.1', 'v2.1'])
    join.parent = root

    # Predicate: tuple column 7 == 4 (column id 'v2.7').
    four = ConstantValueExpression(value=4)
    col7 = TupleValueExpression(col_idx=7)
    predicate = ComparisonExpression(
        exp_type=ExpressionType.COMPARE_EQUAL, left=col7, right=four)
    # NOTE(review): the selection filters v2.7 and sits on the v2 table,
    # yet it is given video1 -- confirm this is intended.
    select = SeqScanPlan(predicate=predicate, column_ids=['v2.7'],
                         videos=[v1_video], foreign_column_ids=[])
    select.parent = join

    v1_table = TableRef(video=v1_video,
                        table_info=TableInfo(table_name='v1'))
    v2_table = TableRef(video=v2_video,
                        table_info=TableInfo(table_name='v2'))

    select.set_children([v2_table])
    v1_table.parent = join
    v2_table.parent = select

    join.set_children([v1_table, select])
    root.set_children([join])

    rules = [
        Rules.PROJECTION_PUSHDOWN_JOIN,
        Rules.PROJECTION_PUSHDOWN_SELECT,
    ]
    if verbose:
        print('Original Plan Tree')
        print(root)
    optimizer = RuleQueryOptimizer()
    optimized = optimizer.run(root, rules)
    if verbose:
        print('New Plan Tree')
        print(optimized)

    # Top of the tree is unchanged.
    self.assertIsNone(root.parent)
    self.assertEqual(root.children, [join])
    self.assertEqual(join.parent, root)

    # Both inputs of the join are now projections.
    self.assertIs(type(join.children[0]), LogicalProjectionPlan)
    self.assertIs(type(join.children[1]), LogicalProjectionPlan)

    # Projection above the selection (join-side pushdown for v2).
    self.assertIs(type(select.parent), LogicalProjectionPlan)
    self.assertIn('v2.1', select.parent.column_ids)
    self.assertIn('v2.4', select.parent.column_ids)
    self.assertIn(select.parent, join.children)

    # Projection below the selection (select-side pushdown).
    self.assertEqual(len(select.children), 1)
    self.assertIs(type(select.children[0]), LogicalProjectionPlan)
    self.assertIn('v2.7', select.children[0].column_ids)
    self.assertIn('v2.1', select.children[0].column_ids)
    self.assertIn('v2.4', select.children[0].column_ids)

    # Projection above v1 (join-side pushdown for v1).
    self.assertIs(type(v1_table.parent), LogicalProjectionPlan)
    self.assertIn('v1.1', v1_table.parent.column_ids)
    self.assertIn('v1.3', v1_table.parent.column_ids)
    self.assertIn(v1_table.parent, join.children)
    self.assertEqual(select.children[0].children, [v2_table])
def test_double_join_predicate_pushdown(verbose=False):
    """Check PREDICATE_PUSHDOWN through two nested joins.

    Initial tree:
        projection -> select(column 'v3.3') -> join2(join1(v1, v2), v3)
    Asserted tree:
        projection -> join2(join1(v1, v2), select -> v3)

    :param verbose: when True, print the plan tree before and after the
        optimizer runs.
    """
    v1_meta = VideoMetaInfo(file='v1', c_format=VideoFormat.MOV, fps=30)
    v1_video = SimpleVideoLoader(video_metadata=v1_meta)

    v2_meta = VideoMetaInfo(file='v2', c_format=VideoFormat.MOV, fps=30)
    v2_video = SimpleVideoLoader(video_metadata=v2_meta)

    v3_meta = VideoMetaInfo(file='v3', c_format=VideoFormat.MOV, fps=30)
    v3_video = SimpleVideoLoader(video_metadata=v3_meta)

    projected_cols = ['v1.1', 'v2.2', 'v3.4']
    root = LogicalProjectionPlan(videos=[v1_video, v2_video, v3_video],
                                 column_ids=projected_cols,
                                 foreign_column_ids=[])

    # Predicate: <tuple column> == 4.
    # NOTE(review): the column index is taken from projected_cols[2]
    # ('v3.4', so index 4) while the selection's column id is 'v3.3' --
    # confirm which one is intended.
    four = ConstantValueExpression(value=4)
    column_index = int(projected_cols[2].split('.')[1])
    column = TupleValueExpression(col_idx=column_index)
    predicate = ComparisonExpression(exp_type=ExpressionType.COMPARE_EQUAL,
                                     left=column, right=four)

    # The selection starts above both joins, so it is given every video.
    select = SeqScanPlan(predicate=predicate, column_ids=['v3.3'],
                         videos=[v1_video, v2_video, v3_video],
                         foreign_column_ids=[])
    select.parent = root

    inner_join = LogicalInnerJoinPlan(videos=[v1_video, v2_video],
                                      join_ids=['v1.3', 'v2.3'])
    outer_join = LogicalInnerJoinPlan(
        videos=[v1_video, v2_video, v3_video],
        join_ids=['v1.3', 'v2.3', 'v3.3'])
    inner_join.parent = outer_join

    v1_table = TableRef(video=v1_video,
                        table_info=TableInfo(table_name='v1'))
    v2_table = TableRef(video=v2_video,
                        table_info=TableInfo(table_name='v2'))
    v3_table = TableRef(video=v3_video,
                        table_info=TableInfo(table_name='v3'))

    select.set_children([outer_join])
    v1_table.parent = inner_join
    v2_table.parent = inner_join
    outer_join.set_children([inner_join, v3_table])
    v3_table.parent = outer_join
    inner_join.set_children([v1_table, v2_table])
    root.set_children([select])

    rules = [Rules.PREDICATE_PUSHDOWN]
    if verbose:
        print('Original Plan Tree')
        print(root)
    optimizer = RuleQueryOptimizer()
    optimized = optimizer.run(root, rules)
    if verbose:
        print('New Plan Tree')
        print(optimized)

    # The outer join is now the projection's only child.
    assert root.parent is None
    assert len(root.children) == 1
    assert root.children[0].parent == root
    assert outer_join.parent == root

    # The selection replaced v3 as the outer join's second input.
    assert len(outer_join.children) == 2
    assert outer_join.children[0] == inner_join
    assert outer_join.children[1] == select
    assert select.parent == outer_join
    assert inner_join.parent == outer_join

    # The selection covers only v3 and sits directly on its table.
    assert len(select.videos) == 1
    assert select.videos[0] == v3_video
    assert len(select.children) == 1
    assert select.children[0] == v3_table
    assert v3_table.parent == select

    # The inner join is untouched.
    assert len(inner_join.children) == 2
    assert inner_join.children[0] == v1_table
    assert inner_join.children[1] == v2_table
    assert v1_table.parent == inner_join
    assert v2_table.parent == inner_join
    print('Double join predicate Pushdown Successful!')
def test_both_projection_pushdown_and_predicate_pushdown(verbose=False):
    """Check predicate pushdown combined with both projection pushdown rules.

    Initial tree:  projection(v1.1, v2.2) -> select(v1.1 == 4) -> join(v1, v2)
    Asserted tree:
        projection -> join
                        |- select -> projection(v1.1, v1.3) -> v1
                        `- projection(v2.3, v2.2) -> v2
    The two inputs of the join may appear in either order.

    :param verbose: when True, print the plan tree before and after the
        optimizer runs.
    """
    v1_meta = VideoMetaInfo(file='v1', c_format=VideoFormat.MOV, fps=30)
    v1_video = SimpleVideoLoader(video_metadata=v1_meta)

    v2_meta = VideoMetaInfo(file='v2', c_format=VideoFormat.MOV, fps=30)
    v2_video = SimpleVideoLoader(video_metadata=v2_meta)

    projected_cols = ['v1.1', 'v2.2']
    root = LogicalProjectionPlan(videos=[v1_video, v2_video],
                                 column_ids=projected_cols,
                                 foreign_column_ids=[])

    # Predicate: v1.1 == 4 (column index parsed from the first projected id).
    four = ConstantValueExpression(value=4)
    column_index = int(projected_cols[0].split('.')[1])
    column = TupleValueExpression(col_idx=column_index)
    predicate = ComparisonExpression(exp_type=ExpressionType.COMPARE_EQUAL,
                                     left=column, right=four)

    # The selection starts above the join, so it is given both videos.
    select = SeqScanPlan(predicate=predicate, column_ids=['v1.1'],
                         videos=[v1_video, v2_video],
                         foreign_column_ids=[])
    select.parent = root

    join = LogicalInnerJoinPlan(videos=[v1_video, v2_video],
                                join_ids=['v1.3', 'v2.3'])
    join.parent = select

    v1_table = TableRef(video=v1_video,
                        table_info=TableInfo(table_name='v1'))
    v2_table = TableRef(video=v2_video,
                        table_info=TableInfo(table_name='v2'))

    select.set_children([join])
    v1_table.parent = join
    v2_table.parent = join

    join.set_children([v1_table, v2_table])
    root.set_children([select])

    rules = [
        Rules.PREDICATE_PUSHDOWN,
        Rules.PROJECTION_PUSHDOWN_JOIN,
        Rules.PROJECTION_PUSHDOWN_SELECT,
    ]
    if verbose:
        print('Original Plan Tree')
        print(root)
    optimizer = RuleQueryOptimizer()
    optimized = optimizer.run(root, rules)
    if verbose:
        print('New Plan Tree')
        print(optimized)

    # The join is now directly under the root projection.
    assert root.parent is None
    assert root.children == [join]
    assert join.parent == root
    assert len(join.children) == 2

    # v1 side: selection pushed under the join, with a projection below it.
    assert select in join.children
    assert select.parent == join
    assert select.videos == [v1_video]
    assert len(select.children) == 1
    assert type(select.children[0]) is LogicalProjectionPlan
    assert 'v1.1' in select.children[0].column_ids
    assert 'v1.3' in select.children[0].column_ids
    assert select.children[0].children == [v1_table]
    assert v1_table.parent == select.children[0]

    # v2 side: whichever join input is not the selection.
    select_ix = join.children.index(select)
    proj_ix = 1 if select_ix == 0 else 0
    assert type(join.children[proj_ix]) is LogicalProjectionPlan
    assert join.children[proj_ix].parent == join
    assert 'v2.3' in join.children[proj_ix].column_ids
    assert 'v2.2' in join.children[proj_ix].column_ids
    assert v2_table.parent == join.children[proj_ix]
    print(
        'Combined Projection Pushdown and Predicate Pushdown Test Successful!')