def test_nested_select_statement(self):
    """Parse SELECTs whose FROM clause is a parenthesised sub-select.

    Two scenarios are checked: a plain nested query, and a nested query
    whose target list and WHERE clause use function-call expressions.
    In both, the outer statement's ``from_table`` must equal a TableRef
    wrapping the separately parsed sub-query.
    """
    parser = Parser()

    # Scenario 1: simple column projection over a sub-select.
    inner_sql = """SELECT CLASS FROM TAIPAI WHERE CLASS = 'VAN'"""
    outer_sql = """SELECT ID FROM ({});""".format(inner_sql)
    inner_stmt = parser.parse(inner_sql)[0]
    outer_stmt = parser.parse(outer_sql)[0]
    self.assertEqual(outer_stmt.stmt_type, StatementType.SELECT)
    self.assertEqual(outer_stmt.target_list[0].col_name, 'ID')
    self.assertEqual(outer_stmt.from_table, TableRef(inner_stmt))

    # Scenario 2: function expressions in both inner and outer queries.
    inner_sql = """SELECT Yolo(frame).bbox FROM autonomous_vehicle_1 WHERE Yolo(frame).label = 'vehicle'"""
    outer_sql = """SELECT Licence_plate(bbox) FROM ({}) WHERE Is_suspicious(bbox) = 1 AND Licence_plate(bbox) = '12345'; """.format(inner_sql)
    # Same outer query over a plain table; used as the reference for the
    # WHERE clause and target list of the nested variant.
    flat_sql = """SELECT Licence_plate(bbox) FROM TAIPAI WHERE Is_suspicious(bbox) = 1 AND Licence_plate(bbox) = '12345'; """
    flat_stmt = parser.parse(flat_sql)[0]
    outer_stmt = parser.parse(outer_sql)[0]
    inner_stmt = parser.parse(inner_sql)[0]
    self.assertEqual(outer_stmt.from_table, TableRef(inner_stmt))
    self.assertEqual(outer_stmt.where_clause, flat_stmt.where_clause)
    self.assertEqual(outer_stmt.target_list, flat_stmt.target_list)
def visitInsertStatement(self, ctx: evaql_parser.InsertStatementContext):
    """Build an InsertTableStatement from an INSERT parse-tree node.

    Iterates over the children of ``ctx`` after the first two, skips
    terminal nodes, and dispatches on each remaining child's rule index
    to collect the target table, the column list and the value list.

    Arguments:
        ctx {evaql_parser.InsertStatementContext} -- [insert statement
            context produced by the parser]

    Returns:
        InsertTableStatement -- built from the collected table ref,
        columns and values; ``None`` if visiting any child raised an
        ordinary exception.
    """
    table_ref = None
    column_list = []
    value_list = []
    # The first two children will be INSERT INTO.
    # The remaining ones include terminal nodes for '(' and ')'.
    for child in ctx.children[2:]:
        if isinstance(child, TerminalNode):
            continue
        try:
            rule_idx = child.getRuleIndex()
            if rule_idx == evaql_parser.RULE_tableName:
                table_ref = TableRef(self.visit(ctx.tableName()))
            elif rule_idx == evaql_parser.RULE_uidList:
                column_list = self.visit(ctx.uidList())
            elif rule_idx == evaql_parser.RULE_insertStatementValue:
                insrt_value = self.visit(ctx.insertStatementValue())
                # Support only (value1, value2, .... value n)
                value_list = insrt_value[0]
        except Exception:
            # Stop parsing, something bad happened. Only ordinary errors
            # are swallowed; KeyboardInterrupt and SystemExit are not
            # subclasses of Exception and therefore propagate.
            return None

    return InsertTableStatement(table_ref, column_list, value_list)
def test_visit_table_ref_populates_column_mapping(self, mock, mock_lget):
    """visit_table_ref must hand ``mock.return_value`` to _populate_column_map.

    ``mock`` and ``mock_lget`` are injected by patch decorators outside
    this block (presumably the dataset binder and LogicalGet -- confirm
    against the decorators).
    """
    converter = StatementToPlanConvertor()
    # Replace the helper with a spy so its arguments can be inspected.
    populate_spy = MagicMock()
    converter._populate_column_map = populate_spy

    converter.visit_table_ref(TableRef(TableInfo("test")))

    populate_spy.assert_called_with(mock.return_value)
def visitTableSourceItemWithSample(
        self, ctx: evaql_parser.TableSourceItemWithSampleContext):
    """Build a TableRef for a table source with an optional sample clause.

    Visits the table source item first; if ``ctx`` has a sample clause,
    visits it too and passes the result as the sample frequency,
    otherwise the sample frequency is ``None``.
    """
    table = self.visit(ctx.tableSourceItem())
    sample_freq = (
        self.visit(ctx.sampleClause()) if ctx.sampleClause() else None
    )
    return TableRef(table, sample_freq)
def visitTableName(self, ctx: evaql_parser.TableNameContext):
    """Turn a table-name parse node into a TableRef.

    Visits ``ctx.fullId()`` to obtain the table name. When that yields
    ``None`` a SyntaxWarning is issued and ``None`` is returned;
    otherwise the name is wrapped in a TableInfo inside a TableRef.
    """
    table_name = self.visit(ctx.fullId())
    if table_name is None:
        warnings.warn("Invalid from table", SyntaxWarning)
        return None
    return TableRef(TableInfo(table_name=table_name))
def test_should_visit_select_if_nested_query(self, mock_p, mock_c, mock_d):
    """Check the logical plan produced for a SELECT over a nested SELECT.

    The three injected mocks (patched outside this block) are all made
    to return the same MagicMock ``m``, which is also used as the
    metadata argument of the expected LogicalGet.

    The expected plan is a single chain, root to leaf:
    Project(id) -> Filter(id > 3) -> QueryDerivedGet ->
    Project(data, id) -> Filter(data > 2) -> Get(video).
    A second, deliberately mis-shaped tree must compare unequal.
    """
    m = MagicMock()
    mock_p.return_value = mock_c.return_value = mock_d.return_value = m
    # The backslash must be the last character on its line: it is a line
    # continuation inside the literal, so no backslash or newline reaches
    # the parser.
    stmt = Parser().parse(""" SELECT id FROM (SELECT data, id FROM video \
        WHERE data > 2) WHERE id>3;""")[0]
    converter = StatementToPlanConvertor()
    actual_plan = converter.visit(stmt)

    # Operators listed from the root of the plan down to the leaf.
    plans = [
        LogicalProject([TupleValueExpression('id')]),
        LogicalFilter(
            ComparisonExpression(ExpressionType.COMPARE_GREATER,
                                 TupleValueExpression('id'),
                                 ConstantValueExpression(3))),
        LogicalQueryDerivedGet(),
        LogicalProject(
            [TupleValueExpression('data'), TupleValueExpression('id')]),
        LogicalFilter(
            ComparisonExpression(ExpressionType.COMPARE_GREATER,
                                 TupleValueExpression('data'),
                                 ConstantValueExpression(2))),
        LogicalGet(TableRef(TableInfo('video')), m),
    ]

    # Link leaf-to-root so each operator has the next one as its child.
    expected_plan = None
    for plan in reversed(plans):
        if expected_plan:
            plan.append_child(expected_plan)
        expected_plan = plan
    self.assertEqual(expected_plan, actual_plan)

    # Negative check: additionally hang every operator directly under the
    # root; that tree must not match the actual plan.
    wrong_plan = plans[0]
    for plan in plans[1:]:
        wrong_plan.append_child(plan)
    self.assertNotEqual(wrong_plan, actual_plan)
def test_should_insert_row_in_table(self):
    """Create a table, insert one parsed row, and read it back.

    The table is created through CreateExecutor, an INSERT statement is
    parsed and converted to a logical plan, an InsertPlan is built from
    that plan's fields and executed, and the stored data is then loaded
    from the URL returned by the create step.
    """
    video_table = TableRef(TableInfo('MyVideo'))
    columns = [
        DataFrameColumn('Frame_ID', ColumnType.INTEGER),
        DataFrameColumn('Frame_Path', ColumnType.TEXT, array_dimensions=50),
    ]
    create_executor = CreateExecutor(CreatePlan(video_table, columns, False))
    url = create_executor.exec()

    parser = Parser()
    insert_query = """INSERT INTO MyVideo (Frame_ID, Frame_Path) VALUES (1, '/mnt/frames/1.png'); """
    insert_stmt = parser.parse(insert_query)[0]

    convertor = StatementToPlanConvertor()
    convertor.visit(insert_stmt)
    logical_plan_node = convertor.plan
    print("logical", logical_plan_node)

    # Build the physical insert plan from the logical plan's fields.
    phy_plan_node = InsertPlan(logical_plan_node.video_catalog_id,
                               logical_plan_node.column_list,
                               logical_plan_node.value_list)
    InsertExecutor(phy_plan_node).exec()

    # Verify that the inserted row is present in storage.
    df = load_dataframe(url)
    self.assertEqual(df.collect()[0][0], 1)
    self.assertEqual(df.collect()[0][1], "'/mnt/frames/1.png'")
def test_visit_table_ref_should_create_logical_get_opr(
        self, mock, mock_lget):
    """visit_table_ref binds the table and stores the created get operator.

    ``mock`` and ``mock_lget`` come from patch decorators outside this
    block. The test expects ``mock`` to be called with the table info,
    ``mock_lget`` with the table ref plus ``mock``'s return value, and
    the converter's ``_plan`` to be ``mock_lget``'s return value.
    """
    plan_converter = StatementToPlanConvertor()
    table_ref = TableRef(TableInfo("test"))

    plan_converter.visit_table_ref(table_ref)

    mock.assert_called_with(table_ref.table_info)
    bound_metadata = mock.return_value
    mock_lget.assert_called_with(table_ref, bound_metadata)
    self.assertEqual(mock_lget.return_value, plan_converter._plan)
def test_visit_load_data_when_bind_returns_None(self, mock_create,
                                                mock_bind, mock_load):
    """When binding yields None, visit_load_data goes through ``mock_create``.

    The three mocks are injected by patch decorators outside this block.
    With ``mock_bind`` returning ``None`` the test expects exactly one
    ``mock_create`` call with the table name, a ``mock_bind`` call with
    the table info, and a ``mock_load`` call with ``mock_create``'s
    return value and the statement's path.
    """
    mock_bind.return_value = None
    table_ref = TableRef(TableInfo("test"))
    load_stmt = MagicMock()
    load_stmt.table = table_ref
    load_stmt.path = 'path'

    StatementToPlanConvertor().visit_load_data(load_stmt)

    table_info = table_ref.table_info
    mock_create.assert_called_once_with(table_info.table_name)
    mock_bind.assert_called_with(table_info)
    mock_load.assert_called_with(mock_create.return_value, 'path')
def test_should_visit_select_union_if_union_query(self, mock_p, mock_c,
                                                  mock_d):
    """Check the logical plan produced for a UNION ALL of two SELECTs.

    The three injected mocks (patched outside this block) all return the
    same MagicMock ``m``, which is also the metadata argument of the
    expected LogicalGet nodes. Each branch is expected to be a
    Project -> Filter -> Get chain, and the expected LogicalUnion(True)
    receives the right branch as its first child and the left branch as
    its second.
    """
    m = MagicMock()
    mock_p.return_value = mock_c.return_value = mock_d.return_value = m
    stmt = Parser().parse(""" SELECT id FROM video WHERE id>3 UNION ALL SELECT id FROM video WHERE id<=3;""")[0]
    converter = StatementToPlanConvertor()
    actual_plan = converter.visit(stmt)

    # Left branch, listed from root to leaf: id > 3.
    left_plans = [
        LogicalProject([TupleValueExpression('id')]),
        LogicalFilter(
            ComparisonExpression(ExpressionType.COMPARE_GREATER,
                                 TupleValueExpression('id'),
                                 ConstantValueExpression(3))),
        LogicalGet(TableRef(TableInfo('video')), m),
    ]

    def reverse_plan(plans):
        # Link the operators leaf-to-root and return the root.
        root = None
        for node in reversed(plans):
            if root:
                node.append_child(root)
            root = node
        return root

    expect_left_plan = reverse_plan(left_plans)

    # Right branch, listed from root to leaf: id <= 3.
    right_plans = [
        LogicalProject([TupleValueExpression('id')]),
        LogicalFilter(
            ComparisonExpression(ExpressionType.COMPARE_LEQ,
                                 TupleValueExpression('id'),
                                 ConstantValueExpression(3))),
        LogicalGet(TableRef(TableInfo('video')), m),
    ]
    expect_right_plan = reverse_plan(right_plans)

    expected_plan = LogicalUnion(True)
    for branch in (expect_right_plan, expect_left_plan):
        expected_plan.append_child(branch)
    self.assertEqual(expected_plan, actual_plan)
def test_load_data_statement(self):
    """A LOAD DATA query parses to exactly one matching LoadDataStatement."""
    parser = Parser()
    load_data_query = """LOAD DATA INFILE 'data/video.mp4' INTO MyVideo;"""
    target_table = TableRef(TableInfo('MyVideo'))
    expected_stmt = LoadDataStatement(target_table, Path('data/video.mp4'))

    statements = parser.parse(load_data_query)

    self.assertIsInstance(statements, list)
    self.assertEqual(len(statements), 1)
    self.assertEqual(statements[0].stmt_type, StatementType.LOAD_DATA)
    self.assertEqual(statements[0], expected_stmt)
def test_table_ref(self):
    """Check that a TableRef assigned to a SelectStatement keeps its TableInfo.

    The table, schema and database names given to TableInfo must be
    readable back through ``from_table.table_info``.
    """
    select_stmt_new = SelectStatement()
    select_stmt_new.from_table = TableRef(
        TableInfo('TAIPAI', 'Schema', 'Database'))

    self.assertEqual(select_stmt_new.from_table.table_info.table_name,
                     'TAIPAI')
    self.assertEqual(select_stmt_new.from_table.table_info.schema_name,
                     'Schema')
    self.assertEqual(select_stmt_new.from_table.table_info.database_name,
                     'Database')
def test_create_plan(self):
    """CreatePlan exposes its operator type, flag, table and columns."""
    dummy_table = TableRef(TableInfo('dummy'))
    CatalogManager().reset()
    id_column = DataFrameColumn('id', ColumnType.INTEGER)
    name_column = DataFrameColumn('name', ColumnType.TEXT,
                                  array_dimensions=50)

    dummy_plan_node = CreatePlan(dummy_table, [id_column, name_column],
                                 False)

    self.assertEqual(dummy_plan_node.opr_type, PlanOprType.CREATE)
    self.assertEqual(dummy_plan_node.if_not_exists, False)
    self.assertEqual(dummy_plan_node.video_ref.table.table_name, "dummy")
    self.assertEqual(dummy_plan_node.column_list[0].name, "id")
    self.assertEqual(dummy_plan_node.column_list[1].name, "name")
def test_should_return_false_for_unequal_expression(self):
    """Statements of different kinds must never compare equal."""
    table = TableRef(TableInfo('MyVideo'))
    load_stmt = LoadDataStatement(table, Path('data/video.mp4'))
    insert_stmt = InsertTableStatement(table)

    udf_inputs = [
        ColumnDefinition('frame', ParserColumnDataType.NDARRAY,
                         [3, 256, 256]),
    ]
    udf_outputs = [
        ColumnDefinition('labels', ParserColumnDataType.NDARRAY, [10]),
    ]
    create_udf = CreateUDFStatement('udf', False, udf_inputs, udf_outputs,
                                    Path('data/fastrcnn.py'),
                                    'Classification')
    select_stmt = SelectStatement()

    # The first pair is compared in both directions.
    self.assertNotEqual(load_stmt, insert_stmt)
    self.assertNotEqual(insert_stmt, load_stmt)
    self.assertNotEqual(create_udf, insert_stmt)
    self.assertNotEqual(select_stmt, create_udf)
def test_should_return_false_for_unequal_plans(self):
    """Logical plans equal themselves but not plans of another kind."""
    create_plan = LogicalCreate(TableRef(TableInfo('video')),
                                [MagicMock()])
    create_udf_plan = LogicalCreateUDF('udf', False, None, None, None)
    insert_plan = LogicalInsert(MagicMock(), 0, [MagicMock()],
                                [MagicMock()])
    query_derived_plan = LogicalQueryDerivedGet()
    load_plan = LogicalLoadData(MagicMock(), MagicMock())

    # Each plan equals itself.
    self.assertEqual(create_plan, create_plan)
    self.assertEqual(create_udf_plan, create_udf_plan)

    # Different plan kinds differ, in either comparison order.
    self.assertNotEqual(create_plan, create_udf_plan)
    self.assertNotEqual(create_udf_plan, create_plan)

    # Still unequal once one plan is a child of the other.
    create_plan.append_child(create_udf_plan)
    self.assertNotEqual(create_plan, create_udf_plan)

    self.assertNotEqual(query_derived_plan, create_plan)
    self.assertNotEqual(insert_plan, query_derived_plan)
    self.assertNotEqual(load_plan, insert_plan)
def test_create_executor_should_create_table_in_storage(self):
    """Executing a CreatePlan yields a loadable table with the given columns."""
    dummy_table = TableRef(TableInfo('dummy'))
    columns = [
        DataFrameColumn('id', ColumnType.INTEGER),
        DataFrameColumn('name', ColumnType.TEXT, array_dimensions=50),
    ]
    create_executor = CreateExecutor(CreatePlan(dummy_table, columns, False))
    url = create_executor.exec()

    # Load what was written at the returned URL and check its columns.
    df = load_dataframe(url)
    self.assertEqual(2, len(df.columns))
    self.assertEqual(df.columns, ['id', 'name'])
def test_insert_statement(self):
    """An INSERT query parses to exactly one matching InsertTableStatement."""
    parser = Parser()
    insert_query = """INSERT INTO MyVideo (Frame_ID, Frame_Path) VALUES (1, '/mnt/frames/1.png'); """
    expected_columns = [
        TupleValueExpression('Frame_ID'),
        TupleValueExpression('Frame_Path'),
    ]
    expected_values = [
        ConstantValueExpression(1),
        ConstantValueExpression('/mnt/frames/1.png'),
    ]
    expected_stmt = InsertTableStatement(TableRef(TableInfo('MyVideo')),
                                         expected_columns, expected_values)

    statements = parser.parse(insert_query)

    self.assertIsInstance(statements, list)
    self.assertEqual(len(statements), 1)
    self.assertEqual(statements[0].stmt_type, StatementType.INSERT)
    self.assertEqual(statements[0], expected_stmt)
def visit_table_ref(self, table_ref: TableRef):
    """Convert a parsed table reference into a logical operator.

    A plain table is bound with ``bind_dataset``, its metadata is passed
    to ``_populate_column_map``, and the plan becomes a LogicalGet. A
    nested SELECT is visited first and its plan becomes the child of a
    new LogicalQueryDerivedGet. In both cases a sample frequency, when
    present, is visited afterwards.

    Arguments:
        table_ref {TableRef} -- [Input table ref object created by the
            parser]
    """
    if not table_ref.is_select():
        # Plain table: bind it and record its columns.
        catalog_vid_metadata = bind_dataset(table_ref.table)
        self._populate_column_map(catalog_vid_metadata)
        self._plan = LogicalGet(table_ref, catalog_vid_metadata)
    else:
        # Nested query: plan the inner SELECT, then wrap its plan.
        self.visit_select(table_ref.table)
        inner_plan = self._plan
        self._plan = LogicalQueryDerivedGet()
        self._plan.append_child(inner_plan)

    if table_ref.sample_freq:
        self._visit_sample(table_ref.sample_freq)
def test_visit_load_statement(self, mock_load, mock_visit):
    """visitLoadStatement visits file name then table name and builds the statement.

    ``mock_visit`` (patched outside this block) is driven by a lookup
    table so each context child maps to a canned visit result.
    ``mock_load`` must be called exactly once, with a TableRef wrapping
    the table result and the ``value`` of the file-name result.
    """
    ctx = MagicMock()
    path = MagicMock(value='video.mp4')
    visit_results = {
        ctx.fileName.return_value: path,
        ctx.tableName.return_value: 'myVideo',
    }
    mock_visit.side_effect = lambda arg: visit_results[arg]

    ParserVisitor().visitLoadStatement(ctx)

    expected_calls = [call(ctx.fileName()), call(ctx.tableName())]
    mock_visit.assert_has_calls(expected_calls)
    mock_load.assert_called_once_with(TableRef('myVideo'), 'video.mp4')
def test_visit_select_orderby(self, mock_p, mock_c, mock_d):
    """Check the logical plan produced for a SELECT with ORDER BY.

    The three injected mocks (patched outside this block) all return the
    same MagicMock ``m``, which is also the metadata argument of the
    expected LogicalGet.

    The expected plan is a single chain, root to leaf:
    OrderBy(data ASC, id DESC) -> Project(data, id) ->
    Filter(data > 2) -> Get(video).
    A second, deliberately mis-shaped tree must compare unequal.
    """
    m = MagicMock()
    mock_p.return_value = mock_c.return_value = mock_d.return_value = m
    # The backslash must be the last character on its line: it is a line
    # continuation inside the literal, so no backslash or newline reaches
    # the parser.
    stmt = Parser().parse(""" SELECT data, id FROM video \
        WHERE data > 2 ORDER BY data, id DESC;""")[0]
    converter = StatementToPlanConvertor()
    actual_plan = converter.visit(stmt)

    # Operators listed from the root of the plan down to the leaf.
    plans = [
        LogicalOrderBy([
            (TupleValueExpression('data'), ParserOrderBySortType.ASC),
            (TupleValueExpression('id'), ParserOrderBySortType.DESC),
        ]),
        LogicalProject(
            [TupleValueExpression('data'), TupleValueExpression('id')]),
        LogicalFilter(
            ComparisonExpression(ExpressionType.COMPARE_GREATER,
                                 TupleValueExpression('data'),
                                 ConstantValueExpression(2))),
        LogicalGet(TableRef(TableInfo('video')), m),
    ]

    # Link leaf-to-root so each operator has the next one as its child.
    expected_plan = None
    for plan in reversed(plans):
        if expected_plan:
            plan.append_child(expected_plan)
        expected_plan = plan
    self.assertEqual(expected_plan, actual_plan)

    # Negative check: additionally hang every operator directly under the
    # root; that tree must not match the actual plan.
    wrong_plan = plans[0]
    for plan in plans[1:]:
        wrong_plan.append_child(plan)
    self.assertNotEqual(wrong_plan, actual_plan)
def test_visit_select_sample(self, mock_p, mock_c, mock_d):
    """Check the logical plan produced for a SELECT with SAMPLE and LIMIT.

    The three injected mocks (patched outside this block) all return the
    same MagicMock ``m``, which is also the metadata argument of the
    expected LogicalGet.

    The expected plan is a single chain, root to leaf:
    Limit(3) -> Project(data, id) -> Filter(id > 2) -> Sample(2) ->
    Get(video with sample frequency 2).
    """
    m = MagicMock()
    mock_p.return_value = mock_c.return_value = mock_d.return_value = m
    # The backslash must be the last character on its line: it is a line
    # continuation inside the literal, so no backslash or newline reaches
    # the parser.
    stmt = Parser().parse(""" SELECT data, id FROM video SAMPLE 2 \
        WHERE id > 2 LIMIT 3;""")[0]
    converter = StatementToPlanConvertor()
    actual_plan = converter.visit(stmt)

    # Operators listed from the root of the plan down to the leaf.
    plans = [
        LogicalLimit(ConstantValueExpression(3)),
        LogicalProject(
            [TupleValueExpression('data'), TupleValueExpression('id')]),
        LogicalFilter(
            ComparisonExpression(ExpressionType.COMPARE_GREATER,
                                 TupleValueExpression('id'),
                                 ConstantValueExpression(2))),
        LogicalSample(ConstantValueExpression(2)),
        LogicalGet(
            TableRef(TableInfo('video'), ConstantValueExpression(2)), m),
    ]

    # Link leaf-to-root so each operator has the next one as its child.
    expected_plan = None
    for plan in reversed(plans):
        if expected_plan:
            plan.append_child(expected_plan)
        expected_plan = plan
    self.assertEqual(expected_plan, actual_plan)
def visitLoadStatement(self, ctx: evaql_parser.LoadStatementContext):
    """Build a LoadDataStatement from a LOAD parse-tree node.

    The file name child is visited first and its ``value`` attribute is
    used as the path; the table name child is visited next and wrapped
    in a TableRef.
    """
    file_name_node = self.visit(ctx.fileName())
    file_path = file_name_node.value
    table_node = self.visit(ctx.tableName())
    return LoadDataStatement(TableRef(table_node), file_path)