def test_logical_or(self):
    """LOGICAL_OR over a true and a false comparison evaluates to True."""
    # Left operand: 1 == 1.
    lhs = ComparisonExpression(
        ExpressionType.COMPARE_EQUAL,
        ConstantValueExpression(1),
        ConstantValueExpression(1),
    )
    # Right operand: 1 > 2.
    rhs = ComparisonExpression(
        ExpressionType.COMPARE_GREATER,
        ConstantValueExpression(1),
        ConstantValueExpression(2),
    )
    or_expr = LogicalExpression(ExpressionType.LOGICAL_OR, lhs, rhs)

    outcome = or_expr.evaluate(None).frames[0].tolist()
    self.assertEqual([True], outcome)
def test_comparison_compare_lesser(self):
    """COMPARE_LESSER on the constants 0 and 2 evaluates to True."""
    smaller = ConstantValueExpression(0)
    larger = ConstantValueExpression(2)
    less_than = ComparisonExpression(
        ExpressionType.COMPARE_LESSER, smaller, larger
    )

    outcome = less_than.evaluate(None).frames[0].tolist()
    self.assertEqual([True], outcome)
def test_subtraction(self):
    """ARITHMETIC_SUBTRACT on the constants 5 and 2 evaluates to 3."""
    minuend = ConstantValueExpression(5)
    subtrahend = ConstantValueExpression(2)
    difference_expr = ArithmeticExpression(
        ExpressionType.ARITHMETIC_SUBTRACT, minuend, subtrahend
    )

    outcome = difference_expr.evaluate(None).frames[0].tolist()
    self.assertEqual([3], outcome)
def test_multiply(self):
    """ARITHMETIC_MULTIPLY on the constants 3 and 5 evaluates to 15."""
    left_factor = ConstantValueExpression(3)
    right_factor = ConstantValueExpression(5)
    product_expr = ArithmeticExpression(
        ExpressionType.ARITHMETIC_MULTIPLY, left_factor, right_factor
    )

    outcome = product_expr.evaluate(None).frames[0].tolist()
    self.assertEqual([15], outcome)
def test_divide(self):
    """ARITHMETIC_DIVIDE on the constants 5 and 5 evaluates to 1."""
    dividend = ConstantValueExpression(5)
    divisor = ConstantValueExpression(5)
    quotient_expr = ArithmeticExpression(
        ExpressionType.ARITHMETIC_DIVIDE, dividend, divisor
    )

    outcome = quotient_expr.evaluate(None).frames[0].tolist()
    self.assertEqual([1], outcome)
def visitConstant(self, ctx: evaql_parser.ConstantContext):
    """Turn a constant parse node into a ConstantValueExpression.

    A REAL_LITERAL token yields the node text converted with ``float`` and
    typed as ``ColumnType.FLOAT``. A decimalLiteral child yields the value
    returned by visiting that child, typed as ``ColumnType.INTEGER``. Any
    other node is delegated to ``visitChildren``.
    """
    is_real = ctx.REAL_LITERAL() is not None
    if is_real:
        return ConstantValueExpression(float(ctx.getText()), ColumnType.FLOAT)

    is_integer = ctx.decimalLiteral() is not None
    if not is_integer:
        # Neither a real nor a decimal literal: let the default traversal
        # decide what to return.
        return self.visitChildren(ctx)

    integer_value = self.visit(ctx.decimalLiteral())
    return ConstantValueExpression(integer_value, ColumnType.INTEGER)
def test_comparison_compare_contains(self):
    """COMPARE_CONTAINS on NDARRAY constants.

    With [1, 2, 3, 4] as the left operand, the expression is expected to be
    True for the right operand [1, 2] and False for [1, 5].
    """
    haystack = ConstantValueExpression([1, 2, 3, 4], ColumnType.NDARRAY)
    present = ConstantValueExpression([1, 2], ColumnType.NDARRAY)
    partly_absent = ConstantValueExpression([1, 5], ColumnType.NDARRAY)

    contains_present = ComparisonExpression(
        ExpressionType.COMPARE_CONTAINS, haystack, present
    )
    self.assertEqual(
        [True], contains_present.evaluate(None).frames[0].tolist()
    )

    contains_absent = ComparisonExpression(
        ExpressionType.COMPARE_CONTAINS, haystack, partly_absent
    )
    self.assertEqual(
        [False], contains_absent.evaluate(None).frames[0].tolist()
    )
def test_comparison_compare_geq(self):
    """COMPARE_GEQ is True both for equal operands and for a larger left one."""
    one = ConstantValueExpression(1)
    also_one = ConstantValueExpression(1)
    zero = ConstantValueExpression(0)

    equal_case = ComparisonExpression(
        ExpressionType.COMPARE_GEQ, one, also_one
    )
    greater_case = ComparisonExpression(
        ExpressionType.COMPARE_GEQ, one, zero
    )

    # Equal operands: 1 >= 1.
    equal_outcome = equal_case.evaluate(None).frames[0].tolist()
    self.assertEqual([True], equal_outcome)

    # Strictly greater left operand: 1 >= 0.
    greater_outcome = greater_case.evaluate(None).frames[0].tolist()
    self.assertEqual([True], greater_outcome)
def test_multiple_join_with_multiple_ON(self):
    """Two chained ``JOIN ... ON`` clauses parse into nested inner joins.

    The expected statement is built by hand: (table1 JOIN table2) is the
    left side of a second join with table3, followed by a WHERE predicate
    on table1.a.
    """
    query = '''SELECT table1.a FROM table1 JOIN table2 ON table1.a = table2.a JOIN table3 ON table3.a = table1.a WHERE table1.a <= 5'''
    parsed_stmt = Parser().parse(query)[0]

    t1_a = TupleValueExpression('a', 'table1')
    t2_a = TupleValueExpression('a', 'table2')
    t3_a = TupleValueExpression('a', 'table3')

    # Innermost join: table1 JOIN table2 ON table1.a = table2.a
    first_join = JoinNode(
        TableRef(TableInfo('table1')),
        TableRef(TableInfo('table2')),
        predicate=ComparisonExpression(
            ExpressionType.COMPARE_EQUAL, t1_a, t2_a
        ),
        join_type=JoinType.INNER_JOIN,
    )
    # Outer join: (previous join) JOIN table3 ON table3.a = table1.a
    second_join = JoinNode(
        TableRef(first_join),
        TableRef(TableInfo('table3')),
        predicate=ComparisonExpression(
            ExpressionType.COMPARE_EQUAL, t3_a, t1_a
        ),
        join_type=JoinType.INNER_JOIN,
    )
    where_predicate = ComparisonExpression(
        ExpressionType.COMPARE_LEQ, t1_a, ConstantValueExpression(5)
    )

    expected = SelectStatement([t1_a], TableRef(second_join), where_predicate)
    self.assertEqual(parsed_stmt, expected)
def test_select_statement_sample_class(self):
    """A SELECT with ``SAMPLE 5`` exposes the sample frequency on from_table."""
    statements = Parser().parse("SELECT CLASS, REDNESS FROM TAIPAI SAMPLE 5;")

    self.assertIsInstance(statements, list)
    self.assertEqual(len(statements), 1)
    self.assertEqual(statements[0].stmt_type, StatementType.SELECT)
    stmt = statements[0]

    # Target list: exactly two tuple-value columns.
    self.assertIsNotNone(stmt.target_list)
    self.assertEqual(len(stmt.target_list), 2)
    for position in (0, 1):
        self.assertEqual(
            stmt.target_list[position].etype, ExpressionType.TUPLE_VALUE
        )

    # FROM clause: a TableRef pointing at TAIPAI.
    self.assertIsNotNone(stmt.from_table)
    self.assertIsInstance(stmt.from_table, TableRef)
    self.assertEqual(stmt.from_table.table.table_name, 'TAIPAI')

    # SAMPLE clause: carried as a constant expression with value 5.
    expected_freq = ConstantValueExpression(5)
    self.assertEqual(stmt.from_table.sample_freq, expected_freq)
def test_should_return_smaller_num_rows(self):
    """Sampling with frequency 3 matches selecting every 3rd row by index.

    Four batches of 100 random rows each are run through SampleExecutor;
    the result is compared against rows 0, 3, 6, ... of the concatenated
    input.
    """
    input_batches = [
        Batch(frames=frame)
        for frame in [
            pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
                         columns=list('ABCD'))
            for _ in range(4)
        ]
    ]

    step = 3
    executor = SampleExecutor(SamplePlan(ConstantValueExpression(step)))
    executor.append_child(DummyExecutor(input_batches))
    sampled_batches = list(executor.exec())

    # Build the expectation directly from the concatenated input.
    merged_input = Batch.concat(input_batches)
    kept_rows = range(0, len(merged_input), step)
    expected = Batch.concat([merged_input._get_frames_from_indices(kept_rows)])

    actual = Batch.concat(sampled_batches)
    self.assertEqual(len(expected), len(actual))
    self.assertEqual(expected, actual)
def test_should_return_limit_greater_than_size(self):
    """
    A limit larger than the available data must not drop any rows.

    400 input rows are pushed through a LIMIT of 500 and the total row
    count before and after is compared. (The original note says this case
    also leaves a warning; that is not checked here.)
    """
    input_batches = [
        Batch(frames=frame)
        for frame in [
            pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
                         columns=list('ABCD'))
            for _ in range(4)
        ]
    ]
    rows_before = sum(batch.batch_size for batch in input_batches)

    limit = 500
    executor = LimitExecutor(LimitPlan(ConstantValueExpression(limit)))
    executor.append_child(DummyExecutor(input_batches))
    limited_batches = list(executor.exec())

    rows_after = sum(batch.batch_size for batch in limited_batches)
    self.assertEqual(rows_before, rows_after)
def test_if_expr_tree_is_equal(self):
    """Two separately built but structurally identical trees compare equal.

    Each tree is ``AVG(DATA) != 0``; no node object is shared between them.
    """
    first_tree = ComparisonExpression(
        ExpressionType.COMPARE_NEQ,
        AggregationExpression(
            ExpressionType.AGGREGATION_AVG,
            None,
            TupleValueExpression(col_name='DATA'),
        ),
        ConstantValueExpression(0),
    )
    second_tree = ComparisonExpression(
        ExpressionType.COMPARE_NEQ,
        AggregationExpression(
            ExpressionType.AGGREGATION_AVG,
            None,
            TupleValueExpression(col_name='DATA'),
        ),
        ConstantValueExpression(0),
    )

    self.assertEqual(first_tree, second_tree)
def test_should_return_top_frames_after_sorting(self):
    """
    Checks that LIMIT returns the top 2 rows of the data after sorting.

    Input data (3 batches), columns 'A', 'B', 'C':

        batch 1: [1, 1, 1]
        batch 2: [1, 5, 6]
                 [4, 7, 10]
        batch 3: [2, 9, 7]
                 [4, 1, 2]
                 [4, 2, 4]
    """
    rows_per_batch = [
        [[1, 1, 1]],
        [[1, 5, 6], [4, 7, 10]],
        [[2, 9, 7], [4, 1, 2], [4, 2, 4]],
    ]
    frames = [
        pd.DataFrame(np.array(rows), columns=['A', 'B', 'C'])
        for rows in rows_per_batch
    ]
    input_batches = [Batch(frames=frame) for frame in frames]

    # Simulated query: .... ORDER BY A ASC, B DESC limit 2
    sort_keys = [
        (TupleValueExpression(col_alias='A'), ParserOrderBySortType.ASC),
        (TupleValueExpression(col_alias='B'), ParserOrderBySortType.DESC),
    ]
    sort_executor = OrderByExecutor(OrderByPlan(sort_keys))
    sort_executor.append_child(DummyExecutor(input_batches))
    sorted_batches = list(sort_executor.exec())

    # Feed the sorted output into the limit stage.
    limit = 2
    limit_executor = LimitExecutor(LimitPlan(ConstantValueExpression(limit)))
    limit_executor.append_child(DummyExecutor(sorted_batches))
    limited_batches = list(limit_executor.exec())

    # Merge everything into one batch before comparing.
    merged = Batch.concat(limited_batches, copy=False)

    # Expected result:
    #    A  B  C
    # 0  1  5  6
    # 1  1  1  1
    expected_frame = pd.DataFrame(np.array([[1, 5, 6], [1, 1, 1]]),
                                  columns=['A', 'B', 'C'])
    expected = Batch(frames=expected_frame)
    self.assertEqual(expected, merged)
def visitStringLiteral(self, ctx: evaql_parser.StringLiteralContext):
    """Turn a string-literal parse node into a TEXT ConstantValueExpression.

    When the node has a STRING_LITERAL token, the first and last characters
    of the node text are dropped before wrapping it. Any other node is
    delegated to ``visitChildren``.
    """
    if ctx.STRING_LITERAL() is None:
        # todo handle other types
        return self.visitChildren(ctx)

    # The raw text still includes the enclosing quotes (the literal 'VAN'
    # would otherwise come through as "'VAN'"), so strip one character from
    # each end.
    unquoted = ctx.getText()[1:-1]
    return ConstantValueExpression(unquoted, ColumnType.TEXT)
def test_insert_statement(self):
    """An INSERT with two columns and two values parses to the expected statement."""
    sql_parser = Parser()
    insert_query = """INSERT INTO MyVideo (Frame_ID, Frame_Path) VALUES (1, '/mnt/frames/1.png'); """

    # Hand-built statement the parser output must equal.
    target_table = TableRef(TableInfo('MyVideo'))
    column_list = [
        TupleValueExpression('Frame_ID'),
        TupleValueExpression('Frame_Path'),
    ]
    value_list = [
        ConstantValueExpression(1),
        ConstantValueExpression('/mnt/frames/1.png', ColumnType.TEXT),
    ]
    expected = InsertTableStatement(target_table, column_list, value_list)

    statements = sql_parser.parse(insert_query)
    self.assertIsInstance(statements, list)
    self.assertEqual(len(statements), 1)
    self.assertEqual(statements[0].stmt_type, StatementType.INSERT)

    parsed = statements[0]
    self.assertEqual(parsed, expected)
def test_should_return_false_for_unequal_expressions(self):
    """Expressions of different kinds or with different values compare unequal."""
    zero = ConstantValueExpression(0)
    one = ConstantValueExpression(1)
    increment_fn = FunctionExpression(lambda x: x + 1, name='test')
    not_equal_cmp = ComparisonExpression(
        ExpressionType.COMPARE_NEQ, zero, one
    )
    id_column = TupleValueExpression(col_name='id')
    max_of_id = AggregationExpression(
        ExpressionType.AGGREGATION_MAX, None, id_column
    )
    either_cmp = LogicalExpression(
        ExpressionType.LOGICAL_OR, not_equal_cmp, not_equal_cmp
    )

    # Each pair must be reported as unequal; checked in this order.
    unequal_pairs = [
        (zero, one),
        (not_equal_cmp, zero),
        (increment_fn, not_equal_cmp),
        (id_column, max_of_id),
        (max_of_id, id_column),
        (id_column, not_equal_cmp),
        (either_cmp, not_equal_cmp),
    ]
    for left, right in unequal_pairs:
        self.assertNotEqual(left, right)
def test_select_statement_limit_class(self):
    """Testing limit clause in select statement.

    Class: SelectStatement

    Parses a SELECT with WHERE, ORDER BY and LIMIT clauses and checks the
    target list, the source table, the presence of a where clause, the
    ORDER BY columns with their sort directions, and the limit count.
    """
    parser = Parser()
    # Build the query from adjacent string literals. A backslash inside the
    # literal would either leak a literal "\" into the SQL text (when
    # followed by a space, which is an invalid escape sequence) or embed
    # source indentation in the query (when used as a line continuation).
    select_query = (
        "SELECT CLASS, REDNESS FROM TAIPAI "
        "WHERE (CLASS = 'VAN' AND REDNESS < 400 ) OR REDNESS > 700 "
        "ORDER BY CLASS, REDNESS DESC LIMIT 3;"
    )
    eva_statement_list = parser.parse(select_query)
    self.assertIsInstance(eva_statement_list, list)
    self.assertEqual(len(eva_statement_list), 1)
    self.assertEqual(eva_statement_list[0].stmt_type, StatementType.SELECT)

    select_stmt = eva_statement_list[0]

    # target List
    self.assertIsNotNone(select_stmt.target_list)
    self.assertEqual(len(select_stmt.target_list), 2)
    self.assertEqual(select_stmt.target_list[0].etype,
                     ExpressionType.TUPLE_VALUE)
    self.assertEqual(select_stmt.target_list[1].etype,
                     ExpressionType.TUPLE_VALUE)

    # from_table
    self.assertIsNotNone(select_stmt.from_table)
    self.assertIsInstance(select_stmt.from_table, TableRef)
    self.assertEqual(select_stmt.from_table.table.table_name, 'TAIPAI')

    # where_clause
    self.assertIsNotNone(select_stmt.where_clause)

    # orderby_clause: CLASS has no explicit direction and is expected to be
    # ASC; REDNESS is explicitly DESC.
    self.assertIsNotNone(select_stmt.orderby_list)
    self.assertEqual(len(select_stmt.orderby_list), 2)
    self.assertEqual(select_stmt.orderby_list[0][0].col_name, 'CLASS')
    self.assertEqual(select_stmt.orderby_list[0][1],
                     ParserOrderBySortType.ASC)
    self.assertEqual(select_stmt.orderby_list[1][0].col_name, 'REDNESS')
    self.assertEqual(select_stmt.orderby_list[1][1],
                     ParserOrderBySortType.DESC)

    # limit_count
    self.assertIsNotNone(select_stmt.limit_count)
    self.assertEqual(select_stmt.limit_count, ConstantValueExpression(3))
def test_should_return_smaller_num_rows(self):
    """A limit of 125 over 400 input rows yields exactly 125 rows in total."""
    input_batches = [
        Batch(frames=frame)
        for frame in [
            pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
                         columns=list('ABCD'))
            for _ in range(4)
        ]
    ]

    limit = 125
    executor = LimitExecutor(LimitPlan(ConstantValueExpression(limit)))
    executor.append_child(DummyExecutor(input_batches))
    limited_batches = list(executor.exec())

    rows_returned = sum(batch.batch_size for batch in limited_batches)
    self.assertEqual(rows_returned, limit)
def visitSampleClause(self, ctx: evaql_parser.SampleClauseContext):
    """Wrap the result of visiting the clause's children in a ConstantValueExpression."""
    sample_value = self.visitChildren(ctx)
    return ConstantValueExpression(sample_value)
def visitArrayLiteral(self, ctx: evaql_parser.ArrayLiteralContext):
    """Turn an array-literal parse node into an NDARRAY ConstantValueExpression.

    The node text is parsed with ``ast.literal_eval`` (literals only, no
    arbitrary code execution) and the result is converted with ``np.array``.
    """
    parsed_literal = ast.literal_eval(ctx.getText())
    return ConstantValueExpression(np.array(parsed_literal), ColumnType.NDARRAY)