def test_set(self, dialect):
    """SET assignments and SET NAMES ... COLLATE round-trip through the parser."""
    # Multiple variable assignments in a single SET statement.
    sql = "set var1 = NULL, var2 = 10"
    expected = Set(arg=Tuple(items=[
        BinaryOperation('=', args=[Identifier('var1'), NullConstant()]),
        BinaryOperation('=', args=[Identifier('var2'), Constant(10)]),
    ]))
    result = parse_sql(sql, dialect=dialect)
    assert result.to_tree() == expected.to_tree()
    assert str(result) == str(expected)

    # SET NAMES with the DEFAULT collation keyword.
    sql = "SET NAMES some_name collate default"
    expected = Set(category="names",
                   arg=Identifier('some_name'),
                   params={'COLLATE': 'DEFAULT'})
    result = parse_sql(sql, dialect=dialect)
    assert result.to_tree() == expected.to_tree()
    assert str(result) == str(expected)

    # SET NAMES with an explicit collation name as a string constant.
    sql = "SET NAMES some_name collate 'utf8mb4_general_ci'"
    expected = Set(category="names",
                   arg=Identifier('some_name'),
                   params={'COLLATE': Constant('utf8mb4_general_ci')})
    result = parse_sql(sql, dialect=dialect)
    assert result.to_tree() == expected.to_tree()
    assert str(result) == str(expected)
def test_window_function(self, dialect):
    """OVER clauses: partition+order, order-only, partition-only, and empty OVER ()."""
    sql = "select SUM(col0) OVER (PARTITION BY col1 order by col2) as al from table1 "
    expected = Select(
        targets=[
            WindowFunction(
                function=Function(op='sum', args=[Identifier('col0')]),
                partition=[Identifier('col1')],
                order_by=[OrderBy(field=Identifier('col2'))],
                alias=Identifier('al'),
            )
        ],
        from_table=Identifier('table1'),
    )
    result = parse_sql(sql, dialect=dialect)
    assert str(result) == str(expected)
    assert result.to_tree() == expected.to_tree()

    # no partition
    sql = "select SUM(col0) OVER (order by col2) from table1 "
    expected = Select(
        targets=[
            WindowFunction(
                function=Function(op='sum', args=[Identifier('col0')]),
                order_by=[OrderBy(field=Identifier('col2'))],
            )
        ],
        from_table=Identifier('table1'),
    )
    result = parse_sql(sql, dialect=dialect)
    assert str(result) == str(expected)
    assert result.to_tree() == expected.to_tree()

    # no order by
    sql = "select SUM(col0) OVER (PARTITION BY col1) from table1 "
    expected = Select(
        targets=[
            WindowFunction(
                function=Function(op='sum', args=[Identifier('col0')]),
                partition=[Identifier('col1')],
            )
        ],
        from_table=Identifier('table1'),
    )
    result = parse_sql(sql, dialect=dialect)
    assert str(result) == str(expected)
    assert result.to_tree() == expected.to_tree()

    # just over()
    sql = "select SUM(col0) OVER () from table1 "
    expected = Select(
        targets=[
            WindowFunction(
                function=Function(op='sum', args=[Identifier('col0')]),
            )
        ],
        from_table=Identifier('table1'),
    )
    result = parse_sql(sql, dialect=dialect)
    assert str(result) == str(expected)
    assert result.to_tree() == expected.to_tree()
def test_select_group_by(self, dialect):
    """GROUP BY with a single column and with multiple columns.

    Fix: the SQL literals had an f-string prefix with no placeholders (F541);
    they are now plain strings.
    """
    sql = 'SELECT column FROM tab WHERE column != 1 GROUP BY column1'
    ast = parse_sql(sql, dialect=dialect)
    assert str(ast).lower() == sql.lower()

    sql = 'SELECT column FROM tab WHERE column != 1 GROUP BY column1, column2'
    ast = parse_sql(sql, dialect=dialect)
    assert isinstance(ast, Select)
    assert len(ast.targets) == 1
    assert isinstance(ast.targets[0], Identifier)
    assert ast.targets[0].parts[0] == 'column'
    assert isinstance(ast.from_table, Identifier)
    assert ast.from_table.parts[0] == 'tab'
    assert isinstance(ast.where, BinaryOperation)
    assert ast.where.op == '!='
    assert isinstance(ast.group_by, list)
    assert isinstance(ast.group_by[0], Identifier)
    assert ast.group_by[0].parts[0] == 'column1'
    assert isinstance(ast.group_by[1], Identifier)
    assert ast.group_by[1].parts[0] == 'column2'
    assert str(ast).lower() == sql.lower()
def test_select_order_by(self, dialect):
    """ORDER BY with default, ASC and DESC directions.

    Fix: the SQL literals had an f-string prefix with no placeholders (F541);
    they are now plain strings.
    """
    sql = 'SELECT column1 FROM tab ORDER BY column2'
    ast = parse_sql(sql, dialect=dialect)
    assert str(ast).lower() == sql.lower()
    assert len(ast.order_by) == 1
    assert isinstance(ast.order_by[0], OrderBy)
    assert isinstance(ast.order_by[0].field, Identifier)
    assert ast.order_by[0].field.parts[0] == 'column2'
    assert ast.order_by[0].direction == 'default'

    sql = 'SELECT column1 FROM tab ORDER BY column2, column3 ASC, column4 DESC'
    ast = parse_sql(sql, dialect=dialect)
    assert str(ast).lower() == sql.lower()
    assert len(ast.order_by) == 3
    assert isinstance(ast.order_by[0], OrderBy)
    assert isinstance(ast.order_by[0].field, Identifier)
    assert ast.order_by[0].field.parts[0] == 'column2'
    assert ast.order_by[0].direction == 'default'
    assert isinstance(ast.order_by[1], OrderBy)
    assert isinstance(ast.order_by[1].field, Identifier)
    assert ast.order_by[1].field.parts[0] == 'column3'
    assert ast.order_by[1].direction == 'ASC'
    assert isinstance(ast.order_by[2], OrderBy)
    assert isinstance(ast.order_by[2].field, Identifier)
    assert ast.order_by[2].field.parts[0] == 'column4'
    assert ast.order_by[2].direction == 'DESC'
def test_create_predictor_quotes(self):
    """CREATE PREDICTOR with a backtick-quoted integration name, plus OR REPLACE."""
    sql = """CREATE PREDICTOR xxx FROM `yyy` (SELECT * FROM zzz) AS x PREDICT sss """
    result = parse_sql(sql, dialect='mindsdb')
    expected = CreatePredictor(
        name=Identifier('xxx'),
        integration_name=Identifier('yyy'),
        query_str="SELECT * FROM zzz",
        datasource_name=Identifier('x'),
        targets=[Identifier('sss')],
    )
    assert to_single_line(str(result)) == to_single_line(str(expected))
    assert result.to_tree() == expected.to_tree()

    # The OR REPLACE variant only flips the is_replace flag on the same AST.
    sql = """CREATE or REPLACE PREDICTOR xxx FROM `yyy` (SELECT * FROM zzz) AS x PREDICT sss """
    result = parse_sql(sql, dialect='mindsdb')
    expected.is_replace = True
    assert to_single_line(str(result)) == to_single_line(str(expected))
    assert result.to_tree() == expected.to_tree()
def test_drop_database(self, dialect):
    """DROP DATABASE with and without IF EXISTS; DROP SCHEMA as a synonym."""
    sql = "DROP DATABASE IF EXISTS dbname"
    result = parse_sql(sql, dialect=dialect)
    expected = DropDatabase(name=Identifier('dbname'), if_exists=True)
    assert str(result).lower() == sql.lower()
    assert result.to_tree() == expected.to_tree()

    sql = "DROP DATABASE dbname"
    result = parse_sql(sql, dialect=dialect)
    expected = DropDatabase(name=Identifier('dbname'), if_exists=False)
    assert str(result).lower() == sql.lower()
    assert result.to_tree() == expected.to_tree()

    # DROP SCHEMA is a synonym for DROP DATABASE and renders as the latter.
    sql = "DROP SCHEMA dbname"
    result = parse_sql(sql, dialect=dialect)
    expected = DropDatabase(name=Identifier('dbname'))
    assert str(result).lower() == 'DROP DATABASE dbname'.lower()
    assert result.to_tree() == expected.to_tree()
def test_select_limit_offset_raises_nonint(self, dialect):
    """Non-integer LIMIT/OFFSET values must raise ParsingException.

    Fixes: useless f-string prefix, unnecessary quote escaping, and an
    unused `ast` assignment inside the `pytest.raises` blocks.
    """
    sql = 'SELECT column FROM tab OFFSET 3.0'
    with pytest.raises(ParsingException):
        parse_sql(sql, dialect=dialect)

    sql = "SELECT column FROM tab LIMIT 'string'"
    with pytest.raises(ParsingException):
        parse_sql(sql, dialect=dialect)
def test_where_and_or_precedence(self, dialect):
    """AND binds tighter than OR inside a WHERE clause."""
    # Bare identifiers: expect (col1 AND col2) OR col3.
    sql = "SELECT col1 FROM tab WHERE col1 AND col2 OR col3"
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[Identifier.from_path_str('col1')],
                          from_table=Identifier.from_path_str('tab'),
                          where=BinaryOperation(op='or', args=(
                              BinaryOperation(op='and', args=(
                                  Identifier.from_path_str('col1'),
                                  Identifier.from_path_str('col2'),
                              )),
                              Identifier.from_path_str('col3'),
                          )))
    assert str(ast).lower() == sql.lower()
    assert str(ast) == str(expected_ast)
    assert ast.to_tree() == expected_ast.to_tree()

    # Comparison operands: expect (col1 = 1 AND col2 = 1) OR col3 = 1.
    sql = "SELECT col1 FROM tab WHERE col1 = 1 AND col2 = 1 OR col3 = 1"
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(
        targets=[Identifier.from_path_str('col1')],
        from_table=Identifier.from_path_str('tab'),
        where=BinaryOperation(op='or', args=(
            BinaryOperation(op='and', args=(
                BinaryOperation(op='=', args=(
                    Identifier.from_path_str('col1'),
                    Constant(1),
                )),
                BinaryOperation(op='=', args=(
                    Identifier.from_path_str('col2'),
                    Constant(1),
                )),
            )),
            BinaryOperation(op='=', args=(
                Identifier.from_path_str('col3'),
                Constant(1),
            )),
        )))
    assert str(ast).lower() == sql.lower()
    assert str(ast) == str(expected_ast)
    assert ast.to_tree() == expected_ast.to_tree()
def test_select_group_by_elaborate(self, dialect):
    """GROUP BY combined with an aliased aggregate function in the target list.

    Fix: the original called `parse_sql(query)` twice, ignoring the
    parameterized `dialect` fixture; parse once and pass the dialect through.
    """
    query = """SELECT column1, column2, sum(column3) AS total FROM t1 GROUP BY column1, column2"""
    ast = parse_sql(query, dialect=dialect)
    assert str(ast) == query
    expected = Select(targets=[Identifier(parts=["column1"]),
                               Identifier(parts=["column2"]),
                               Function(op="sum",
                                        args=[Identifier(parts=["column3"])],
                                        alias=Identifier('total'))],
                      from_table=Identifier(parts=['t1']),
                      group_by=[Identifier(parts=["column1"]),
                                Identifier(parts=["column2"])])
    assert str(ast) == str(expected)
def test_type_convert(self, dialect):
    """CONVERT(expr, type) and CONVERT(expr USING type) both parse into TypeCast.

    Fix: the SQL literals had an f-string prefix with no placeholders (F541).
    """
    sql = """SELECT CONVERT(column1, float)"""
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[TypeCast(type_name='float', arg=Identifier(parts=['column1']))])
    assert ast.to_tree() == expected_ast.to_tree()
    assert str(ast) == str(expected_ast)

    sql = """SELECT CONVERT((column1 + column2) USING float)"""
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[TypeCast(type_name='float',
                                            arg=BinaryOperation(op='+', parentheses=True,
                                                                args=[Identifier(parts=['column1']),
                                                                      Identifier(parts=['column2'])]))])
    assert ast.to_tree() == expected_ast.to_tree()
    assert str(ast) == str(expected_ast)
def test_select_from_elaborate(self, dialect):
    """Star, plain column, aliased column and a binary expression as targets.

    Fix: the original called `parse_sql(query)` twice, ignoring the
    parameterized `dialect` fixture; parse once and pass the dialect through.
    """
    query = """SELECT *, column1, column1 AS aliased, column1 + column2 FROM t1"""
    ast = parse_sql(query, dialect=dialect)
    assert str(ast) == query
    expected = Select(targets=[Star(),
                               Identifier(parts=["column1"]),
                               Identifier(parts=["column1"], alias=Identifier('aliased')),
                               BinaryOperation(op="+",
                                               args=(Identifier(parts=['column1']),
                                                     Identifier(parts=['column2']))),
                               ],
                      from_table=Identifier(parts=['t1']))
    assert str(ast) == str(expected)
def test_drop_predictor_table_syntax_ok(self, dialect):
    """DROP TABLE (optionally with IF EXISTS) parses into DropTables."""
    sql = "DROP TABLE mindsdb.tbl"
    result = parse_sql(sql, dialect=dialect)
    expected = DropTables(tables=[Identifier('mindsdb.tbl')])
    assert str(result) == str(expected)
    assert result.to_tree() == expected.to_tree()

    sql = "DROP TABLE if exists mindsdb.tbl"
    result = parse_sql(sql, dialect=dialect)
    expected = DropTables(tables=[Identifier('mindsdb.tbl')], if_exists=True)
    assert str(result) == str(expected)
    assert result.to_tree() == expected.to_tree()
def test_create_predictor_invalid_json(self):
    """A USING clause whose payload is not valid JSON must raise ParsingException."""
    sql = """CREATE PREDICTOR pred FROM integration_name (select * FROM table) AS ds_name PREDICT f1 as f1_alias, f2 ORDER BY f_order_1 ASC, f_order_2, f_order_3 DESC GROUP BY f_group_1, f_group_2 WINDOW 100 HORIZON 7 USING 'not_really_json'"""
    with pytest.raises(ParsingException):
        parse_sql(sql, dialect='mindsdb')
def test_select_variable(self):
    """User (@var) and system (@@var) variables in the target list."""
    sql = 'SELECT @version'
    result = parse_sql(sql, dialect='mysql')
    expected = Select(targets=[Variable('version')])
    assert result.to_tree() == expected.to_tree()
    assert str(result).lower() == sql.lower()
    assert str(result) == str(expected)

    # Double @@ marks a system variable.
    sql = 'SELECT @@version'
    result = parse_sql(sql, dialect='mysql')
    expected = Select(targets=[Variable('version', is_system_var=True)])
    assert result.to_tree() == expected.to_tree()
    assert str(result).lower() == sql.lower()
    assert str(result) == str(expected)
def test_select_from_predictor_get_columns(self):
    """SELECT ... WHERE 1 = 0 on a predictor plans a GetPredictorColumns step.

    Fix: the SQL literal had an f-string prefix with no placeholders (F541).
    """
    sql = 'SELECT GDP_per_capita_USD FROM hdi_predictor_external WHERE 1 = 0'
    query = parse_sql(sql, dialect='mindsdb')
    expected_query = Select(
        targets=[Identifier('GDP_per_capita_USD')],
        from_table=Identifier('hdi_predictor_external'),
        where=BinaryOperation(op="=", args=[Constant(1), Constant(0)]))
    assert query.to_tree() == expected_query.to_tree()

    expected_plan = QueryPlan(
        predictor_namespace='mindsdb',
        default_namespace='mindsdb',
        steps=[
            GetPredictorColumns(namespace='mindsdb',
                                predictor=Identifier('hdi_predictor_external')),
            ProjectStep(dataframe=Result(0), columns=[Identifier('GDP_per_capita_USD')]),
        ],
    )
    plan = plan_query(query,
                      predictor_namespace='mindsdb',
                      default_namespace='mindsdb',
                      predictor_metadata={'hdi_predictor_external': {}})
    assert plan.steps == expected_plan.steps
def test_drop_predictor_if_exists(self):
    """DROP PREDICTOR IF EXISTS sets the if_exists flag on DropPredictor."""
    sql = "DROP PREDICTOR IF EXISTS mindsdb.pred"
    result = parse_sql(sql, dialect='mindsdb')
    expected = DropPredictor(name=Identifier('mindsdb.pred'), if_exists=True)
    assert str(result) == str(expected)
    assert result.to_tree() == expected.to_tree()
def test_drop_predictor_ok(self):
    """Plain DROP PREDICTOR parses and round-trips to the same SQL."""
    sql = "DROP PREDICTOR mindsdb.pred"
    result = parse_sql(sql, dialect='mindsdb')
    expected = DropPredictor(name=Identifier('mindsdb.pred'))
    assert str(result).lower() == sql.lower()
    assert str(result) == str(expected)
    assert result.to_tree() == expected.to_tree()
def test_select_limit_offset(self, dialect):
    """LIMIT and OFFSET parse into Constant nodes and round-trip to SQL.

    Fix: the SQL literal had an f-string prefix with no placeholders (F541).
    """
    sql = 'SELECT column FROM tab LIMIT 5 OFFSET 3'
    ast = parse_sql(sql, dialect=dialect)
    assert str(ast).lower() == sql.lower()
    assert ast.limit == Constant(value=5)
    assert ast.offset == Constant(value=3)
def test_alter_table_keys(self, dialect):
    """ALTER TABLE ... DISABLE/ENABLE KEYS parses into AlterTable with a raw arg."""
    for action in ('disable', 'enable'):
        sql = f"alter table some_table {action} keys"
        result = parse_sql(sql, dialect=dialect)
        expected = AlterTable(target=Identifier('some_table'), arg=f'{action} keys')
        assert result.to_tree() == expected.to_tree()
        assert str(result) == str(expected)
def test_rollback(self, dialect):
    """A bare ROLLBACK statement parses to RollbackTransaction."""
    result = parse_sql("rollback", dialect=dialect)
    expected = RollbackTransaction()
    assert result.to_tree() == expected.to_tree()
    assert str(result) == str(expected)
def test_start_transaction(self, dialect):
    """START TRANSACTION parses to StartTransaction."""
    result = parse_sql("start transaction", dialect=dialect)
    expected = StartTransaction()
    assert result.to_tree() == expected.to_tree()
    assert str(result) == str(expected)
def test_select_from_inner_join(self, dialect):
    """INNER JOIN with a compound ON condition builds a Join node with an AND tree."""
    sql = """SELECT * FROM t1 INNER JOIN t2 ON t1.x1 = t2.x2 and t1.x2 = t2.x2"""
    # Both equality comparisons hang off a single 'and' BinaryOperation.
    expected_ast = Select(targets=[Star()],
                          from_table=Join(join_type=JoinType.INNER_JOIN,
                                          left=Identifier(parts=['t1']),
                                          right=Identifier(parts=['t2']),
                                          condition=BinaryOperation(op='and', args=[
                                              BinaryOperation(op='=', args=(
                                                  Identifier(parts=['t1', 'x1']),
                                                  Identifier(parts=['t2', 'x2']))),
                                              BinaryOperation(op='=', args=(
                                                  Identifier(parts=['t1', 'x2']),
                                                  Identifier(parts=['t2', 'x2']))),
                                          ])))
    ast = parse_sql(sql, dialect=dialect)
    assert ast == expected_ast
def test_select_from_engines(self, dialect):
    """'select * from engines' treats engines as an ordinary identifier."""
    result = parse_sql('select * from engines', dialect=dialect)
    expected = Select(targets=[Star()],
                      from_table=Identifier.from_path_str('engines'))
    assert result.to_tree() == expected.to_tree()
    assert str(result) == str(expected)
def test_select_subquery_target(self, dialect):
    """A scalar subquery in the target list, without and with an alias.

    Fix: the SQL literals had an f-string prefix with no placeholders (F541).
    """
    sql = """SELECT *, (SELECT 1) FROM t1"""
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[Star(), Select(targets=[Constant(1)], parentheses=True)],
                          from_table=Identifier(parts=['t1']))
    assert str(ast).lower() == sql.lower()
    assert ast.to_tree() == expected_ast.to_tree()
    assert str(ast) == str(expected_ast)

    sql = """SELECT *, (SELECT 1) AS ones FROM t1"""
    ast = parse_sql(sql, dialect=dialect)
    expected_ast = Select(targets=[Star(),
                                   Select(targets=[Constant(1)], alias=Identifier('ones'), parentheses=True)],
                          from_table=Identifier(parts=['t1']))
    assert str(ast).lower() == sql.lower()
    assert ast.to_tree() == expected_ast.to_tree()
    assert str(ast) == str(expected_ast)
def test_common_like_double_where_from_in_modes(self, dialect):
    """SHOW <modes> <category> with doubled FROM/IN qualifiers, LIKE and WHERE."""
    # Column/index-style SHOW categories that accept table and db qualifiers.
    categories = [
        'COLUMNS',
        'FIELDS',
        'INDEX',
        'INDEXES',
        'KEYS',
    ]
    # Each mode list is rendered space-separated before the category.
    modes = [
        ['EXTENDED'],
        ['FULL'],
        ['EXTENDED', 'FULL'],
    ]
    for cat in categories:
        for mode in modes:
            # The doubled FROM (tab1/db1) and IN (tab2/db2) clauses collapse
            # into db-qualified identifiers in the expected AST.
            sql = f"SHOW {' '.join(mode)} {cat} from tab1 from db1 in tab2 in db2 like 'pattern' where a=1"
            ast = parse_sql(sql, dialect=dialect)
            expected_ast = Show(category=cat,
                                like='pattern',
                                from_table=Identifier('db1.tab1'),
                                in_table=Identifier('db2.tab2'),
                                modes=mode,
                                where=BinaryOperation(op='=', args=[Identifier('a'), Constant(1)]))
            assert str(ast) == str(expected_ast)
            assert ast.to_tree() == expected_ast.to_tree()
def test_commit(self, dialect):
    """A bare COMMIT statement parses to CommitTransaction."""
    result = parse_sql("commit", dialect=dialect)
    expected = CommitTransaction()
    assert result.to_tree() == expected.to_tree()
    assert str(result) == str(expected)
def query_df(df, query):
    """
    Perform simple query ('select' from one table, without subqueries and joins) on DataFrame.

    Args:
        df (pandas.DataFrame): data
        query (mindsdb_sql.parser.ast.Select | str): select query

    Returns:
        pandas.DataFrame

    Raises:
        Exception: if the query is not a plain SELECT from a single table.
    """
    query = parse_sql(str(query), dialect='mysql')
    # Fix: replaced the `isinstance(...) is False` anti-pattern with `not isinstance(...)`.
    if not isinstance(query, Select) or not isinstance(query.from_table, Identifier):
        raise Exception(
            "Only 'SELECT from TABLE' statements supported for internal query")

    # Point the query at the synthetic relation name the DataFrame is
    # registered under in duckdb.
    query.from_table.parts = ['df_table']
    # Strip table qualifiers: only the bare column names exist on df_table.
    for identifier in query.targets:
        if isinstance(identifier, Identifier):
            identifier.parts = [identifier.parts[-1]]
    if isinstance(query.order_by, list):
        for orderby in query.order_by:
            if isinstance(orderby, OrderBy) and isinstance(
                    orderby.field, Identifier):
                orderby.field.parts = [orderby.field.parts[-1]]
    _remove_table_name(query.where)

    # FIXME https://github.com/mindsdb/mindsdb_sql/issues/130
    # we need a way to dump the query in postgres dialect
    sql_query = str(query).replace('`', '')
    res = duckdb.query_df(df, 'df_table', sql_query)
    result_df = res.df()
    # Normalize duckdb's NaN placeholders for missing values back to None.
    result_df = result_df.where(pd.notnull(result_df), None)
    return result_df
def test_explain(self, dialect):
    """EXPLAIN <table> parses to an Explain node targeting the identifier."""
    result = parse_sql("explain some_table", dialect=dialect)
    expected = Explain(target=Identifier('some_table'))
    assert result.to_tree() == expected.to_tree()
    assert str(result) == str(expected)
def test_drop_datasource(self):
    """DROP DATASOURCE parses to DropDatasource and round-trips to SQL."""
    sql = "DROP DATASOURCE dsname"
    result = parse_sql(sql, dialect='mindsdb')
    expected = DropDatasource(name=Identifier('dsname'))
    assert str(result).lower() == sql.lower()
    assert str(result) == str(expected)
    assert result.to_tree() == expected.to_tree()
def df(self):
    """Materialize this object's data as a DataFrame.

    For 'view_query' sources: resolves the view's backing query, registers it
    as a temporary datasource in the data store, reads its DataFrame, and
    always deletes the temporary datasource afterwards.

    Returns:
        pandas.DataFrame (presumably — the type comes from
        DataStore.get_datasource_obj().df; confirm against DataStore)

    Raises:
        Exception: for any source_type other than 'view_query'.

    Fix: removed the unused local `query_str = str(query)`.
    """
    view_interface = WithKWArgsWrapper(ViewController(), company_id=self.company_id)
    integration_controller = WithKWArgsWrapper(IntegrationController(), company_id=self.company_id)
    data_store = WithKWArgsWrapper(DataStore(), company_id=self.company_id)

    query = self.query
    if self.source_type == 'view_query':
        if isinstance(query, str):
            query = parse_sql(query, dialect='mysql')
        # The view is looked up by the last part of the FROM identifier.
        table = query.from_table.parts[-1]
        view_metadata = view_interface.get(name=table)

        integration = integration_controller.get_by_id(
            view_metadata['integration_id'])
        integration_name = integration['name']

        # Save the view's query under a fresh temporary datasource name,
        # fetch its data, and drop the datasource regardless of success.
        dataset_name = data_store.get_vacant_name(table)
        data_store.save_datasource(dataset_name, integration_name, {'query': view_metadata['query']})
        try:
            dataset_object = data_store.get_datasource_obj(dataset_name)
            data_df = dataset_object.df
        finally:
            data_store.delete_datasource(dataset_name)
    else:
        raise Exception(f'Unknown source_type: {self.source_type}')

    return data_df