def test_sql_two_inputs_params_success(): params = { ExecuteSQLOperation.QUERY_PARAM: "select * where df2.id = 1;", ExecuteSQLOperation.NAMES_PARAM: "col1, col2, col3" } n_in = {'input data 1': 'input_1', 'input data 2': 'input_2'} n_out = {'output data': 'output_1'} instance = ExecuteSQLOperation(params, named_inputs=n_in, named_outputs=n_out) code = instance.generate_code() expected_code = dedent(""" query = 'select * where df2.id = 1;' output_1 = sqldf(query, {'ds1': input_1, 'ds2': input_2}) names = ['col1', 'col2', 'col3'] if names is not None and len(names) > 0: old_names = output_1.columns if len(old_names) != len(names): raise ValueError('Invalid names. Number of attributes ' 'in result differs from names informed.') rename = dict(zip(old_names, names)) output_1.rename(columns=rename, inplace=True) """) result, msg = compare_ast(ast.parse(code), ast.parse(expected_code)) assert result, msg + format_code_comparison(code, expected_code)
def test_execute_sql_no_output_implies_no_code_success(): arguments = { 'parameters': { 'query': 'SELECT class, sepalwidth FROM ds1 WHERE sepalwidth > 3.2', 'names': 'class,sepalwidth' }, 'named_inputs': { 'input data': 'df', }, 'named_outputs': {} } instance = ExecuteSQLOperation(**arguments) assert instance.generate_code() is None
def test_execute_sql_names_param_not_informed_success(): df1 = util.iris(['class', 'sepalwidth'], size=10) test_df = df1.copy() arguments = { 'parameters': { 'query': 'SELECT class, sepalwidth FROM ds1' }, 'named_inputs': { 'input data 1': 'df1', }, 'named_outputs': { 'output data': 'out' } } instance = ExecuteSQLOperation(**arguments) result = util.execute(instance.generate_code(), {'df1': df1}) assert result['out'].equals(test_df)
def test_wrong_query_parameter_failure(): params = { ExecuteSQLOperation.QUERY_PARAM: "ALTER TABLE Customer DROP Birth_Date;" } with pytest.raises(ValueError): n_in = {'input data 1': 'input_1'} n_out = {'output data': 'output_1'} ExecuteSQLOperation(params, named_inputs=n_in, named_outputs=n_out)
def test_execute_sql_column_not_found_fail(): df1 = util.iris(['class', 'sepalwidth'], size=10) arguments = { 'parameters': { 'query': 'SELECT unknown FROM ds1' }, 'named_inputs': { 'input data 1': 'df1', }, 'named_outputs': { 'output data': 'out' } } instance = ExecuteSQLOperation(**arguments) with pytest.raises(pandasql.PandaSQLException) as psql_err: util.execute(instance.generate_code(), {'df1': df1}) assert "(sqlite3.OperationalError) no such column: unknown" in str( psql_err.value)
def test_execute_sql_wrong_number_of_attributes_informed_fail(): df1 = util.iris(['class', 'sepalwidth'], size=10) arguments = { 'parameters': { 'query': 'SELECT class, sepalwidth FROM ds1', 'names': 'class' }, 'named_inputs': { 'input data 1': 'df1' }, 'named_outputs': { 'output data': 'out' } } instance = ExecuteSQLOperation(**arguments) with pytest.raises(ValueError) as val_err: util.execute(instance.generate_code(), {'df1': df1}) assert "Invalid names. Number of attributes in" \ " result differs from names informed." in str(val_err.value)
def test_execute_sql_success(): slice_size = 10 df = [ 'df', util.iris(['sepallength', 'sepalwidth', 'petalwidth', 'petallength'], slice_size) ] arguments = { 'parameters': {}, 'named_inputs': { 'input data': df[0], }, 'named_outputs': { 'output data': 'out' } } instance = ExecuteSQLOperation(**arguments) result = util.execute(instance.generate_code(), dict([df])) assert result['out'].equals(util.iris(size=slice_size))
def test_execute_sql_success(): df1 = util.iris(['class', 'sepalwidth'], size=10) test_df = df1.copy().sepalwidth > 3.2 test_df = df1.copy()[test_df] test_df.reset_index(inplace=True, drop=True) arguments = { 'parameters': { 'query': 'SELECT class, sepalwidth FROM ds1 WHERE sepalwidth > 3.2', 'names': 'class,sepalwidth' }, 'named_inputs': { 'input data 1': 'df1' }, 'named_outputs': { 'output data': 'out' } } instance = ExecuteSQLOperation(**arguments) result = util.execute(instance.generate_code(), {'df1': df1}) assert result['out'].equals(test_df)
def test_execute_sql_missing_parameters_fail(): arguments = { 'parameters': {}, 'named_inputs': { 'input data 1': 'df1' }, 'named_outputs': { 'output data': 'out' } } with pytest.raises(ValueError) as val_err: ExecuteSQLOperation(**arguments) assert "Required parameter query must be informed for task" in str( val_err.value)
def test_execute_sql_multiple_dataframes_success(): df1 = util.iris(['class'], size=10) df2 = util.iris(['sepalwidth'], size=10) test_df = df1.copy().join(df2.copy()) test_df.sort_values(by='sepalwidth', inplace=True) test_df.drop_duplicates(inplace=True, ignore_index=True) arguments = { 'parameters': { 'query': 'SELECT DISTINCT class, sepalwidth FROM ds1,' ' ds2 ORDER BY class, sepalwidth', 'names': 'class,sepalwidth' }, 'named_inputs': { 'input data 1': 'df1', 'input data 2': 'df2' }, 'named_outputs': { 'output data': 'out' } } instance = ExecuteSQLOperation(**arguments) result = util.execute(instance.generate_code(), {'df1': df1, 'df2': df2}) assert result['out'].equals(test_df)
def test_execute_sql_only_select_is_allowed_fail(): arguments = { 'parameters': { 'query': 'UPDATE sepalwidth FROM ds1', 'name': 'class,sepalwidth' }, 'named_inputs': { 'input data 1': 'df1' }, 'named_outputs': { 'output data': 'out' } } with pytest.raises(ValueError) as val_err: ExecuteSQLOperation(**arguments) assert "Invalid query. Only SELECT is allowed." in str(val_err.value)
def test_sql_missing_parameter_failure(): params = {} with pytest.raises(ValueError): n_in = {'input data 1': 'input_1'} n_out = {'output data': 'output_1'} ExecuteSQLOperation(params, named_inputs=n_in, named_outputs=n_out)