示例#1
0
def test_sql_two_inputs_params_success():
    params = {
        ExecuteSQLOperation.QUERY_PARAM: "select * where df2.id = 1;",
        ExecuteSQLOperation.NAMES_PARAM: "col1, col2, col3"
    }
    n_in = {'input data 1': 'input_1', 'input data 2': 'input_2'}
    n_out = {'output data': 'output_1'}
    instance = ExecuteSQLOperation(params,
                                   named_inputs=n_in,
                                   named_outputs=n_out)

    code = instance.generate_code()
    expected_code = dedent("""
    
        query = 'select * where df2.id = 1;'
        output_1 = sqldf(query, {'ds1': input_1, 'ds2': input_2})
        names = ['col1', 'col2', 'col3']

        if names is not None and len(names) > 0:
            old_names = output_1.columns
            if len(old_names) != len(names):
                raise ValueError('Invalid names. Number of attributes '
                                 'in result differs from names informed.')
            rename = dict(zip(old_names, names))
            output_1.rename(columns=rename, inplace=True)
        """)
    result, msg = compare_ast(ast.parse(code), ast.parse(expected_code))
    assert result, msg + format_code_comparison(code, expected_code)
示例#2
0
def test_execute_sql_no_output_implies_no_code_success():
    arguments = {
        'parameters': {
            'query':
            'SELECT class, sepalwidth FROM ds1 WHERE sepalwidth > 3.2',
            'names': 'class,sepalwidth'
        },
        'named_inputs': {
            'input data': 'df',
        },
        'named_outputs': {}
    }
    instance = ExecuteSQLOperation(**arguments)
    assert instance.generate_code() is None
示例#3
0
def test_execute_sql_names_param_not_informed_success():
    df1 = util.iris(['class', 'sepalwidth'], size=10)
    test_df = df1.copy()
    arguments = {
        'parameters': {
            'query': 'SELECT class, sepalwidth FROM ds1'
        },
        'named_inputs': {
            'input data 1': 'df1',
        },
        'named_outputs': {
            'output data': 'out'
        }
    }
    instance = ExecuteSQLOperation(**arguments)
    result = util.execute(instance.generate_code(), {'df1': df1})
    assert result['out'].equals(test_df)
示例#4
0
def test_wrong_query_parameter_failure():
    params = {
        ExecuteSQLOperation.QUERY_PARAM:
        "ALTER TABLE Customer DROP Birth_Date;"
    }
    with pytest.raises(ValueError):
        n_in = {'input data 1': 'input_1'}
        n_out = {'output data': 'output_1'}
        ExecuteSQLOperation(params, named_inputs=n_in, named_outputs=n_out)
示例#5
0
def test_execute_sql_column_not_found_fail():
    df1 = util.iris(['class', 'sepalwidth'], size=10)
    arguments = {
        'parameters': {
            'query': 'SELECT unknown FROM ds1'
        },
        'named_inputs': {
            'input data 1': 'df1',
        },
        'named_outputs': {
            'output data': 'out'
        }
    }
    instance = ExecuteSQLOperation(**arguments)
    with pytest.raises(pandasql.PandaSQLException) as psql_err:
        util.execute(instance.generate_code(), {'df1': df1})
    assert "(sqlite3.OperationalError) no such column: unknown" in str(
        psql_err.value)
示例#6
0
def test_execute_sql_wrong_number_of_attributes_informed_fail():
    df1 = util.iris(['class', 'sepalwidth'], size=10)

    arguments = {
        'parameters': {
            'query': 'SELECT class, sepalwidth FROM ds1',
            'names': 'class'
        },
        'named_inputs': {
            'input data 1': 'df1'
        },
        'named_outputs': {
            'output data': 'out'
        }
    }
    instance = ExecuteSQLOperation(**arguments)
    with pytest.raises(ValueError) as val_err:
        util.execute(instance.generate_code(), {'df1': df1})
    assert "Invalid names. Number of attributes in" \
           " result differs from names informed." in str(val_err.value)
示例#7
0
def test_execute_sql_success():
    slice_size = 10
    df = [
        'df',
        util.iris(['sepallength', 'sepalwidth', 'petalwidth', 'petallength'],
                  slice_size)
    ]

    arguments = {
        'parameters': {},
        'named_inputs': {
            'input data': df[0],
        },
        'named_outputs': {
            'output data': 'out'
        }
    }
    instance = ExecuteSQLOperation(**arguments)
    result = util.execute(instance.generate_code(), dict([df]))
    assert result['out'].equals(util.iris(size=slice_size))
示例#8
0
def test_execute_sql_success():
    df1 = util.iris(['class', 'sepalwidth'], size=10)
    test_df = df1.copy().sepalwidth > 3.2
    test_df = df1.copy()[test_df]
    test_df.reset_index(inplace=True, drop=True)

    arguments = {
        'parameters': {
            'query':
            'SELECT class, sepalwidth FROM ds1 WHERE sepalwidth > 3.2',
            'names': 'class,sepalwidth'
        },
        'named_inputs': {
            'input data 1': 'df1'
        },
        'named_outputs': {
            'output data': 'out'
        }
    }
    instance = ExecuteSQLOperation(**arguments)
    result = util.execute(instance.generate_code(), {'df1': df1})
    assert result['out'].equals(test_df)
示例#9
0
def test_execute_sql_missing_parameters_fail():
    arguments = {
        'parameters': {},
        'named_inputs': {
            'input data 1': 'df1'
        },
        'named_outputs': {
            'output data': 'out'
        }
    }
    with pytest.raises(ValueError) as val_err:
        ExecuteSQLOperation(**arguments)
    assert "Required parameter query must be informed for task" in str(
        val_err.value)
示例#10
0
def test_execute_sql_multiple_dataframes_success():
    df1 = util.iris(['class'], size=10)
    df2 = util.iris(['sepalwidth'], size=10)
    test_df = df1.copy().join(df2.copy())
    test_df.sort_values(by='sepalwidth', inplace=True)
    test_df.drop_duplicates(inplace=True, ignore_index=True)

    arguments = {
        'parameters': {
            'query': 'SELECT DISTINCT class, sepalwidth FROM ds1,'
            ' ds2 ORDER BY class, sepalwidth',
            'names': 'class,sepalwidth'
        },
        'named_inputs': {
            'input data 1': 'df1',
            'input data 2': 'df2'
        },
        'named_outputs': {
            'output data': 'out'
        }
    }
    instance = ExecuteSQLOperation(**arguments)
    result = util.execute(instance.generate_code(), {'df1': df1, 'df2': df2})
    assert result['out'].equals(test_df)
示例#11
0
def test_execute_sql_only_select_is_allowed_fail():
    arguments = {
        'parameters': {
            'query': 'UPDATE sepalwidth FROM ds1',
            'name': 'class,sepalwidth'
        },
        'named_inputs': {
            'input data 1': 'df1'
        },
        'named_outputs': {
            'output data': 'out'
        }
    }

    with pytest.raises(ValueError) as val_err:
        ExecuteSQLOperation(**arguments)
    assert "Invalid query. Only SELECT is allowed." in str(val_err.value)
示例#12
0
def test_sql_missing_parameter_failure():
    params = {}
    with pytest.raises(ValueError):
        n_in = {'input data 1': 'input_1'}
        n_out = {'output data': 'output_1'}
        ExecuteSQLOperation(params, named_inputs=n_in, named_outputs=n_out)