Пример #1
0
def test_invalid_case_must_raise_error(data):
    """
    Requesting the 'camel' case must fail with a YeastValidationError.

    Construction, prepare and bake all run inside the ``raises`` context,
    so the test accepts the error from whichever of those calls raises it.
    """
    expected_error = errors.YeastValidationError
    with pytest.raises(expected_error):
        steps.CleanColumnNamesStep('camel').prepare(data).bake(data)
Пример #2
0
def test_snake_case_with_whitespaces_before_and_after(data):
    """
    Snake-case cleaning must ignore leading and trailing whitespace.

    The fixture's columns are overwritten with padded names before the step
    is prepared and baked on that same frame.
    """
    data.columns = ['   series_Name    ', '  CreationYear', 'Total Seasons  ']
    cleaner = steps.CleanColumnNamesStep('snake')
    columns = cleaner.prepare(data).bake(data).columns

    # Every padded name must come out as its trimmed snake_case form.
    for expected in ('series_name', 'creation_year', 'total_seasons'):
        assert expected in columns
Пример #3
0
def test_upper_camel_case_cleaning(data):
    """
    Column names must be rewritten in UpperCamelCase.
    """
    cleaner = steps.CleanColumnNamesStep('upper_camel')
    columns = cleaner.prepare(data).bake(data).columns

    # The converted names are present...
    for expected in ('SeriesName', 'CreationYear', 'TotalSeasons'):
        assert expected in columns

    # ...and these original spellings are gone.
    for stale in ('series_Name', 'Total Seasons'):
        assert stale not in columns
Пример #4
0
def test_snake_case_cleaning(data):
    """
    Column names must be rewritten in snake_case.
    """
    cleaner = steps.CleanColumnNamesStep('snake')
    columns = cleaner.prepare(data).bake(data).columns

    # The converted names are present...
    for expected in ('series_name', 'creation_year', 'total_seasons'):
        assert expected in columns

    # ...and none of the original spellings survive.
    for stale in ('series_Name', 'CreationYear', 'Total Seasons'):
        assert stale not in columns
Пример #5
0
def test_recipe_workflow(raw_data):
    """
    Sequential execution of the recipe: clean the names, then select columns.
    """
    workflow = [
        steps.CleanColumnNamesStep('snake'),
        steps.SelectColumnsStep(['creation_year', 'total_seasons']),
    ]
    columns = Recipe(workflow).prepare(raw_data).bake(raw_data).columns

    # Only the two selected, snake_case columns remain.
    for kept in ('creation_year', 'total_seasons'):
        assert kept in columns

    # The unselected column and all original spellings are absent.
    dropped_names = (
        'series_name',
        'series_Name',
        'CreationYear',
        'Total Seasons',
    )
    for dropped in dropped_names:
        assert dropped not in columns
Пример #6
0
def test_skip_on_other_set_workflow(raw_data):
    """
    Sequential execution of the recipe where the FilterStep, declared with
    role='train', must be skipped when baking with a different role.
    """
    workflow = [
        steps.CleanColumnNamesStep('snake'),
        steps.SelectColumnsStep(['creation_year', 'total_seasons']),
        steps.FilterStep('creation_year in [2017, 2020]', role='train'),
    ]
    prepared = Recipe(workflow).prepare(raw_data)

    # No explicit role here; the expected shape implies the train-only
    # filter still runs (presumably 'train' is the default role -- confirm).
    train_set = prepared.bake(raw_data)
    assert train_set.shape == (2, 2)  # Filter Step was applied

    test_set = prepared.bake(raw_data, role='test')
    assert test_set.shape == (6, 2)  # Filter Step was skipped
Пример #7
0
def test_custom_roles_workflow(raw_data):
    """
    Steps tagged with different roles must only run for the role being baked.
    """
    workflow = [
        steps.CleanColumnNamesStep('snake'),
        steps.SelectColumnsStep(['creation_year', 'total_seasons']),
        steps.FilterStep('creation_year == 2020', role='train'),
        steps.SelectColumnsStep(['creation_year'], role='train'),
        steps.SelectColumnsStep(['total_seasons'], role='test'),
    ]
    prepared = Recipe(workflow).prepare(raw_data)

    train_set = prepared.bake(raw_data, role='train')
    assert train_set.shape == (1, 1)  # Filter Step was applied
    assert 'creation_year' in train_set.columns  # Only creation_year in train role

    test_set = prepared.bake(raw_data, role='test')
    assert test_set.shape == (6, 1)  # Filter Step was skipped
    assert 'total_seasons' in test_set.columns  # Only total_seasons in test role