Пример #1
0
def test_sample_or_partition_seed_success():
    """
    Sample by value using a seed at the integer limit.

    Per the original note: seeds of 4294967296 or higher (integer limit)
    will be set to 0, and seeds lower than 0 will be set to 0. Only the
    upper bound is exercised here; the expected frame is sampled with
    ``random_state=0``.
    """
    columns = ['sepallength', 'sepalwidth', 'petalwidth', 'petallength']
    df = util.iris(columns, size=10)
    # Snapshot taken before execution so the expectation is built from a
    # copy that the generated code never sees.
    reference = df.copy()

    instance = SampleOrPartitionOperation(
        parameters={'type': 'value', 'seed': 4294967296, 'value': 10},
        named_inputs={'input data': 'df'},
        named_outputs={'sampled data': 'out'})
    result = util.execute(instance.generate_code(), {'df': df})

    expected = reference.sample(n=10, random_state=0)
    assert result['out'].equals(expected)
Пример #2
0
def test_sample_or_partition_fraction_percentage_success():
    """
    Compare generated code for a percentage 'fraction' without a seed.

    The expected statement uses ``params['fraction'] * 0.01`` as ``frac``
    and ``None`` as ``random_state``; the comparison is done on the ASTs.
    """
    params = {'fraction': 45}
    instance = SampleOrPartitionOperation(
        params,
        named_inputs={'input data': 'input_1'},
        named_outputs={'sampled data': 'output_1'})
    generated = instance.generate_code()

    template = "output_1 = input_1.sample(frac={}, random_state={})"
    expected_code = template.format(params['fraction'] * 0.01, 'None')

    same, diff = compare_ast(ast.parse(generated), ast.parse(expected_code))
    assert same, diff
Пример #3
0
def test_sample_or_partition_minimal_params_success():
    """
    Compare generated code when only 'fraction' and 'seed' are given.

    Both parameters are passed as strings; the expected statement samples
    with ``frac=0.03`` and uses the seed as ``random_state``.
    """
    params = {'fraction': '3', 'seed': '0'}
    instance = SampleOrPartitionOperation(
        params,
        named_inputs={'input data': 'input_1'},
        named_outputs={'sampled data': 'output_1'})
    generated = instance.generate_code()

    template = "output_1 = input_1.sample(frac={}, random_state={})"
    expected_code = template.format('0.03', params['seed'])

    same, diff = compare_ast(ast.parse(generated), ast.parse(expected_code))
    assert same, diff
Пример #4
0
def test_sample_or_partition_no_output_implies_no_code_success():
    """With no named outputs, ``generate_code()`` is expected to be None."""
    instance = SampleOrPartitionOperation(
        parameters={'type': 'percent', 'fraction': 60},
        named_inputs={'input data': 'df'},
        named_outputs={})
    generated = instance.generate_code()
    assert generated is None
Пример #5
0
def test_sample_or_partition_type_head_success():
    """
    Compare generated code for the TYPE_HEAD sampling type.

    The expected statement is a ``head`` call with the configured 'value';
    the comparison is done on the ASTs.
    """
    params = {
        'value': 365,
        'seed': 0,
        'type': SampleOrPartitionOperation.TYPE_HEAD
    }
    instance = SampleOrPartitionOperation(
        params,
        named_inputs={'input data': 'input_1'},
        named_outputs={'sampled data': 'output_1'})
    generated = instance.generate_code()

    template = "output_1 = input_1.head({})"
    expected_code = template.format(params['value'])

    same, diff = compare_ast(ast.parse(generated), ast.parse(expected_code))
    assert same, diff
Пример #6
0
def test_sample_or_partition_type_value_success():
    """
    Compare generated code for the TYPE_VALUE sampling type.

    'value' and 'seed' are passed as strings; the expected statement
    samples ``n`` rows with ``random_state=0``.
    """
    params = {
        'value': '400',
        'seed': '0',
        'type': SampleOrPartitionOperation.TYPE_VALUE
    }
    instance = SampleOrPartitionOperation(
        params,
        named_inputs={'input data': 'input_1'},
        named_outputs={'sampled data': 'output_1'})
    generated = instance.generate_code()

    template = "output_1 = input_1.sample(n={}, random_state=0)"
    expected_code = template.format(params['value'])

    same, diff = compare_ast(ast.parse(generated), ast.parse(expected_code))
    assert same, diff
Пример #7
0
def test_sample_or_partition_fraction_missing_failure():
    """Constructing the operation with only a 'seed' must raise ValueError."""
    params = {'seed': '0'}
    inputs = {'input data': 'input_1'}
    outputs = {'output data': 'output_1'}
    # Only the constructor call sits inside the raises block, so the
    # ValueError is attributed to it.
    with pytest.raises(ValueError):
        SampleOrPartitionOperation(params,
                                   named_inputs=inputs,
                                   named_outputs=outputs)
Пример #8
0
def test_sample_or_partition_head_success():
    """
    Execute the generated code for type 'head' with a value of 2.

    The output is expected to hold exactly the first two rows of the
    input frame.
    """
    columns = ['sepallength', 'sepalwidth', 'petalwidth', 'petallength']
    df = util.iris(columns, size=10)
    # Snapshot taken before execution; the expectation is sliced from it.
    reference = df.copy()

    instance = SampleOrPartitionOperation(
        parameters={'type': 'head', 'value': 2},
        named_inputs={'input data': 'df'},
        named_outputs={'sampled data': 'out'})
    result = util.execute(instance.generate_code(), {'df': df})

    sampled = result['out']
    assert len(sampled) == 2
    assert sampled.equals(reference.iloc[:2, :])
Пример #9
0
def test_sample_or_partition_success():
    """
    Execute the generated code with empty parameters.

    The 'out' entry of the result is compared against a fresh
    ``util.iris(size=slice_size)`` frame.
    """
    slice_size = 10
    frame_name = 'df'
    columns = ['sepallength', 'sepalwidth', 'petalwidth', 'petallength']
    frame = util.iris(columns, slice_size)

    instance = SampleOrPartitionOperation(
        parameters={},
        named_inputs={'input data': frame_name},
        named_outputs={'output data': 'out'})
    result = util.execute(instance.generate_code(), {frame_name: frame})

    expected = util.iris(size=slice_size)
    assert result['out'].equals(expected)
Пример #10
0
def test_sample_or_partition_invalid_value_failure():
    """A negative 'value' with TYPE_HEAD must raise ValueError on construction."""
    params = {
        'value': -365,
        'seed': '0',
        'type': SampleOrPartitionOperation.TYPE_HEAD
    }
    inputs = {'input data': 'input_1'}
    outputs = {'output data': 'output_1'}

    # Only the constructor call sits inside the raises block, so the
    # ValueError is attributed to it.
    with pytest.raises(ValueError):
        SampleOrPartitionOperation(params,
                                   named_inputs=inputs,
                                   named_outputs=outputs)
Пример #11
0
def test_sample_or_partition_missing_parameters_fail():
    """
    Constructing the operation with empty parameters must raise ValueError.

    The error message is expected to mention that 'fraction' must be
    within 0<=x<=1 for the current sampling type.
    """
    arguments = {
        'parameters': {},
        'named_inputs': {
            'input data': 'df',
        },
        'named_outputs': {
            'sampled data': 'out'
        }
    }
    with pytest.raises(ValueError) as val_err:
        SampleOrPartitionOperation(**arguments)
    # Inspect the raised exception itself (``.value``): the string form of
    # the ExceptionInfo wrapper differs between pytest versions and is not
    # a reliable place to look for the message.
    assert "Parameter 'fraction' must be 0<=x<=1 if is using the current type of" \
           " sampling in task" in str(val_err.value)
Пример #12
0
def test_sample_or_partition_invalid_value_param_fail():
    """
    A 'value' of -1 with type 'head' must raise ValueError on construction.

    The exception message is expected to state the ``[x>=0]`` constraint
    on 'value'.
    """
    expected_message = (
        "Parameter 'value' must be [x>=0] if is using the current type of"
        " sampling in task"
    )
    with pytest.raises(ValueError) as val_err:
        SampleOrPartitionOperation(
            parameters={'type': 'head', 'value': -1},
            named_inputs={'input data': 'df'},
            named_outputs={'sampled data': 'out'})
    raised_text = str(val_err.value)
    assert expected_message in raised_text
Пример #13
0
def test_sample_or_partition_invalid_fraction_param_fail():
    """
    A 'fraction' of 110 with type 'percent' must raise ValueError.

    The exception message is expected to state the ``0<=x<=1`` constraint
    on 'fraction'.
    """
    expected_message = (
        "Parameter 'fraction' must be 0<=x<=1 if is using the current type"
        " of sampling in task"
    )
    with pytest.raises(ValueError) as val_err:
        SampleOrPartitionOperation(
            parameters={'type': 'percent', 'fraction': 110},
            named_inputs={'input data': 'df'},
            named_outputs={'sampled data': 'out'})
    raised_text = str(val_err.value)
    assert expected_message in raised_text