Пример #1
0
def test_options_context():
    """Check the ``options`` context manager, alone and around a verb.

    The test expects an option overridden by ``options(...)`` to hold its
    new value only inside the ``with`` block, including when the block
    is left through an exception.
    """
    option = 'modify_input_data'

    # Plain toggling, no data involved
    set_option(option, False)
    assert not get_option(option)
    with options(modify_input_data=True):
        assert get_option(option)
    assert not get_option(option)

    # Same thing, observed through the result of a verb
    df = pd.DataFrame({'x': [0, 1, 2, 3]})

    with_y = df >> define(y='2*x')
    assert not df.equals(with_y)

    with options(modify_input_data=True):
        with_z = df >> define(z='3*x')
    # Inside the context the verb is expected to hand back the input itself
    assert df.equals(with_z)
    assert df is with_z

    with_w = df >> define(w='4*x')
    assert not df.equals(with_w)

    # An exception raised inside the context manager must reach the
    # caller; it must not be swallowed.
    with pytest.raises(ValueError), options(modify_input_data=True):
        raise ValueError()

    # ... and the option must be back at its previous value afterwards
    assert not get_option(option)

    # Asking for an option that does not exist is expected to raise
    with pytest.raises(ValueError):
        assert not get_option('time_travel')
Пример #2
0
def test_data_mutability():
    """Affirm which verbs modify their input data and which do not.

    The first section runs with the option value in effect on entry
    (expected to be the non-mutating default). The second section turns
    ``modify_input_data`` on and checks the verbs that are expected to
    modify their input, followed by the verbs that are expected to leave
    it alone even then.

    The option is global state, so it is switched back off in a
    ``finally`` clause: a failing assertion in this test must not change
    the behaviour of tests that run after it.
    """
    # These tests affirm that we know the consequences of the verbs.
    # A test in the Mutable section should not fail without a change
    # in implementation. That change should be triggered when Pandas
    # implements a consistent copy-on-write policy.
    #
    # When a test in the mutable section fails, it is bad news. There
    # should be no memory usage gains by reusing the original data,
    # except for the case of `rename`.
    df = pd.DataFrame({'x': [0, 1, 2, 3, 4, 5], 'y': [0, 0, 1, 1, 2, 3]})

    # Default to not mutable
    df >> define(z='x**2')
    assert 'z' not in df

    df >> group_by(z='x**2')
    assert 'z' not in df

    # Writing into the pulled values must not show up in the dataframe
    arr = df >> pull('x')
    arr[0] = 99
    assert df.loc[0, 'x'] != 99

    # Writing into the sliced result must not show up in the dataframe
    df2 = df >> slice_rows(3)
    df2.loc[0, 'x'] = 999
    assert df.loc[0, 'x'] != 999

    set_option('modify_input_data', True)
    try:
        # Mutable; each check starts from a fresh copy so that the
        # checks do not influence one another.
        df2 = df.copy()
        df2 >> define(z='x**2')
        assert 'z' in df2

        df2 = df.copy()
        df2 >> group_by(z='x**2')
        assert 'z' in df2

        df2 = df.copy()
        arr = df2 >> pull('x')
        arr[0] = 99
        assert df2.loc[0, 'x'] == 99

        # Not mutable
        df2 = df.copy()
        df2 >> create(z='x**2')
        assert 'z' not in df2

        df2 >> sample_n(3) >> define(z='x**2')
        assert 'z' not in df2

        df2 >> sample_frac(.5) >> define(z='x**2')
        assert 'z' not in df2

        df2 >> select('x') >> define(z='x**2')
        assert 'z' not in df2

        df2 >> select('x', 'y') >> define(z='x**2')
        assert 'z' not in df2

        # dataframe.rename has copy-on-write (if copy=False) that affects
        # only the new frame. This creates possibility for "action at a
        # distance" effects on the new frame when the original is modified
        result = df2 >> rename(x='z')
        df2['y'] = 3
        result['x'] = 4
        assert 'z' not in df2
        assert df2.loc[0, 'y'] != 4
        assert result.loc[0, 'x'] != 3
        assert result is df2

        df2 >> arrange('x') >> define(z='x**2')
        assert 'z' not in df2

        df2 >> query('x%2') >> define(z='x**2')
        assert 'z' not in df2

        df2 >> group_indices(z='x%2')
        assert 'z' not in df2
    finally:
        # Runs whether or not an assertion above failed, so the option
        # never stays switched on after this test.
        set_option('modify_input_data', False)