def test_datatable_drop_indices(sample_df):
    """Check that dropping an index column clears only that index setting.

    A DataTable is built with ``id`` as index and ``signup_date`` as time
    index. Each column is then dropped in turn (once by name, once as a
    one-element list) and the result must have lost both the column and the
    matching index attribute while keeping the other index untouched.
    """
    dt = DataTable(sample_df, index='id', time_index='signup_date')
    # Precondition: both index settings are registered on the source table.
    assert (dt.index, dt.time_index) == ('id', 'signup_date')

    # Drop the index column, passed as a bare column name.
    without_id = dt.drop('id')
    assert without_id.index is None
    assert without_id.time_index == 'signup_date'
    assert 'id' not in without_id.columns

    # Drop the time index column, passed as a list. This starts from `dt`
    # again and still expects `id` to be the index afterwards.
    without_signup = dt.drop(['signup_date'])
    assert without_signup.time_index is None
    assert without_signup.index == 'id'
    assert 'signup_date' not in without_signup.columns
def test_underlying_index(sample_df):
    """Check how the DataTable index setting drives the underlying index.

    With an index set, the wrapped dataframe is expected to carry an unnamed
    ``pd.Index``; with no index (cleared or dropped) it is expected to carry a
    ``pd.RangeIndex``. Dask and Koalas inputs are marked as expected failures.
    """
    xfail_reasons = (
        (dd, 'Setting underlying index is not supported with Dask input'),
        (ks, 'Setting underlying index is not supported with Koalas input'),
    )
    for lib, reason in xfail_reasons:
        # `lib` is falsy when the optional backend is unavailable.
        if lib and isinstance(sample_df, lib.DataFrame):
            pytest.xfail(reason)

    def check_index_type(table, expected):
        # Exact type comparison on purpose: pd.RangeIndex is a subclass of
        # pd.Index, so isinstance could not tell the two cases apart.
        assert type(table._dataframe.index) is expected
        assert type(table.to_dataframe().index) is expected

    expected_ids = [0, 1, 2, 3]

    # Index supplied at construction time.
    dt = DataTable(sample_df.copy(), index='id')
    assert dt._dataframe.index.name is None
    assert (dt._dataframe.index == expected_ids).all()
    check_index_type(dt, pd.Index)

    # Index supplied afterwards through set_index().
    dt = DataTable(sample_df.copy())
    dt = dt.set_index('full_name')
    assert (dt._dataframe.index == dt.to_dataframe()['full_name']).all()
    assert dt._dataframe.index.name is None
    check_index_type(dt, pd.Index)

    # Index replaced through the property setter.
    dt.index = 'id'
    assert (dt._dataframe.index == expected_ids).all()
    assert dt._dataframe.index.name is None
    check_index_type(dt, pd.Index)

    # test removing index removes the dataframe's index
    dt.index = None
    check_index_type(dt, pd.RangeIndex)

    # Index column created on the fly via make_index=True.
    dt = DataTable(sample_df.copy(), index='made_index', make_index=True)
    assert (dt._dataframe.index == expected_ids).all()
    assert dt._dataframe.index.name is None
    check_index_type(dt, pd.Index)

    # Dropping the generated index column removes it from both the table and
    # the wrapped dataframe, and the underlying index type becomes RangeIndex.
    dt_dropped = dt.drop('made_index')
    assert 'made_index' not in dt_dropped.columns
    assert 'made_index' not in dt_dropped._dataframe.columns
    check_index_type(dt_dropped, pd.RangeIndex)
# Example #3
# 0
def test_underlying_index_on_update(sample_df):
    """Track the underlying index type across DataTable operations.

    After replacing the data with the last two rows of ``sample_df``, the
    test checks the type of the wrapped dataframe's index following row
    selection, column selection, and every operation that removes the index
    (drop, tag reset, set_types without retained index tags, pop).
    Dask and Koalas inputs are marked as expected failures.

    NOTE(review): pd.Int64Index was deprecated in pandas 1.4 and removed in
    pandas 2.0, so this test needs a pandas version that still provides it.
    """
    xfail_reasons = (
        (dd, 'Setting underlying index is not supported with Dask input'),
        (ks, 'Setting underlying index is not supported with Koalas input'),
    )
    for lib, reason in xfail_reasons:
        # `lib` is falsy when the optional backend is unavailable.
        if lib and isinstance(sample_df, lib.DataFrame):
            pytest.xfail(reason)

    def check_index_type(table, expected):
        # Exact type comparison on purpose: pd.RangeIndex and pd.Int64Index
        # are subclasses of pd.Index, so isinstance could not tell them apart.
        assert type(table._dataframe.index) is expected
        assert type(table.to_dataframe().index) is expected

    dt = DataTable(sample_df.copy(), index='id')

    # Swap in the last two rows; the index values are expected to follow.
    dt.update_dataframe(sample_df.tail(2))
    assert (dt._dataframe.index == [2, 3]).all()
    assert dt._dataframe.index.name is None
    check_index_type(dt, pd.Int64Index)

    # Operations that keep the index column.
    check_index_type(dt.iloc[[0, 1]], pd.Index)
    check_index_type(dt.select(dt.index), pd.Int64Index)
    check_index_type(dt[['age']], pd.Int64Index)

    # Operations that remove the index; each returns a new table.
    check_index_type(dt.drop(dt.index), pd.RangeIndex)
    check_index_type(
        dt.reset_semantic_tags(retain_index_tags=False), pd.RangeIndex)
    retyped = dt.set_types(retain_index_tags=False,
                           semantic_tags={'id': 'numeric'})
    check_index_type(retyped, pd.RangeIndex)

    # pop() is checked on `dt` itself, so it must come last.
    dt.pop(dt.index)
    check_index_type(dt, pd.RangeIndex)
# Example #4
# 0
def test_datatable_drop_errors(sample_df):
    """Check that drop() raises ValueError naming the columns it cannot find.

    Each case pairs the argument passed to ``drop`` with the exact message
    expected in the raised ``ValueError``.
    """
    dt = DataTable(sample_df)

    cases = (
        # A single unknown column name.
        ('not_present', "['not_present'] not found in DataTable"),
        # A list containing 'age': the expected message omits 'age'.
        (['age', 'not_present'], "['not_present'] not found in DataTable"),
        # Several unknown entries, including a non-string one.
        (['not_present1', 4], "['not_present1', 4] not found in DataTable"),
    )
    for to_drop, message in cases:
        # re.escape keeps the brackets in the message from being read as a
        # regex character class by pytest's `match`.
        with pytest.raises(ValueError, match=re.escape(message)):
            dt.drop(to_drop)