def test_set_index_twice(sample_df):
    """Re-setting the current index and time index must change nothing.

    The table starts with ``id`` as index and ``signup_date`` as time index.
    Both are then set again, first through ``set_index`` /
    ``set_time_index`` and then through the property setters. After each
    step the semantic tag and the index attribute are still in place and the
    underlying data equals the snapshot taken right after construction.
    """
    dt = DataTable(sample_df, index='id', time_index='signup_date')
    snapshot = dt.df.copy()

    def assert_data_unchanged(table):
        # Compare against the snapshot taken before any re-setting happened.
        pd.testing.assert_frame_equal(to_pandas(snapshot),
                                      to_pandas(table.df))

    # set_index() with the column that is already the index
    same_index = dt.set_index('id')
    assert 'index' in same_index['id'].semantic_tags
    assert same_index.index == 'id'
    assert same_index == dt
    assert_data_unchanged(same_index)

    # set_time_index() with the column that is already the time index
    same_time_index = dt.set_time_index('signup_date')
    assert 'time_index' in same_time_index['signup_date'].semantic_tags
    assert same_time_index.time_index == 'signup_date'
    assert same_time_index == dt
    assert_data_unchanged(same_time_index)

    # Property setter: index
    dt.index = 'id'
    assert 'index' in dt['id'].semantic_tags
    assert dt.index == 'id'
    assert_data_unchanged(dt)

    # Property setter: time index
    dt.time_index = 'signup_date'
    assert 'time_index' in dt['signup_date'].semantic_tags
    assert dt.time_index == 'signup_date'
    assert_data_unchanged(dt)
# Example #2
# 0
def test_datatable_init_with_numpy(sample_df_pandas):
    """Check that a DataTable can be built directly from NumPy arrays.

    Covers three cases: an array converted from the sample DataFrame (columns
    are addressed by integer position), a small integer array that later
    gets an index through ``set_index``, and the same integer array created
    with an explicit time index, logical types and semantic tags.
    """
    values = sample_df_pandas.to_numpy()

    # Array derived from the sample DataFrame: column names are 0..n-1.
    table = DataTable(values, index=0)
    column_count = len(values[0])
    assert set(table.columns.keys()) == set(range(column_count))
    assert table.index == 0
    inferred = ((0, Categorical), (1, NaturalLanguage), (5, Datetime))
    for column, expected_type in inferred:
        assert table[column].logical_type == expected_type

    # Plain integer array: both columns are inferred as Integer.
    int_values = np.array([[1, 0], [2, 4], [3, 6], [4, 1]])
    table = DataTable(int_values)
    for column in (0, 1):
        assert table[column].logical_type == Integer
    table = table.set_index(0)
    assert table.index == 0

    # Integer array with explicit typing information supplied at init.
    init_options = {
        'time_index': 0,
        'logical_types': {0: 'Double', 1: Datetime},
        'semantic_tags': {1: 'numeric_datetime'},
    }
    table = DataTable(int_values, **init_options)
    assert table.time_index == 0

    first, second = table[0], table[1]
    assert first.logical_type == Double
    assert first.semantic_tags == {'numeric', 'time_index'}
    assert second.logical_type == Datetime
    assert second.semantic_tags == {'numeric_datetime'}
def test_set_index(sample_df):
    """Check setting and changing the index via ``set_index`` and the setter.

    ``set_index`` is expected to return a new table and leave the table it
    was called on as it was, while the ``index`` property setter updates the
    table in place. In both cases only the chosen column may carry the
    ``'index'`` semantic tag. For pandas input, the index of the underlying
    DataFrame is checked as well.
    """
    def assert_no_other_index_tags(table, index_name):
        # Every column except the index column must lack the 'index' tag.
        others = (col for col in table.columns.values()
                  if col.name != index_name)
        assert all('index' not in col.semantic_tags for col in others)

    # Setting the index with set_index()
    base = DataTable(sample_df)
    by_id = base.set_index('id')
    assert by_id is not base
    assert by_id.index == 'id'
    assert base.index is None
    assert by_id.columns['id'].semantic_tags == {'index'}
    assert_no_other_index_tags(by_id, 'id')

    # Changing the index with set_index()
    by_name = by_id.set_index('full_name')
    assert by_id.index == 'id'
    assert by_name.columns['full_name'].semantic_tags == {'index'}
    assert_no_other_index_tags(by_name, 'full_name')

    # Setting the index with the property setter
    dt = DataTable(sample_df)
    dt.index = 'id'
    assert dt.index == 'id'
    assert 'index' in dt.columns['id'].semantic_tags
    assert_no_other_index_tags(dt, 'id')

    # Changing the index with the property setter
    dt.index = 'full_name'
    assert 'index' in dt.columns['full_name'].semantic_tags
    assert_no_other_index_tags(dt, 'full_name')

    # The remaining checks look at the underlying DataFrame - pandas only
    if not isinstance(sample_df, pd.DataFrame):
        return

    expected_ids = [0, 1, 2, 3]
    dt = DataTable(sample_df)
    dt.index = 'id'
    assert (dt.to_dataframe().index == expected_ids).all()
    assert (dt._dataframe.index == expected_ids).all()

    dt.index = 'full_name'
    exported_index = dt.to_dataframe().index
    name_column = dt.to_dataframe()['full_name']
    assert (exported_index == name_column).all()
    internal_index = dt._dataframe.index
    name_column = dt.to_dataframe()['full_name']
    assert (internal_index == name_column).all()
def test_underlying_index(sample_df):
    """Check the index of the DataFrame that backs a DataTable.

    Marked as an expected failure for Dask and Koalas input. While an index
    column is set, the backing DataFrame's index must have no name and be of
    exactly type ``pd.Index``; once the index is removed, or the index column
    is dropped, it must be of exactly type ``pd.RangeIndex``.
    """
    unsupported_backends = (
        (dd, 'Setting underlying index is not supported with Dask input'),
        (ks, 'Setting underlying index is not supported with Koalas input'),
    )
    for backend, reason in unsupported_backends:
        if backend and isinstance(sample_df, backend.DataFrame):
            pytest.xfail(reason)

    default_index_type = pd.RangeIndex
    explicit_index_type = pd.Index
    expected_ids = [0, 1, 2, 3]

    def assert_index_type(table, expected_type):
        # Exact type comparison on purpose: RangeIndex subclasses Index.
        assert type(table._dataframe.index) == expected_type
        assert type(table.to_dataframe().index) == expected_type

    # Index supplied at construction time
    dt = DataTable(sample_df.copy(), index='id')
    assert dt._dataframe.index.name is None
    assert (dt._dataframe.index == expected_ids).all()
    assert_index_type(dt, explicit_index_type)

    # Index supplied afterwards through set_index()
    dt = DataTable(sample_df.copy())
    dt = dt.set_index('full_name')
    assert (dt._dataframe.index == dt.to_dataframe()['full_name']).all()
    assert dt._dataframe.index.name is None
    assert_index_type(dt, explicit_index_type)

    # Index changed through the property setter
    dt.index = 'id'
    assert (dt._dataframe.index == expected_ids).all()
    assert dt._dataframe.index.name is None
    assert_index_type(dt, explicit_index_type)

    # Removing the index also removes the DataFrame's index
    dt.index = None
    assert_index_type(dt, default_index_type)

    # Index column generated with make_index=True
    dt = DataTable(sample_df.copy(), index='made_index', make_index=True)
    assert (dt._dataframe.index == expected_ids).all()
    assert dt._dataframe.index.name is None
    assert_index_type(dt, explicit_index_type)

    # Dropping the generated index column
    without_index = dt.drop('made_index')
    assert 'made_index' not in without_index.columns
    assert 'made_index' not in without_index._dataframe.columns
    assert_index_type(without_index, default_index_type)