def test_set_index_twice(sample_df):
    """Re-applying the current index / time index must be a no-op.

    Builds a DataTable whose index is ``'id'`` and whose time index is
    ``'signup_date'``, then sets the very same columns again, first through
    ``set_index`` / ``set_time_index`` and afterwards through the property
    setters.  After every step the tags, the reported index and the
    underlying data are expected to be unchanged.
    """
    table = DataTable(sample_df, index='id', time_index='signup_date')
    baseline = table.df.copy()

    def assert_data_untouched(current_df):
        # Compare against the snapshot taken right after construction.
        pd.testing.assert_frame_equal(to_pandas(baseline),
                                      to_pandas(current_df))

    # set_index() with the column that already is the index
    reindexed = table.set_index('id')
    assert 'index' in reindexed['id'].semantic_tags
    assert reindexed.index == 'id'
    assert reindexed == table
    assert_data_untouched(reindexed.df)

    # set_time_index() with the column that already is the time index
    retimed = table.set_time_index('signup_date')
    assert 'time_index' in retimed['signup_date'].semantic_tags
    assert retimed.time_index == 'signup_date'
    assert retimed == table
    assert_data_untouched(retimed.df)

    # index setter with the same column
    table.index = 'id'
    assert 'index' in table['id'].semantic_tags
    assert table.index == 'id'
    assert_data_untouched(table.df)

    # time_index setter with the same column
    table.time_index = 'signup_date'
    assert 'time_index' in table['signup_date'].semantic_tags
    assert table.time_index == 'signup_date'
    assert_data_untouched(table.df)
def test_datatable_init_with_numpy(sample_df_pandas):
    """A DataTable can be built from a numpy array with integer column names.

    Covers three constructions: the sample data converted to numpy with an
    index given by position, a small integer array with an index set
    afterwards, and the same integer array with an explicit time index,
    logical types and semantic tags.
    """
    sample_array = sample_df_pandas.to_numpy()
    table = DataTable(sample_array, index=0)

    # Column names are the integer positions 0 .. n_columns - 1.
    assert set(table.columns.keys()) == set(range(len(sample_array[0])))
    assert table.index == 0
    expected_types = ((0, Categorical), (1, NaturalLanguage), (5, Datetime))
    for position, expected_type in expected_types:
        assert table[position].logical_type == expected_type

    int_array = np.array([[1, 0], [2, 4], [3, 6], [4, 1]])
    table = DataTable(int_array)
    for position in (0, 1):
        assert table[position].logical_type == Integer
    table = table.set_index(0)
    assert table.index == 0

    table = DataTable(
        int_array,
        time_index=0,
        logical_types={0: 'Double', 1: Datetime},
        semantic_tags={1: 'numeric_datetime'},
    )
    assert table.time_index == 0
    assert table[0].logical_type == Double
    assert table[0].semantic_tags == {'numeric', 'time_index'}
    assert table[1].logical_type == Datetime
    assert table[1].semantic_tags == {'numeric_datetime'}
def test_set_index(sample_df):
    """Setting and changing the index via ``set_index`` and the setter.

    Each scenario checks that the chosen column carries the ``'index'``
    semantic tag and that no other column does.  For pandas input the
    index of the underlying DataFrame is checked as well.
    """

    def assert_no_other_column_is_index(table, index_name):
        # Every column except ``index_name`` must lack the 'index' tag.
        others = [
            column for column in table.columns.values()
            if column.name != index_name
        ]
        assert all('index' not in column.semantic_tags for column in others)

    # Setting the index with set_index() returns a new table and leaves
    # the original one without an index.
    original = DataTable(sample_df)
    with_id = original.set_index('id')
    assert with_id is not original
    assert with_id.index == 'id'
    assert original.index is None
    assert with_id.columns['id'].semantic_tags == {'index'}
    assert_no_other_column_is_index(with_id, 'id')

    # Changing the index with set_index() does not alter the source table.
    with_name = with_id.set_index('full_name')
    assert with_id.index == 'id'
    assert with_name.columns['full_name'].semantic_tags == {'index'}
    assert_no_other_column_is_index(with_name, 'full_name')

    # Setting the index through the property setter
    table = DataTable(sample_df)
    table.index = 'id'
    assert table.index == 'id'
    assert 'index' in table.columns['id'].semantic_tags
    assert_no_other_column_is_index(table, 'id')

    # Changing the index through the property setter
    table.index = 'full_name'
    assert 'index' in table.columns['full_name'].semantic_tags
    assert_no_other_column_is_index(table, 'full_name')

    # Changing the index also changes the underlying DataFrame - pandas only
    if not isinstance(sample_df, pd.DataFrame):
        return

    table = DataTable(sample_df)
    table.index = 'id'
    expected_ids = [0, 1, 2, 3]
    assert (table.to_dataframe().index == expected_ids).all()
    assert (table._dataframe.index == expected_ids).all()

    table.index = 'full_name'
    assert (table.to_dataframe().index
            == table.to_dataframe()['full_name']).all()
    assert (table._dataframe.index
            == table.to_dataframe()['full_name']).all()
def test_underlying_index(sample_df):
    """The underlying DataFrame's index follows the DataTable's index.

    Marked xfail for Dask and Koalas input.  With an index set, the
    DataFrame index is expected to be an unnamed ``pd.Index`` holding the
    index column's values; without one it is expected to be a
    ``pd.RangeIndex``.
    """
    if dd and isinstance(sample_df, dd.DataFrame):
        pytest.xfail(
            'Setting underlying index is not supported with Dask input')
    if ks and isinstance(sample_df, ks.DataFrame):
        pytest.xfail(
            'Setting underlying index is not supported with Koalas input')

    unspecified_index = pd.RangeIndex
    specified_index = pd.Index
    expected_ids = [0, 1, 2, 3]

    def assert_index_type(table, expected_type):
        # Exact type comparison on purpose: RangeIndex subclasses Index.
        assert type(table._dataframe.index) == expected_type
        assert type(table.to_dataframe().index) == expected_type

    # Index supplied at construction time
    table = DataTable(sample_df.copy(), index='id')
    assert table._dataframe.index.name is None
    assert (table._dataframe.index == expected_ids).all()
    assert_index_type(table, specified_index)

    # Index supplied afterwards through set_index()
    table = DataTable(sample_df.copy())
    table = table.set_index('full_name')
    assert (table._dataframe.index
            == table.to_dataframe()['full_name']).all()
    assert table._dataframe.index.name is None
    assert_index_type(table, specified_index)

    # Index changed through the property setter
    table.index = 'id'
    assert (table._dataframe.index == expected_ids).all()
    assert table._dataframe.index.name is None
    assert_index_type(table, specified_index)

    # Removing the index removes the dataframe's index as well
    table.index = None
    assert_index_type(table, unspecified_index)

    # Index column created by the DataTable itself
    table = DataTable(sample_df.copy(), index='made_index', make_index=True)
    assert (table._dataframe.index == expected_ids).all()
    assert table._dataframe.index.name is None
    assert_index_type(table, specified_index)

    # Dropping the created index column resets the dataframe's index
    without_index = table.drop('made_index')
    assert 'made_index' not in without_index.columns
    assert 'made_index' not in without_index._dataframe.columns
    assert_index_type(without_index, unspecified_index)