def _assert_only_time_index(dt, time_index_name):
    """Assert ``time_index_name`` is the only column of ``dt`` tagged 'time_index'."""
    assert 'time_index' in dt.columns[time_index_name].semantic_tags
    # Generator form lets all() stop at the first offending column instead of
    # building an intermediate list.
    assert all(
        'time_index' not in col.semantic_tags
        for col in dt.columns.values()
        if col.name != time_index_name
    )


def test_set_time_index(sample_df):
    """Check that the time index can be set and changed on a DataTable.

    Covers both ``set_time_index()`` (which is expected to return a new table
    and leave the original untouched) and the ``time_index`` property setter
    (which updates the table in place). In every scenario exactly one column
    must carry the 'time_index' semantic tag.
    """
    # Test setting time index with set_time_index()
    dt = DataTable(sample_df)
    new_dt = dt.set_time_index('signup_date')
    assert new_dt is not dt
    assert dt.time_index is None
    assert new_dt.time_index == 'signup_date'
    _assert_only_time_index(new_dt, 'signup_date')

    # Test changing time index with set_time_index()
    sample_df['transaction_date'] = pd.to_datetime('2015-09-02')
    dt = DataTable(sample_df)
    new_dt = dt.set_time_index('signup_date')
    assert new_dt.time_index == 'signup_date'
    new_dt2 = new_dt.set_time_index('transaction_date')
    # The property must follow the change, not just the semantic tags.
    assert new_dt2.time_index == 'transaction_date'
    _assert_only_time_index(new_dt2, 'transaction_date')

    # Test setting index using setter
    dt = DataTable(sample_df)
    assert dt.time_index is None
    dt.time_index = 'signup_date'
    assert dt.time_index == 'signup_date'
    _assert_only_time_index(dt, 'signup_date')

    # Test changing time index with setter
    # (re-assigning the column keeps this scenario self-contained)
    sample_df['transaction_date'] = pd.to_datetime('2015-09-02')
    dt = DataTable(sample_df)
    dt.time_index = 'signup_date'
    assert dt.time_index == 'signup_date'
    dt.time_index = 'transaction_date'
    # The property must follow the change, not just the semantic tags.
    assert dt.time_index == 'transaction_date'
    _assert_only_time_index(dt, 'transaction_date')
def test_set_index_twice(sample_df):
    """Re-applying the current index / time index must leave the table unchanged.

    Exercises both the ``set_index`` / ``set_time_index`` methods and the
    ``index`` / ``time_index`` property setters, comparing the underlying
    data against a snapshot taken right after construction.
    """
    dt = DataTable(sample_df, index='id', time_index='signup_date')
    original_df = dt.df.copy()

    def assert_data_unchanged(current_df):
        # Both sides go through to_pandas before the frame comparison.
        pd.testing.assert_frame_equal(to_pandas(original_df),
                                      to_pandas(current_df))

    # set_index() with the column that is already the index
    same_index_dt = dt.set_index('id')
    assert 'index' in same_index_dt['id'].semantic_tags
    assert same_index_dt.index == 'id'
    assert same_index_dt == dt
    assert_data_unchanged(same_index_dt.df)

    # set_time_index() with the column that is already the time index
    same_time_index_dt = dt.set_time_index('signup_date')
    assert 'time_index' in same_time_index_dt['signup_date'].semantic_tags
    assert same_time_index_dt.time_index == 'signup_date'
    assert same_time_index_dt == dt
    assert_data_unchanged(same_time_index_dt.df)

    # index setter with the current index
    dt.index = 'id'
    assert 'index' in dt['id'].semantic_tags
    assert dt.index == 'id'
    assert_data_unchanged(dt.df)

    # time_index setter with the current time index
    dt.time_index = 'signup_date'
    assert 'time_index' in dt['signup_date'].semantic_tags
    assert dt.time_index == 'signup_date'
    assert_data_unchanged(dt.df)
def test_numeric_index_strings(time_index_df):
    """Time index columns must be datetime or numeric, unless cast to a numeric type.

    The first part checks that a TypeError is raised for unsuitable time
    index columns; the second part checks that the 'strs' column is accepted
    once its logical type is overridden to 'Double'.
    """
    error_msg = 'Time index column must contain datetime or numeric values'
    rejected_kwargs = (
        {'time_index': 'strs'},
        {'time_index': 'ints', 'logical_types': {'ints': 'Categorical'}},
        # NOTE(review): the time index here is 'letters' while the logical
        # type override targets 'strs' -- confirm this pairing is intended.
        {'time_index': 'letters', 'logical_types': {'strs': 'Integer'}},
    )
    for kwargs in rejected_kwargs:
        with pytest.raises(TypeError, match=error_msg):
            DataTable(time_index_df, **kwargs)

    expected_tags = {'time_index', 'numeric'}

    # Time index supplied at construction together with the Double override
    dt = DataTable(time_index_df,
                   time_index='strs',
                   logical_types={'strs': 'Double'})
    strs_col = dt['strs']
    assert dt.time_index == 'strs'
    assert strs_col.logical_type == Double
    assert strs_col.semantic_tags == expected_tags

    # Time index set afterwards via set_time_index()
    dt = DataTable(time_index_df, logical_types={'strs': 'Double'})
    dt = dt.set_time_index('strs')
    strs_col = dt['strs']
    assert dt.time_index == 'strs'
    assert strs_col.logical_type == Double
    assert strs_col.semantic_tags == expected_tags
def test_numeric_time_index_dtypes(numeric_time_index_df):
    """Numeric columns can serve as the time index and keep their logical type.

    Walks the time index through the 'ints', 'floats' and 'with_null'
    columns, checking the logical type and semantic tags after each switch.
    """
    expected_tags = {'time_index', 'numeric'}

    dt = DataTable(numeric_time_index_df, time_index='ints')
    ints_col = dt['ints']
    assert dt.time_index == 'ints'
    assert ints_col.logical_type == Integer
    assert ints_col.semantic_tags == expected_tags

    dt = dt.set_time_index('floats')
    floats_col = dt['floats']
    assert dt.time_index == 'floats'
    assert floats_col.logical_type == Double
    assert floats_col.semantic_tags == expected_tags

    dt = dt.set_time_index('with_null')
    with_null_col = dt['with_null']
    assert dt.time_index == 'with_null'
    # For a ks.DataFrame input the column is expected to be Double rather
    # than Integer; `ks` may be falsy, in which case the check is skipped.
    is_ks_frame = ks and isinstance(numeric_time_index_df, ks.DataFrame)
    expected_ltype = Double if is_ks_frame else Integer
    assert with_null_col.logical_type == expected_ltype
    assert with_null_col.semantic_tags == expected_tags