def test_BaseDataOps_invoke(self):
    """Ensure BaseDataOps.from_object invokes __init__ of the parent class.

    The frame exposed as ``.df`` on the object built by ``from_object``
    must equal the frame that was handed in.
    """
    df_test = make_dirty_numeric_dataframe(keycol)
    df_read = BaseDataOps.from_object(df_test).df
    # assert_frame_equal raises AssertionError on any mismatch and returns
    # None otherwise, so it is the assertion itself; comparing its return
    # value to None adds nothing.
    pd.testing.assert_frame_equal(df_test, df_read)
def test_to_numeric_coerce(self):
    """Ensure to_numeric returns only numerics with the coerce default.

    After the conversion every value in the key column must be an
    ``np.float64`` instance.
    """
    Conv = self.create_ColumnConversions_class(
        make_dirty_numeric_dataframe())
    Conv.to_numeric(keycol)
    # Use a TestCase assertion (not a bare ``assert``) so the check is not
    # stripped under ``python -O`` and matches the other tests' style.
    self.assertTrue(
        all(isinstance(val, np.float64) for val in Conv.df[keycol].values)
    )
def test_substitute_chars(self):
    """Ensure substitute_chars strips non-match characters from the column.

    Every character that is not a digit is replaced with an empty string;
    the remaining non-null values must then cast to int, and each cast
    value must unwrap to a Python ``int``.
    """
    Conv = self.create_ColumnConversions_class(
        make_dirty_numeric_dataframe())
    Conv.substitute_chars(keycol, '[^0-9]', '')
    cleaned = Conv.df[keycol].dropna().astype(int).values
    # Use a TestCase assertion (not a bare ``assert``) so the check is not
    # stripped under ``python -O`` and matches the other tests' style.
    self.assertTrue(all(isinstance(val.item(), int) for val in cleaned))
def test_check_nonnumeric(self):
    """Ensure check_nonnumeric returns value counts for all errors.

    The index of the returned counts, compared as strings, must contain
    exactly the entries of ``numeric_dirt_list`` (in any order).
    """
    converter = self.create_ColumnConversions_class(
        make_dirty_numeric_dataframe()
    )
    counts = converter.check_nonnumeric(keycol)
    expected = np.array(numeric_dirt_list).astype(str)
    observed = counts.index.values.astype(str)
    self.assertCountEqual(observed, expected)
def test_report_values(self):
    """Ensure report_values reports all values and returns a Series.

    The index of the result, compared as strings, must match the unique
    values of the key column in the source frame (in any order).
    """
    frame = make_dirty_numeric_dataframe()
    converter = self.create_ColumnConversions_class(frame)
    counts = converter.report_values(keycol)
    expected = frame[keycol].unique().astype(str)
    observed = counts.index.values.astype(str)
    self.assertCountEqual(observed, expected)
    self.assertIsInstance(counts, pd.Series)
def test_to_numeric_coerce_false(self):
    """Ensure to_numeric keeps non-numeric values when coerce=False.

    After the conversion, check_nonnumeric must still report every entry
    of ``numeric_dirt_list``, and the first value of the key column must
    be a Python ``int``.
    """
    converter = self.create_ColumnConversions_class(
        make_dirty_numeric_dataframe()
    )
    converter.to_numeric(keycol, coerce=False)
    counts = converter.check_nonnumeric(keycol)
    expected = np.array(numeric_dirt_list).astype(str)
    observed = counts.index.values.astype(str)
    self.assertCountEqual(observed, expected)
    first_value = converter.df[keycol].values[0]
    self.assertIsInstance(first_value, int)
def test_map_values_exhaustive(self):
    """Ensure map_values accurately maps values with exhaustive=True.

    All unique values of the key column except the last are mapped to
    consecutive integers. Afterwards the column's unique values must be
    those integers followed by a final NaN.
    """
    frame = make_dirty_numeric_dataframe()
    converter = self.create_ColumnConversions_class(frame)
    # Deliberately leave the last unique value out of the mapping.
    source_keys = list(frame[keycol].unique())[:-1]
    targets = list(range(len(source_keys)))
    mapping = {key: position for position, key in enumerate(source_keys)}
    converter.map_values(keycol, mapping, exhaustive=True)
    observed = list(converter.df[keycol].unique())
    self.assertCountEqual(targets, observed[:-1])
    self.assertTrue(np.isnan(observed[-1]))
def test_inplace_return_series_inplace(self):
    """Ensure inplace_return_series makes inplace changes.

    With ``inplace=True`` and ``return_series=False`` the call must
    return None, and the frame's key column must then hold the values of
    the series that was passed in.
    """
    frame = make_dirty_numeric_dataframe()
    replacement = make_dirty_datetime_dataframe()[keycol]
    result = inplace_return_series(
        frame, keycol, replacement, inplace=True, return_series=False,
    )
    self.assertIsNone(result)
    self.assertSequenceEqual(
        frame[keycol].values.tolist(),
        replacement.values.tolist(),
    )
def test_map_column_names_inplace(self):
    """Ensure map_column_names accurately maps names inplace.

    Every column name is mapped to its integer position. Afterwards the
    object's frame must carry the integer names, and the call itself must
    return None.
    """
    frame = make_dirty_numeric_dataframe()
    converter = self.create_ColumnConversions_class(frame)
    old_names = list(converter.df)
    new_names = list(range(len(old_names)))
    renames = dict(zip(old_names, new_names))
    result = converter.map_column_names(map_dict=renames)
    current_names = list(converter.df)
    self.assertCountEqual(new_names, current_names)
    self.assertIsNone(result)
def test_inplace_return_series_return(self):
    """Ensure inplace_return_series returns the series and leaves df alone.

    With ``inplace=False`` and ``return_series=True`` the returned series
    must carry the values of the series passed in, and the frame's key
    column must still hold its original values.
    """
    df = make_dirty_numeric_dataframe()
    # Snapshot with copy(): a bare ``df[keycol]`` may share data with the
    # frame, in which case an unwanted in-place write would also change the
    # "original" and the final check could never fail.
    series_original = df[keycol].copy()
    series = make_dirty_datetime_dataframe()[keycol]
    series_test = inplace_return_series(
        df, keycol, series, inplace=False, return_series=True,
    )
    self.assertSequenceEqual(
        series.values.tolist(),
        series_test.values.tolist(),
    )
    self.assertSequenceEqual(
        df[keycol].values.tolist(),
        series_original.values.tolist(),
    )
def test_inplace_return_series_inplace_target_col(self):
    """Ensure inplace_return_series makes inplace changes to target_col.

    With ``inplace=True`` and a ``target_column`` given, the call must
    return None, the key column must keep its original values, and the
    target column must hold the values of the series passed in.
    """
    df = make_dirty_numeric_dataframe()
    # Snapshot with copy(): a bare ``df[keycol]`` may share data with the
    # frame, in which case an unwanted write to the key column would also
    # change the "original" and the unchanged-column check could never fail.
    series_original = df[keycol].copy()
    series_target = make_dirty_datetime_dataframe()[keycol]
    target_column = 'test_target'
    series_test = inplace_return_series(
        df,
        keycol,
        series_target,
        inplace=True,
        return_series=False,
        target_column=target_column,
    )
    self.assertIsNone(series_test)
    self.assertSequenceEqual(
        df[keycol].values.tolist(),
        series_original.values.tolist(),
    )
    self.assertSequenceEqual(
        df[target_column].values.tolist(),
        series_target.values.tolist(),
    )
def test_map_column_names_return(self):
    """Ensure map_column_names returns a df and is not inplace when False.

    Every column name is mapped to its integer position with
    ``inplace=False``. The object's own frame must keep the old names,
    while the returned DataFrame must carry the integer names.
    """
    frame = make_dirty_numeric_dataframe()
    converter = self.create_ColumnConversions_class(frame)
    old_names = list(converter.df)
    new_names = list(range(len(old_names)))
    renames = dict(zip(old_names, new_names))
    renamed = converter.map_column_names(map_dict=renames, inplace=False)
    names_after_call = list(converter.df)
    returned_names = list(renamed)
    self.assertCountEqual(old_names, names_after_call)
    self.assertIsInstance(renamed, pd.DataFrame)
    self.assertCountEqual(returned_names, new_names)