def test_pivot_data_time_columns(self):
    """Check ``sdt.pivot_data`` against ``DataFrame.pivot`` for each time column name.

    For every name in the list below, the ``date`` column of the ``get_data_2``
    fixture is copied into a column of that name, which is then used as the pivot
    index for both the function under test and the pandas reference pivot.
    """
    def rebuild(stamp):
        # NOTE(review): every rebuilt timestamp gets microsecond=1; the reason is
        # not visible in this test -- confirm against the fixture's date values.
        return datetime(
            stamp.year, stamp.month, stamp.day,
            stamp.hour, stamp.minute, stamp.second,
            microsecond=1
        )

    index_names = [
        'date', 'year', 'quarter', 'month', 'two_week', 'week', 'day',
        'hour', 'half_hour', 'quarter_hour', 'minute', 'second', 'microsecond'
    ]
    for index_name in index_names:
        frame = self.get_data_2()
        frame[index_name] = frame.date

        result = sdt.pivot_data(
            frame.copy(), ['name'], values=['id'], index=index_name
        )

        # Reference pivot built directly with pandas; keep only the 'id' level.
        pivoted = frame.copy().pivot(
            columns=['name'], values=['id'], index=index_name
        )
        expected = pivoted['id']

        eft.enforce_columns_in_dataframe(expected.columns, result)

        # Replace the result index with rebuilt datetimes before comparing.
        result.index = Series(result.index.tolist()).apply(rebuild)
        eft.enforce_dataframes_are_equal(result, expected)
def test_enforce_dataframes_are_equal_shape(self):
    """Check that frames of different shape raise ``EnforceError``.

    One copy of the fixture gets an extra row at label 3, so the expected
    message reports shapes (4, 3) and (3, 3).
    """
    a = self.get_data()
    a.loc[3] = [0, 0, 0]
    b = self.get_data()

    expected = r'A and b have different shapes. \(4, 3\) != \(3, 3\).'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12;
    # assertRaisesRegex is the supported spelling.
    with self.assertRaisesRegex(EnforceError, expected):
        eft.enforce_dataframes_are_equal(a, b)
def test_query_data_sqldf(self):
    """Check that ``sdt.query_data`` matches ``pandasql.sqldf`` for a plain SQL query."""
    frame = self.get_data()
    # Adjacent literals concatenate into the single query string.
    query = (
        'select * from data where '
        'Amount > 78 and '
        "Category like '%food%'"
    )

    result = sdt.query_data(frame, query)

    # The query refers to the table as "data", so bind the frame under that name.
    expected = pandasql.sqldf(query, {'data': frame})
    eft.enforce_dataframes_are_equal(result, expected)
def test_group_data(self):
    """Check ``sdt.group_data`` with a 'mean' aggregation on the 'group' column.

    The expected frame is the pandas groupby mean, with the 'name' and 'date'
    columns taken from the first row of each group.
    """
    frame = self.get_data_2()
    result = sdt.group_data(frame, 'group', 'mean')

    grouped = frame.groupby('group', as_index=False)
    expected = grouped.mean()
    for column in ('name', 'date'):
        expected[column] = grouped.first()[column]

    eft.enforce_dataframes_are_equal(result, expected)
def test_enforce_dataframes_are_equal_columns(self):
    """Check that frames with different columns raise ``EnforceError``.

    Each copy of the fixture gets one column the other lacks ('pizza' and
    'taco'); the expected message lists both names.
    """
    a = self.get_data()
    a['pizza'] = [1, 1, 1]
    b = self.get_data()
    b['taco'] = [0, 0, 0]

    expected = r"A and b have different columns: \['pizza', 'taco'\]\."
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12;
    # assertRaisesRegex is the supported spelling.
    with self.assertRaisesRegex(EnforceError, expected):
        eft.enforce_dataframes_are_equal(a, b)
def test_query_data_empty(self):
    """Check that a query matching no rows yields an empty frame with the same columns."""
    frame = self.get_data()
    # Adjacent literals concatenate into the single query string.
    query = (
        'select * from data where '
        "Category ~ '.*' and "
        "Category = 'empty' and "
        'Amount > 78'
    )

    result = sdt.query_data(frame, query)

    expected = DataFrame(columns=frame.columns)
    eft.enforce_dataframes_are_equal(result, expected)
def test_update(self):
    """Check that ``Database.update`` stores the conformed contents of the data file.

    The expected frame is built by reading ``config['data_path']`` as CSV and
    passing it through ``sdt.conform`` with the config's actions and columns.
    """
    with TemporaryDirectory() as root:
        config, _ = self.get_config(root)

        database = db.Database(config)
        result = database.update()._data

        raw = pd.read_csv(config['data_path'], index_col=None)
        expected = sdt.conform(
            raw,
            actions=config['conform'],
            columns=config['columns'],
        )
        eft.enforce_dataframes_are_equal(result, expected)
def test_query_data_not_regex(self):
    """Check the negated regex operators of ``sdt.query_data``.

    The same filter is written four ways (``not regex`` and ``!~``, each with
    and without quotes around the pattern); every form must return the same
    rows.
    """
    frame = self.get_data()
    expected = frame.loc[:0, ['Description', 'Amount']]

    head = 'select Description,Amount from data where ' 'Amount > 34 and '
    conditions = [
        "Category not regex ignore|fancy",    # not regex
        "Category not regex 'ignore|fancy'",  # not regex + ''
        "Category !~ ignore|fancy",           # !~
        "Category !~ 'ignore|fancy'",         # !~ + ''
    ]
    for condition in conditions:
        result = sdt.query_data(frame, head + condition)
        eft.enforce_dataframes_are_equal(result, expected)
def test_query_data_regex(self):
    """Check the regex operators of ``sdt.query_data``.

    The same filter is written four ways (``regex`` and ``~``, each with and
    without quotes around the pattern); every form must return the same rows.
    """
    frame = self.get_data()
    expected = frame.loc[:1, ['Description', 'Amount']]

    head = 'select Description,Amount from data where ' 'Amount > 34 and '
    conditions = [
        "Category regex food|fancy",    # regex
        "Category regex 'food|fancy'",  # regex + ''
        "Category ~ food|fancy",        # ~
        "Category ~ 'food|fancy'",      # ~ + ''
    ]
    for condition in conditions:
        result = sdt.query_data(frame, head + condition)
        eft.enforce_dataframes_are_equal(result, expected)
def test_enforce_dataframes_are_equal_values(self):
    """Check that frames with differing values raise ``EnforceError``.

    The 'foo' and 'baz' columns of one copy of the fixture are overwritten,
    and the expected message embeds a table of the differing cells rendered
    with ``DataFrame.to_string``.
    """
    a = self.get_data()
    b = self.get_data()
    b['foo'] = [3, 3, 3]
    b['baz'] = [9, 9, 9]

    # One row per differing cell: column name, value in a, value in b.
    rows = [
        ['foo', 1, 3],
        ['foo', 2, 3],
        ['baz', 7, 9],
        ['baz', 8, 9],
    ]
    table = DataFrame(rows, columns=['column', 'a', 'b']).to_string()
    # NOTE(review): the 'DatFrames' spelling presumably mirrors the message
    # raised by eft -- confirm before changing it.
    expected = f'DatFrames have different values:\n{table}'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12;
    # assertRaisesRegex is the supported spelling.
    with self.assertRaisesRegex(EnforceError, expected):
        eft.enforce_dataframes_are_equal(a, b)
def test_to_records_no_mutation(self):
    """Check that ``Database._to_records`` leaves the frame passed to it unchanged."""
    frame = sdt.conform(self.get_data())
    snapshot = frame.copy()

    # The return value is irrelevant here; only the effect on ``frame`` is checked.
    db.Database._to_records(frame)

    eft.enforce_dataframes_are_equal(frame, snapshot)
def test_enforce_dataframes_are_equal(self):
    """Check that a frame compared with itself passes, with and without a None column."""
    frame = self.get_data()
    eft.enforce_dataframes_are_equal(frame, frame)

    # Repeat the self-comparison after adding a column that holds only None.
    frame['foo'] = None
    eft.enforce_dataframes_are_equal(frame, frame)