def test_data_clean(self):
    """data_clean should replace external_id "2XyZyJ" with "2XyVyJ"."""
    wrong_id = '2XyZyJ'
    right_id = '2XyVyJ'
    for frame in self.dfs:
        # Precondition: the uncleaned frame still contains the wrong id.
        self.assertTrue(any(frame.external_id == wrong_id))

        cleaned = data_clean(frame)

        # After cleaning, the wrong id is gone and the corrected id exists.
        self.assertFalse(any(cleaned.external_id == wrong_id))
        self.assertTrue(any(cleaned.external_id == right_id))
def test_redcap_merge_duplicates(self):
    """Merged data must not contain duplicate ``internal_id`` values.

    Every frame in ``self.dfs`` is run through ``remove_dupes`` and then
    ``data_clean``; the results are merged with ``merge_data`` on
    ``'internal_id'``. The test then requires the number of ids in the
    merged result to equal the number of unique ids.
    """
    clean_dfs = [data_clean(remove_dupes(df)) for df in self.dfs]
    final_data = merge_data(clean_dfs, self.names, 'internal_id')

    ids = final_data['internal_id']
    full_ids = len(ids)
    unique_ids = len(ids.unique())
    # assertEqual rather than assertTrue: assertTrue(a, b) treats ``b`` as
    # the failure message and only checks that ``a`` is truthy, so the two
    # counts would never actually be compared.
    self.assertEqual(full_ids, unique_ids)
def test_inquisit_merge_shape(self):
    """Merged data should be non-empty and have the expected column count.

    Each frame in ``self.dfs`` goes through ``remove_dupes`` then
    ``data_clean`` before being merged on ``'internal_id'``.
    """
    prepared = [data_clean(remove_dupes(frame)) for frame in self.dfs]
    merged = merge_data(prepared, self.names, 'internal_id')

    merged_shape = merged.shape
    # Expected width: the summed column counts of the source frames, minus
    # one per frame, plus one. Presumably this accounts for the shared
    # 'internal_id' key appearing once after the merge -- confirm against
    # merge_data.
    expected_cols = sum(frame.shape[1] for frame in self.dfs) - len(self.dfs) + 1

    self.assertTrue(merged_shape[0] > 0)
    self.assertEqual(expected_cols, merged_shape[1])
def test_block2_duplicates(self):
    """``self.block2`` must have no duplicate ``internal_id`` after cleaning.

    The frame is passed through ``remove_dupes`` and then ``data_clean``;
    the test then requires the number of ids to equal the number of
    unique ids.
    """
    df = data_clean(remove_dupes(self.block2))

    ids = df['internal_id']
    full_ids = len(ids)
    unique_ids = len(ids.unique())
    # assertEqual rather than assertTrue: assertTrue(a, b) treats ``b`` as
    # the failure message and only checks that ``a`` is truthy, so the two
    # counts would never actually be compared.
    self.assertEqual(full_ids, unique_ids)