def test_vmerge_blind_append(self):
    """Vertically merge two subsets without naming any key column.

    ``vmerge`` is called with no ``on``/``left_on``/``right_on`` argument,
    and the result is handed to ``verify_vmerge_data`` with
    ``blind_append=True``.
    """
    source_meta = self.example_data_A_meta
    source_data = self.example_data_A_data

    # Left dataset: the [:10] slice restricted to the left column set.
    left_columns = [
        'unique_id', 'gender', 'locality', 'ethnicity', 'q2', 'q3'
    ]
    meta_l, data_l = subset_dataset(
        source_meta, source_data[:10], columns=left_columns)

    # Right dataset: the [5:15] slice, so its range overlaps the left
    # slice; only some of its columns are shared with the left dataset.
    right_columns = [
        'unique_id', 'gender', 'religion', 'q1', 'q2', 'q8', 'q9'
    ]
    meta_r, data_r = subset_dataset(
        source_meta, source_data[5:15], columns=right_columns)

    # Merge without any key arguments (the blind-append case).
    left = (meta_l, data_l)
    right = (meta_r, data_r)
    meta_vm, data_vm = vmerge(left, right, verbose=False)

    # Check the merged dataframe and meta.
    verify_vmerge_data(
        self, data_l, data_r, data_vm, meta_vm, blind_append=True)
def test_vmerge_basic(self):
    """Vertically merge two subsets keyed on ``unique_id``.

    The merge is run twice, once spelling the key as
    ``left_on``/``right_on`` and once as ``on``; each result is checked
    with ``verify_vmerge_data``.
    """
    source_meta = self.example_data_A_meta
    source_data = self.example_data_A_data

    # Left dataset: the [:10] slice restricted to the left column set.
    left_columns = [
        'unique_id', 'gender', 'locality', 'ethnicity', 'q2', 'q3'
    ]
    meta_l, data_l = subset_dataset(
        source_meta, source_data[:10], columns=left_columns)

    # Right dataset: the [5:15] slice restricted to the right column set.
    right_columns = [
        'unique_id', 'gender', 'religion', 'q1', 'q2', 'q8', 'q9'
    ]
    meta_r, data_r = subset_dataset(
        source_meta, source_data[5:15], columns=right_columns)

    left = (meta_l, data_l)
    right = (meta_r, data_r)

    # Same merge, two ways of naming the key: first left_on/right_on,
    # then the single `on` argument.
    key_spellings = (
        {'left_on': 'unique_id', 'right_on': 'unique_id'},
        {'on': 'unique_id'},
    )
    for key_kwargs in key_spellings:
        meta_vm, data_vm = vmerge(left, right, verbose=False, **key_kwargs)
        # Check the merged dataframe and meta.
        verify_vmerge_data(self, data_l, data_r, data_vm, meta_vm)
def test_vmerge_row_id(self):
    """Vertically merge with a row-id column and check its type.

    Three id combinations are merged on ``unique_id`` with
    ``row_id_name='DataSource'``:

    * ``left_id=1, right_id=2``      -> meta type ``'int'``, int64 column
    * ``left_id=1, right_id=2.0``    -> meta type ``'float'``, float64 column
    * ``left_id='W1', right_id=2.0`` -> meta type ``'str'``, string column

    After each merge the ``DataSource`` meta entry and column dtype are
    asserted, then ``verify_vmerge_data`` checks the merged result.
    """
    source_meta = self.example_data_A_meta
    source_data = self.example_data_A_data

    # Left dataset: the [:10] slice restricted to the left column set.
    left_columns = [
        'unique_id', 'gender', 'locality', 'ethnicity', 'q2', 'q3'
    ]
    meta_l, data_l = subset_dataset(
        source_meta, source_data[:10], columns=left_columns)

    # Right dataset: the [5:15] slice restricted to the right column set.
    right_columns = [
        'unique_id', 'gender', 'religion', 'q1', 'q2', 'q8', 'q9'
    ]
    meta_r, data_r = subset_dataset(
        source_meta, source_data[5:15], columns=right_columns)

    left = (meta_l, data_l)
    right = (meta_r, data_r)

    def merge_and_check(left_id, right_id, meta_type, dtype_ok,
                        verify_right_id):
        # Merge with the given ids, then assert the row-id meta entry and
        # column dtype before running the shared merged-data checks.
        meta_vm, data_vm = vmerge(
            left, right,
            on='unique_id',
            row_id_name='DataSource',
            left_id=left_id,
            right_id=right_id,
            verbose=False)

        expected = {
            'text': {'en-GB': 'vmerge row id'},
            'type': meta_type,
            'name': 'DataSource'
        }
        actual = meta_vm['columns']['DataSource']
        self.assertEqual(actual, expected)
        self.assertTrue(dtype_ok(data_vm['DataSource']))

        # Check the merged dataframe and meta.
        verify_vmerge_data(
            self, data_l, data_r, data_vm, meta_vm,
            row_id_name='DataSource',
            left_id=left_id,
            right_id=verify_right_id)

    # Both ids are ints.
    merge_and_check(
        1, 2, 'int',
        lambda column: column.dtype == 'int64',
        2)

    # One int id and one float id.
    merge_and_check(
        1, 2.0, 'float',
        lambda column: column.dtype == 'float64',
        2.0)

    # One str id and one float id. The verification step is given the
    # string '2.0' rather than the float passed to vmerge -- presumably
    # because the ids are stored as strings in this case; confirm against
    # vmerge/verify_vmerge_data.
    merge_and_check(
        'W1', 2.0, 'str',
        is_string_dtype,
        '2.0')