Пример #1
0
 def test_update_map_duplicate_keys(self):
     """update() on the stern_test2 file should return a non-empty duplicates result."""
     key_map = KeyMap(self.key_cols, self.target_cols)
     events = pd.read_csv(self.stern_test2_path, sep='\t', header=0)
     repeated = key_map.update(events)
     message = "update should return a list of duplicates if repeated keys"
     self.assertTrue(repeated, message)
Пример #2
0
 def test_update_map_not_unique(self):
     """Check the shape of col_map after update() on the stern_test2 file.

     The map is expected to have 4 columns and one row for each input row
     that update() did not report as a duplicate; the duplicates result
     itself must be non-empty.
     """
     key_map = KeyMap(self.key_cols, self.target_cols)
     events = pd.read_csv(self.stern_test2_path, sep='\t', header=0)
     repeated = key_map.update(events)

     column_count = len(key_map.col_map.columns)
     self.assertEqual(column_count, 4,
                      "update should produce correct number of columns")

     row_count = len(key_map.col_map)
     expected_rows = len(events) - len(repeated)
     self.assertEqual(row_count, expected_rows,
                      "update should produce the correct number of rows")

     self.assertTrue(repeated, "update using event file has duplicates")
Пример #3
0
 def test_update_map(self):
     """update() with the stern map file should keep every row and report no duplicates.

     Both the column map and the lookup dictionary are expected to hold
     exactly one entry per row of the input file.
     """
     key_map = KeyMap(self.key_cols, self.target_cols)
     stern_map = pd.read_csv(self.stern_map_path, sep='\t', header=0)
     repeated = key_map.update(stern_map)
     col_map, lookup = key_map.col_map, key_map.map_dict
     self.assertEqual(len(col_map), len(stern_map),
                      "update map should contain all the entries")
     self.assertEqual(len(lookup), len(stern_map),
                      "update dictionary should contain all the entries")
     self.assertFalse(
         repeated, "update should not have any duplicates for stern map")
Пример #4
0
 def test_remap_a(self):
     """remap() should report no missing keys for each selected events file.

     A KeyMap keyed on 'type' is updated directly from ``self.stern_map_path``
     and then applied to every file that get_file_list returns for
     ``self.data_dir`` with prefix 'sternberg', suffix '_events' and
     extension '.tsv'.
     """
     key_cols = ['type']
     target_cols = ['event_type', 'task_role', 'letter']
     key_map = KeyMap(key_cols, target_cols, 'my_name')
     key_map.update(self.stern_map_path)
     event_file_list = get_file_list(self.data_dir,
                                     name_prefix='sternberg',
                                     name_suffix="_events",
                                     extensions=[".tsv"])
     for event_file in event_file_list:
         # subTest labels a failure with the offending file and lets the
         # remaining files still be checked.
         with self.subTest(event_file=event_file):
             # Only the missing-keys result is checked; the remapped frame is unused.
             _, missing = key_map.remap(event_file)
             self.assertFalse(missing)
Пример #5
0
 def test_update_map_missing_key(self):
     """update() must raise HedFileError when a key column is absent from the data.

     The KeyMap is built with an additional key column named 'another'; the
     test expects update() on the stern map data to reject it.
     """
     keys = self.key_cols + ['another']
     t_map = KeyMap(keys, self.target_cols)
     stern_df = pd.read_csv(self.stern_map_path, delimiter='\t', header=0)
     # assertRaises fails the test when nothing is raised and lets any other
     # exception type propagate with its original traceback.
     with self.assertRaises(
             HedFileError,
             msg='update should have thrown a HedFileError exception '
                 'when key column was missing'):
         t_map.update(stern_df)
Пример #6
0
 def test_remap(self):
     """remap() of stern_test1 should report nothing missing and fill the target columns.

     Checks that the missing result is empty, that the row count is
     unchanged, and that row 3 has 'show_letter' in 'event_type'.
     """
     t_map = KeyMap(self.key_cols, self.target_cols)
     # NOTE(review): na_values is the single string ",null" - confirm this is
     # intended rather than a list of separate markers.
     stern_df = pd.read_csv(self.stern_map_path,
                            delimiter='\t',
                            header=0,
                            keep_default_na=False,
                            na_values=",null")
     t_map.update(stern_df)
     stern_test1 = pd.read_csv(self.stern_test1_path,
                               delimiter='\t',
                               header=0)
     df_new, missing = t_map.remap(stern_test1)
     self.assertFalse(
         missing,
         "remap should return empty missing when all keys are accounted for"
     )
     self.assertEqual(len(stern_test1), len(df_new),
                      "remap should not change number rows in file")
     # This message previously repeated the row-count text above, which
     # misdescribed a failure of the value check.
     self.assertEqual(df_new.iloc[3]["event_type"], 'show_letter',
                      "remap should set event_type to show_letter in row 3")
Пример #7
0
    def test_lookup_cols(self):
        """Each row's hash should resolve through map_dict to a col_map row of the same type.

        The lookup is verified first against the stern map data the KeyMap
        was updated from, and then against the stern_test1 file.
        """
        key_map = KeyMap(self.key_cols, self.target_cols)
        stern_map = pd.read_csv(self.stern_map_path,
                                sep='\t',
                                header=0,
                                keep_default_na=False,
                                na_values=",null")
        key_map.update(stern_map)
        col_map = key_map.col_map

        def check_rows(frame, message):
            # Hash each row's key columns, look up its position in the map,
            # and compare the 'type' value stored there with the row's own.
            for _, row in frame.iterrows():
                row_hash = get_row_hash(row, self.key_cols)
                position = key_map.map_dict[row_hash]
                self.assertEqual(col_map.iloc[position]['type'], row['type'],
                                 message)

        check_rows(stern_map, "The key should be looked up for same map")

        other_events = pd.read_csv(self.stern_test1_path, sep='\t', header=0)
        check_rows(other_events, "The key should be looked up for other file")
Пример #8
0
 def test_remap_missing(self):
     """remap() should report missing keys and put 'n/a' in targets for unknown keys.

     Rows 3 and 10 of stern_test1 have their 'type' overwritten with values
     the test expects to be absent from the map before remapping.
     """
     t_map = KeyMap(self.key_cols, self.target_cols)
     stern_df = pd.read_csv(self.stern_map_path,
                            delimiter='\t',
                            header=0,
                            keep_default_na=False,
                            na_values=",null")
     t_map.update(stern_df)
     stern_test1 = pd.read_csv(self.stern_test1_path,
                               delimiter='\t',
                               header=0)
     # Overwrite two key values so that remap cannot find them in the map.
     stern_test1.at[3, 'type'] = 'baloney'
     stern_test1.at[10, 'type'] = 'special'
     df_new, missing = t_map.remap(stern_test1)
     self.assertTrue(
         missing,
         "remap should return nonempty missing when keys are missing"
     )
     self.assertEqual(len(stern_test1), len(df_new),
                      "remap should not change number rows in file")
     self.assertEqual(
         df_new.iloc[3]["event_type"], 'n/a',
         "remap should have n/a in the targets when key is missing")
Пример #9
0
    def test_constructor(self):
        """Exercise KeyMap construction with valid, invalid and empty-target arguments.

        Expects a KeyMap whose col_map is a DataFrame for ordinary arguments,
        a HedFileError when the key columns are None or overlap the target
        columns, and a one-column col_map when the target list is empty.
        """
        t_map = KeyMap(self.key_cols, self.target_cols)
        self.assertIsInstance(t_map, KeyMap)
        df = t_map.col_map
        self.assertIsInstance(df, pd.DataFrame)

        # assertRaises fails the test when nothing is raised and lets any
        # other exception type propagate with its original traceback.
        with self.assertRaises(
                HedFileError,
                msg='KeyMap should have thrown a HedFileError exception '
                    'when no key columns'):
            KeyMap(None, ['a'])

        with self.assertRaises(
                HedFileError,
                msg='KeyMap should have thrown a HedFileError exception '
                    'when key and target columns overlap'):
            KeyMap(['a', 'b', 'c'], ['b', 'c', 'd'])

        emap1 = KeyMap(['a'], [])
        self.assertIsInstance(emap1, KeyMap,
                              "KeyMap: target columns can be empty")
        self.assertEqual(
            len(emap1.col_map.columns), 1,
            "The column map should have only key columns when no target")