def test_keywords_params_combine(self):
    matcher = KeywordsMatching.from_dict({
        'path_to_search': 'testing_data/images2d',
        'filename_contains': '_g',
        'filename_removefromid': 'img|_g'})
    f_list, s_list = matcher.matching_subjects_and_filenames()
    self.assertEqual(len(f_list), 10)
    self.assertEqual(len(s_list), 10)

    matcher_comp = KeywordsMatching.from_dict({
        'path_to_search': 'testing_data/images2d',
        'filename_not_contains': ('_m', '_u'),
        'filename_removefromid': 'img|_g'})
    f_comp, s_comp = matcher_comp.matching_subjects_and_filenames()
    self.assertEqual(f_comp, f_list)
    self.assertEqual(s_comp, s_list)

    matcher = KeywordsMatching.from_dict({
        'path_to_search': 'testing_data/images2d',
        'filename_removefromid': 'img|_g|_m|_u'})
    with self.assertRaisesRegexp(ValueError, ''):
        # stripping every keyword should leave clashing (non-unique)
        # subject ids and therefore raise
        matcher.matching_subjects_and_filenames()

def test_keywords_not_contain(self):
    matcher = KeywordsMatching.from_dict({
        'path_to_search': 'testing_data/images2d',
        'filename_not_contains': 'img'})
    with self.assertRaisesRegexp(ValueError, ''):
        # no filename (not containing 'img') matched
        matcher.matching_subjects_and_filenames()

    matcher = KeywordsMatching.from_dict({
        'path_to_search': 'testing_data/images2d',
        'filename_not_contains': ('_m', '_u')})
    f_list, s_list = matcher.matching_subjects_and_filenames()
    self.assertEqual(len(f_list), 10)
    self.assertEqual(len(s_list), 10)

    matcher_comp = KeywordsMatching.from_dict({
        'path_to_search': 'testing_data/images2d',
        'filename_contains': '_g'})
    f_comp, s_comp = matcher_comp.matching_subjects_and_filenames()
    self.assertEqual(len(f_comp), 10)
    self.assertEqual(len(s_comp), 10)
    self.assertEqual(f_comp, f_list)

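# Reference sketch (not part of the test suite): the four configuration keys
# exercised by the tests in this file.  The values are the ones used above; the
# tests only combine the keys pairwise, so passing all four in one dictionary,
# and the variable naming below, are assumptions.
#
#     config = {
#         'path_to_search': 'testing_data/images2d',   # folder to scan
#         'filename_contains': '_g',                   # keep filenames containing this
#         'filename_not_contains': ('_m', '_u'),       # drop filenames containing any of these
#         'filename_removefromid': 'img|_g',           # pattern stripped from subject ids
#     }
#     matcher = KeywordsMatching.from_dict(config)
#     file_list, subject_ids = matcher.matching_subjects_and_filenames()
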
def test_from_dict(self):
    with self.assertRaisesRegexp(ValueError, ''):
        KeywordsMatching.from_dict({'path_to_search': 'wrong_folder'})

    matcher = KeywordsMatching.from_dict(
        {'path_to_search': 'testing_data/images2d'})
    f_list, s_list = matcher.matching_subjects_and_filenames()
    self.assertEqual(len(f_list), 30)
    self.assertEqual(len(s_list), 30)
    self.assertEqual(s_list[0][0], 'img0_g')

def test_keywords_grep(self):
    matcher = KeywordsMatching.from_dict(
        {'path_to_search': 'testing_data/images2d',
         'filename_contains': 'img'})
    f_list, s_list = matcher.matching_subjects_and_filenames()
    self.assertEqual(len(f_list), 30)
    self.assertEqual(len(s_list), 30)
    # filenames matching 'img' are returned, and
    # the matched string is removed from the subject_id
    self.assertEqual(s_list[0][0], '0_g')

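# Bridging sketch (illustrative only): the same matcher objects can be handed to
# ``match_and_write_filenames_to_csv``, the helper used by
# ``grep_files_by_data_section`` below, to persist the matched
# (subject_id, filename) pairs.  The output path below is a placeholder.
#
#     import os
#     import tempfile
#
#     matcher = KeywordsMatching.from_dict(
#         {'path_to_search': 'testing_data/images2d',
#          'filename_contains': 'img'})
#     csv_path = os.path.join(tempfile.mkdtemp(), 'images2d.csv')
#     match_and_write_filenames_to_csv([matcher], csv_path)
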
def test_default(self):
    matcher = KeywordsMatching()
    with self.assertRaisesRegexp(ValueError, ''):
        matcher.matching_subjects_and_filenames()
    with self.assertRaisesRegexp(AttributeError, ''):
        KeywordsMatching.from_dict('wrong_argument')

def grep_files_by_data_section(self, modality_name):
    """
    list all files by a given input data section::

        if the ``csv_file`` property of ``data_param[modality_name]``
        corresponds to a file, read the list from the file;
        otherwise write the list to ``csv_file``.

    :return: a table with two columns, the column names are
        ``(COLUMN_UNIQ_ID, modality_name)``.
    """
    if modality_name not in self.data_param:
        tf.logging.fatal(
            'unknown section name [%s], '
            'current input section names: %s.',
            modality_name, list(self.data_param))
        raise ValueError

    # input data section must have a ``csv_file`` section for loading
    # or writing filename lists
    if isinstance(self.data_param[modality_name], dict):
        mod_spec = self.data_param[modality_name]
    else:
        mod_spec = vars(self.data_param[modality_name])

    #########################
    # guess the csv_file path
    #########################
    temp_csv_file = None
    try:
        csv_file = os.path.expanduser(mod_spec.get('csv_file', None))
        if not os.path.isfile(csv_file):
            # writing to the same folder as data_split_file
            default_csv_file = os.path.join(
                os.path.dirname(self.data_split_file),
                '{}.csv'.format(modality_name))
            tf.logging.info(
                '`csv_file = %s` not found, '
                'writing to "%s" instead.', csv_file, default_csv_file)
            csv_file = default_csv_file
            if os.path.isfile(csv_file):
                tf.logging.info('Overwriting existing: "%s".', csv_file)
        csv_file = os.path.abspath(csv_file)
    except (AttributeError, KeyError, TypeError):
        tf.logging.debug('`csv_file` not specified, writing the list of '
                         'filenames to a temporary file.')
        import tempfile
        temp_csv_file = os.path.join(
            tempfile.mkdtemp(), '{}.csv'.format(modality_name))
        csv_file = temp_csv_file

    ###############################################
    # writing csv file if path_to_search specified
    ###############################################
    if mod_spec.get('path_to_search', None):
        if not temp_csv_file:
            tf.logging.info(
                '[%s] search file folders, writing csv file %s',
                modality_name, csv_file)
        # grep files by section properties and write csv
        try:
            matcher = KeywordsMatching.from_dict(
                input_dict=mod_spec,
                default_folder=self.default_image_file_location)
            match_and_write_filenames_to_csv([matcher], csv_file)
        except (IOError, ValueError) as reading_error:
            tf.logging.warning(
                'Ignoring input section: [%s], '
                'due to the following error:', modality_name)
            tf.logging.warning(repr(reading_error))
            return pandas.DataFrame(
                columns=[COLUMN_UNIQ_ID, modality_name])
    else:
        tf.logging.info(
            '[%s] using existing csv file %s, skipped filenames search',
            modality_name, csv_file)

    if not os.path.isfile(csv_file):
        tf.logging.fatal(
            '[%s] csv file %s not found.', modality_name, csv_file)
        raise IOError

    ###############################
    # loading the file as dataframe
    ###############################
    try:
        csv_list = pandas.read_csv(
            csv_file,
            header=None,
            dtype=(str, str),
            names=[COLUMN_UNIQ_ID, modality_name],
            skipinitialspace=True)
    except Exception as csv_error:
        tf.logging.fatal(repr(csv_error))
        raise
    finally:
        if temp_csv_file:
            os.remove(temp_csv_file)
            os.rmdir(os.path.dirname(temp_csv_file))
    return csv_list

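# Usage sketch: the enclosing class is not shown in this excerpt, so the object
# name ``partitioner`` and how it is constructed are assumptions; the attribute
# names are the ones read by the method above, and 'T1' is an illustrative
# section name.
#
#     partitioner.data_param = {
#         'T1': {'path_to_search': 'testing_data/images2d',
#                'filename_contains': 'img'}}
#     partitioner.data_split_file = 'dataset_split.csv'
#     partitioner.default_image_file_location = '.'
#     file_table = partitioner.grep_files_by_data_section('T1')
#     # ``file_table`` is a pandas DataFrame with columns
#     # (COLUMN_UNIQ_ID, 'T1'): one row per matched file.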