    def test_keywords_params_combine(self):
        matcher = KeywordsMatching.from_dict({
            'path_to_search': 'testing_data/images2d',
            'filename_contains': '_g',
            'filename_removefromid': 'img|_g'
        })
        f_list, s_list = matcher.matching_subjects_and_filenames()
        self.assertEqual(len(f_list), 10)
        self.assertEqual(len(s_list), 10)

        matcher_comp = KeywordsMatching.from_dict({
            'path_to_search': 'testing_data/images2d',
            'filename_not_contains': ('_m', '_u'),
            'filename_removefromid': 'img|_g'
        })
        f_comp, s_comp = matcher_comp.matching_subjects_and_filenames()
        self.assertEqual(f_comp, f_list)
        self.assertEqual(s_comp, s_list)

        matcher = KeywordsMatching.from_dict({
            'path_to_search': 'testing_data/images2d',
            'filename_removefromid': 'img|_g|_m|_u'
        })
        with self.assertRaisesRegexp(ValueError, ""):
            matcher.matching_subjects_and_filenames()

    def test_keywords_not_contain(self):
        matcher = KeywordsMatching.from_dict({
            'path_to_search': 'testing_data/images2d',
            'filename_not_contains': 'img'
        })
        with self.assertRaisesRegexp(ValueError, ""):
            # no filename matches (every file contains 'img')
            matcher.matching_subjects_and_filenames()

        matcher = KeywordsMatching.from_dict({
            'path_to_search': 'testing_data/images2d',
            'filename_not_contains': ('_m', '_u')
        })
        f_list, s_list = matcher.matching_subjects_and_filenames()
        self.assertEqual(len(f_list), 10)
        self.assertEqual(len(s_list), 10)

        matcher_comp = KeywordsMatching.from_dict({
            'path_to_search': 'testing_data/images2d',
            'filename_contains': '_g'
        })
        f_comp, s_comp = matcher_comp.matching_subjects_and_filenames()
        self.assertEqual(len(f_comp), 10)
        self.assertEqual(len(s_comp), 10)
        self.assertEqual(f_comp, f_list)
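
The tests above combine ``filename_contains``, ``filename_not_contains`` and ``filename_removefromid``. A minimal standalone sketch of the same behaviour, assuming ``KeywordsMatching`` is importable from ``niftynet.utilities.filename_matching`` and that ``testing_data/images2d`` holds files named like img<N>_g, img<N>_m and img<N>_u (as implied by the assertions above):

from niftynet.utilities.filename_matching import KeywordsMatching

# keep only the '_g' files; stripping 'img' and '_g' leaves one unique
# subject id per file (e.g. 'img0_g...' -> '0')
matcher = KeywordsMatching.from_dict({
    'path_to_search': 'testing_data/images2d',
    'filename_contains': '_g',
    'filename_removefromid': 'img|_g'
})
filenames, subject_ids = matcher.matching_subjects_and_filenames()

# without a contains filter, stripping 'img|_g|_m|_u' maps the _g/_m/_u
# variants of each image to the same id; the test above expects this
# configuration to raise ValueError
ambiguous = KeywordsMatching.from_dict({
    'path_to_search': 'testing_data/images2d',
    'filename_removefromid': 'img|_g|_m|_u'
})
try:
    ambiguous.matching_subjects_and_filenames()
except ValueError as err:
    print('rejected:', err)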
Example #3
    def test_from_dict(self):
        with self.assertRaisesRegexp(ValueError, ""):
            KeywordsMatching.from_dict({'path_to_search': 'wrong_folder'})
        matcher = KeywordsMatching.from_dict(
            {'path_to_search': 'testing_data/images2d'})
        f_list, s_list = matcher.matching_subjects_and_filenames()
        self.assertEqual(len(f_list), 30)
        self.assertEqual(len(s_list), 30)
        self.assertEqual(s_list[0][0], 'img0_g')
Example #4
    def test_keywords_grep(self):
        matcher = KeywordsMatching.from_dict(
            {'path_to_search': 'testing_data/images2d',
             'filename_contains': 'img'})
        f_list, s_list = matcher.matching_subjects_and_filenames()
        self.assertEqual(len(f_list), 30)
        self.assertEqual(len(s_list), 30)
        # filenames containing 'img' are returned, and
        # the matched string is removed from the subject id
        self.assertEqual(s_list[0][0], '0_g')
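
As the comment above notes, the substring matched by ``filename_contains`` is itself removed when forming the subject id. A minimal sketch of that behaviour, under the same import-path and data-layout assumptions as the sketch above:

from niftynet.utilities.filename_matching import KeywordsMatching

matcher = KeywordsMatching.from_dict({
    'path_to_search': 'testing_data/images2d',
    'filename_contains': 'img'
})
filenames, subject_ids = matcher.matching_subjects_and_filenames()
# 'img' is stripped from each id, so 'img0_g...' is listed under '0_g'
print(subject_ids[0][0])  # expected: '0_g', as asserted in the test above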
    def grep_files_by_data_section(self, modality_name):
        """
        list all files by a given input data section::
            if the ``csv_file`` property of ``data_param[modality_name]``
            corresponds to a file, read the list from the file;
            otherwise
                write the list to ``csv_file``.

        :return: a table with two columns,
                 the column names are ``(COLUMN_UNIQ_ID, modality_name)``.
        """
        if modality_name not in self.data_param:
            tf.logging.fatal(
                'unknown section name [%s], '
                'current input section names: %s.', modality_name,
                list(self.data_param))
            raise ValueError

        # input data section must have a ``csv_file`` section for loading
        # or writing filename lists
        if isinstance(self.data_param[modality_name], dict):
            mod_spec = self.data_param[modality_name]
        else:
            mod_spec = vars(self.data_param[modality_name])

        #########################
        # guess the csv_file path
        #########################
        temp_csv_file = None
        try:
            csv_file = os.path.expanduser(mod_spec.get('csv_file', None))
            if not os.path.isfile(csv_file):
                # writing to the same folder as data_split_file
                default_csv_file = os.path.join(
                    os.path.dirname(self.data_split_file),
                    '{}.csv'.format(modality_name))
                tf.logging.info(
                    '`csv_file = %s` not found, '
                    'writing to "%s" instead.', csv_file, default_csv_file)
                csv_file = default_csv_file
                if os.path.isfile(csv_file):
                    tf.logging.info('Overwriting existing: "%s".', csv_file)
            csv_file = os.path.abspath(csv_file)
        except (AttributeError, KeyError, TypeError):
            tf.logging.debug('`csv_file` not specified, writing the list of '
                             'filenames to a temporary file.')
            import tempfile
            temp_csv_file = os.path.join(tempfile.mkdtemp(),
                                         '{}.csv'.format(modality_name))
            csv_file = temp_csv_file

        ##############################################
        # writing csv file if path_to_search specified
        ##############################################
        if mod_spec.get('path_to_search', None):
            if not temp_csv_file:
                tf.logging.info(
                    '[%s] search file folders, writing csv file %s',
                    modality_name, csv_file)
            # grep files by section properties and write csv
            try:
                matcher = KeywordsMatching.from_dict(
                    input_dict=mod_spec,
                    default_folder=self.default_image_file_location)
                match_and_write_filenames_to_csv([matcher], csv_file)
            except (IOError, ValueError) as reading_error:
                tf.logging.warning(
                    'Ignoring input section: [%s], '
                    'due to the following error:', modality_name)
                tf.logging.warning(repr(reading_error))
                return pandas.DataFrame(
                    columns=[COLUMN_UNIQ_ID, modality_name])
        else:
            tf.logging.info(
                '[%s] using existing csv file %s, skipped filenames search',
                modality_name, csv_file)

        if not os.path.isfile(csv_file):
            tf.logging.fatal('[%s] csv file %s not found.', modality_name,
                             csv_file)
            raise IOError
        ###############################
        # loading the file as dataframe
        ###############################
        try:
            csv_list = pandas.read_csv(csv_file,
                                       header=None,
                                       dtype=(str, str),
                                       names=[COLUMN_UNIQ_ID, modality_name],
                                       skipinitialspace=True)
        except Exception as csv_error:
            tf.logging.fatal(repr(csv_error))
            raise

        if temp_csv_file:
            shutil.rmtree(os.path.dirname(temp_csv_file), ignore_errors=True)

        return csv_list
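
A sketch of the kind of input section that drives the csv-writing branch of ``grep_files_by_data_section`` above. The section name 'T1', the paths and the ``reader`` object are hypothetical placeholders; the keys mirror the properties that the method and ``KeywordsMatching.from_dict`` read:

# hypothetical data_param section, for illustration only
data_param = {
    'T1': {
        'path_to_search': 'testing_data/images2d',  # enables the csv-writing branch
        'filename_contains': '_g',                  # keyword filter for KeywordsMatching
        'csv_file': 'output/T1.csv'                 # where the filename list is written
    }
}
# a reader configured with this data_param would return a two-column
# pandas.DataFrame (COLUMN_UNIQ_ID, 'T1') from:
#     reader.grep_files_by_data_section('T1')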
Example #6
    def test_default(self):
        matcher = KeywordsMatching()
        with self.assertRaisesRegexp(ValueError, ""):
            matcher.matching_subjects_and_filenames()
        with self.assertRaisesRegexp(AttributeError, ""):
            KeywordsMatching.from_dict('wrong_argument')

    def grep_files_by_data_section(self, modality_name):
        """
        list all files by a given input data section::
            if the ``csv_file`` property of ``data_param[modality_name]``
            corresponds to a file, read the list from the file;
            otherwise
                write the list to ``csv_file``.

        :return: a table with two columns,
                 the column names are ``(COLUMN_UNIQ_ID, modality_name)``.
        """
        if modality_name not in self.data_param:
            tf.logging.fatal('unknown section name [%s], '
                             'current input section names: %s.',
                             modality_name, list(self.data_param))
            raise ValueError

        # input data section must have a ``csv_file`` section for loading
        # or writing filename lists
        if isinstance(self.data_param[modality_name], dict):
            mod_spec = self.data_param[modality_name]
        else:
            mod_spec = vars(self.data_param[modality_name])

        #########################
        # guess the csv_file path
        #########################
        temp_csv_file = None
        try:
            csv_file = os.path.expanduser(mod_spec.get('csv_file', None))
            if not os.path.isfile(csv_file):
                # writing to the same folder as data_split_file
                default_csv_file = os.path.join(
                    os.path.dirname(self.data_split_file),
                    '{}.csv'.format(modality_name))
                tf.logging.info('`csv_file = %s` not found, '
                                'writing to "%s" instead.',
                                csv_file, default_csv_file)
                csv_file = default_csv_file
                if os.path.isfile(csv_file):
                    tf.logging.info('Overwriting existing: "%s".', csv_file)
            csv_file = os.path.abspath(csv_file)
        except (AttributeError, KeyError, TypeError):
            tf.logging.debug('`csv_file` not specified, writing the list of '
                             'filenames to a temporary file.')
            import tempfile
            temp_csv_file = os.path.join(
                tempfile.mkdtemp(), '{}.csv'.format(modality_name))
            csv_file = temp_csv_file

        ##############################################
        # writing csv file if path_to_search specified
        ##############################################
        if mod_spec.get('path_to_search', None):
            if not temp_csv_file:
                tf.logging.info(
                    '[%s] search file folders, writing csv file %s',
                    modality_name, csv_file)
            # grep files by section properties and write csv
            try:
                matcher = KeywordsMatching.from_dict(
                    input_dict=mod_spec,
                    default_folder=self.default_image_file_location)
                match_and_write_filenames_to_csv([matcher], csv_file)
            except (IOError, ValueError) as reading_error:
                tf.logging.warning('Ignoring input section: [%s], '
                                   'due to the following error:',
                                   modality_name)
                tf.logging.warning(repr(reading_error))
                return pandas.DataFrame(
                    columns=[COLUMN_UNIQ_ID, modality_name])
        else:
            tf.logging.info(
                '[%s] using existing csv file %s, skipped filenames search',
                modality_name, csv_file)

        if not os.path.isfile(csv_file):
            tf.logging.fatal(
                '[%s] csv file %s not found.', modality_name, csv_file)
            raise IOError
        ###############################
        # loading the file as dataframe
        ###############################
        try:
            csv_list = pandas.read_csv(
                csv_file,
                header=None,
                dtype=(str, str),
                names=[COLUMN_UNIQ_ID, modality_name],
                skipinitialspace=True)
        except Exception as csv_error:
            tf.logging.fatal(repr(csv_error))
            raise
        finally:
            if temp_csv_file:
                os.remove(temp_csv_file)
                os.rmdir(os.path.dirname(temp_csv_file))
        return csv_list
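
The two versions of ``grep_files_by_data_section`` above differ only in how the temporary csv file is discarded: the first removes its directory with ``shutil.rmtree`` after a successful ``read_csv``, while the second performs the cleanup in a ``finally`` block so the file is removed even when ``read_csv`` raises. A minimal, self-contained sketch of the ``finally`` pattern (the csv content is illustrative):

import os
import shutil
import tempfile

temp_dir = tempfile.mkdtemp()
temp_csv_file = os.path.join(temp_dir, 'modality.csv')
try:
    # stand-in for match_and_write_filenames_to_csv followed by read_csv
    with open(temp_csv_file, 'w') as csv_out:
        csv_out.write('subject_0,/data/subject_0.nii.gz\n')
    with open(temp_csv_file) as csv_in:
        rows = [line.strip().split(',') for line in csv_in]
finally:
    # remove the temporary directory whether or not the read succeeded
    shutil.rmtree(temp_dir, ignore_errors=True)
print(rows)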