示例#1
0
    def test_chop_one_spectrogram_file(self):
        '''
        Chop the single spectrogram self.spectro_file into
        snippets inside a throw-away directory. Then verify
        (1) the number of snippet files written, and (2) the
        duration and species entries in the metadata of the
        first listed snippet.
        '''

        species_label = 'DOVE'

        with tempfile.TemporaryDirectory(dir='/tmp', prefix='chopping') as tmp_root:

            spectro_chopper = SpectrogramChopper(
                self.spectro_root,
                tmp_root,
                overwrite_policy=WhenAlreadyDone.OVERWRITE
                )

            # Snippets go into a subdirectory named after the
            # directory that holds the source spectrogram:
            snippet_dir = os.path.join(
                tmp_root,
                Path(self.spectro_file).parent.stem
                )

            snippet_width = spectro_chopper.chop_one_spectro_file(
                self.spectro_file,
                snippet_dir,
                species_label,
                skip_size=self.skip_size
                )

            # Count how many snippet windows fit: the first
            # window ends at snippet_width, each following one
            # ends skip_size later; only windows ending before
            # the total duration are expected:
            expected_count = 0
            window_end = snippet_width
            while window_end < self.duration:
                expected_count += 1
                window_end += self.skip_size

            created_names = os.listdir(snippet_dir)
            self.assertEqual(len(created_names), expected_count)

            # Metadata embedded in one of the snippets:
            first_snippet = Utils.listdir_abs(snippet_dir)[0]
            _img, snippet_meta = SoundProcessor.load_spectrogram(first_snippet)

            recorded_width = round(float(snippet_meta['duration(secs)']), 3)
            self.assertEqual(recorded_width, round(snippet_width, 3))
            self.assertEqual(snippet_meta['species'], species_label)
示例#2
0
    def prep_aug_tmp_dirs(self, dst_tmp_dir):
        '''
        Prepare a temporary directory for augmentation tests.

        Each entry listed under self.full_species_root is
        copied recursively into dst_tmp_dir, under the stem
        of its own name. Per the original author's note these
        are the AMADEC single-spectrogram directory and the
        FORANA 5-spectrogram directory. An empty directory
        'aug_spectros' is then created in dst_tmp_dir as the
        destination for augmentations (one subdir per species).

        :param dst_tmp_dir: temporary directory to populate
        :type dst_tmp_dir: str
        :return: output directory for future spectro augmentations
        :rtype: str
        '''

        # All work happens inside the caller's tmp dir, so
        # that cleanup is the caller's (automatic) business:
        for src_species_dir in Utils.listdir_abs(self.full_species_root):
            shutil.copytree(
                src_species_dir,
                os.path.join(dst_tmp_dir, Path(src_species_dir).stem)
                )

        aug_root = os.path.join(dst_tmp_dir, 'aug_spectros')
        os.mkdir(aug_root)
        return aug_root
示例#3
0
    def test_listdir_abs(self):
        '''
        Verify that Utils.listdir_abs() returns as many
        entries as os.listdir() for the same directory,
        and that the first returned path exists.
        '''

        # Get the built-in directory listing
        # with just the file names:
        nearly_truth = os.listdir(self.cur_dir)

        abs_paths = Utils.listdir_abs(self.cur_dir)
        # assertEqual rather than the deprecated alias
        # assertEquals, which was removed in Python 3.12:
        self.assertEqual(len(nearly_truth), len(abs_paths))

        # Check existence of first file or dir:
        self.assertTrue(os.path.exists(abs_paths[0]))
示例#4
0
 def setUpClass(cls):
     '''
     Class-level fixture: records the directory of this
     file, the path to a saved model, the directory of
     field snippets, and the path to the first entry
     listed in that snippets directory.
     '''
     here = os.path.dirname(__file__)
     cls.cur_dir = here

     # Saved model, addressed relative to this file:
     model_rel_path = '../../birdsong/tests/models/mod_2021-05-04T13_02_14_net_resnet18_pre_True_frz_0_lr_0.01_opt_SGD_bs_128_ks_7_folds_10_gray_True_classes_34_ep9.pth'
     cls.model_path = os.path.join(here, model_rel_path)

     # Snippet images, also relative to this file:
     snips_rel_path = '../../birdsong/utils/tests/data/fld_snippets'
     cls.snips_dir = os.path.join(here, snips_rel_path)

     # One example image: whichever entry is listed first:
     snip_paths = Utils.listdir_abs(cls.snips_dir)
     cls.example_img_path = snip_paths[0]
示例#5
0
 def check_spectro_sanity(self, dirs_filled):
     '''
     Size check for generated spectrograms: fails with
     an assertion error as soon as a file in one of the
     given directories is not larger than 5000 bytes.

     :param dirs_filled: list of directories whose content
         files to check for size
     :type dirs_filled: [str]
     '''
     min_plausible_size = 5000
     for filled_dir in dirs_filled:
         for file_path in Utils.listdir_abs(filled_dir):
             num_bytes = os.stat(file_path).st_size
             self.assertTrue(num_bytes > min_plausible_size)
示例#6
0
    def record_creation_times(self, dirs_filled):
        '''
        Given a list of directories, return a dict that
        maps the absolute path of every entry in those
        directories to its Unix modification time in
        fractional epoch seconds.

        :param dirs_filled: list of directories whose
            entries are to be time stamped
        :type dirs_filled: [str]
        :return: dict of modification times
        :rtype: {str : float}
        '''

        return {
            entry_path: os.path.getmtime(entry_path)
            for filled_dir in dirs_filled
            for entry_path in Utils.listdir_abs(filled_dir)
        }
示例#7
0
    def test_generate_all_augmentations_max(self):
        '''
        Run the 'max' spectrogram augmenter on a temporary
        copy of the test species directories, and verify
        that exactly one species subdirectory is created
        under the output root, holding the expected number
        of augmented files.
        '''
        with tempfile.TemporaryDirectory(dir='/tmp',
                                         prefix='test_spectro') as dst_dir:

            out_dir = self.prep_aug_tmp_dirs(dst_dir)
            # Tell the augmenter where the src and dest roots are:
            self.spectro_augmenter_max.input_dir_path = dst_dir
            self.spectro_augmenter_max.output_dir_path = out_dir

            # AMADEC has 1 spectro, FORANA has 5.
            # MAX is 5, so AMADEC needs 4 augmentations:

            num_augs_needed = 4

            self.spectro_augmenter_max.generate_all_augmentations()

            # Should have one directory in aug_spectros
            new_dirs = Utils.listdir_abs(out_dir)
            self.assertEqual(len(new_dirs), 1)
            # AMADEC subdir should have num_augs_needed new files.
            # Must be assertEqual: assertTrue(a, b) would treat b
            # as the failure message and only test a's truthiness.
            new_files = os.listdir(new_dirs[0])
            self.assertEqual(len(new_files), num_augs_needed)
示例#8
0
    def augment_one_species(self, in_dir, out_dir, num_augs_to_do):
        '''
        Takes one species, and a number of audio
        augmentations to do. Generates the files,
        and returns the newly created file paths
        (full paths), together with the number of
        augmentation attempts that failed.

        Augmentations are distributed round robin across all of
        the species' audio files such that each file is
        augmented roughly the same number of times until
        num_augs_to_do is accomplished.

        For each file, the audio augmentation methods to apply
        are drawn at random without replacement from AudAugMethod.
        If a file needs more augmentations than there are methods,
        the drawn sequence of methods is repeated as often as
        needed.

        Every exit from this method returns a 2-tuple, including
        the early exits where nothing is generated; callers may
        therefore always unpack the result.

        :param in_dir: directory holding one species' audio files
        :type in_dir: str
        :param out_dir: destination for new audio files
        :type out_dir: str
        :param num_augs_to_do: number of augmentations
        :type num_augs_to_do: int
        :returns: list of newly created file paths, and
            number of failed augmentation attempts
        :rtype: ([str], int)
        '''

        # By convention, species name is the last part of the directory:
        species_name = Path(in_dir).stem

        # Create subfolder for the given species. A falsy result
        # means: do not generate anything for this species
        # (presumably governed by the overwrite policy -- confirm
        # against Utils.create_folder):
        if not Utils.create_folder(out_dir, self.overwrite_policy):
            self.log.info(f"Skipping augmentations for {species_name}")
            return [], 0

        # Get dict: {full-path-to-an-audio_file : 0}
        # The zeroes will be counts of augmentations
        # needed for that file:
        in_wav_files = dict.fromkeys(Utils.listdir_abs(in_dir), 0)

        # Cannot do augmentations for species with 0 samples
        if not in_wav_files:
            self.log.info(f"Skipping for {species_name} since there are no original samples.")
            return [], 0

        # Distribute augmentations across the original
        # input files, one at a time, wrapping around
        # until all requested augmentations are assigned:
        aug_assigned = 0
        while aug_assigned < num_augs_to_do:
            for fname in in_wav_files:
                in_wav_files[fname] += 1
                aug_assigned += 1
                if aug_assigned >= num_augs_to_do:
                    break

        new_sample_paths = []
        failures = 0

        # The available methods do not change between files:
        all_methods = list(AudAugMethod)

        for in_fname, num_augs_this_file in in_wav_files.items():

            # Pick audio aug methods to apply (without replacement)
            # Note that if more augs are to be applied to each file
            # than methods are available, some methods will need
            # to be applied multiple times; no problem, as each
            # method includes randomness:
            max_methods_sample_size = min(len(all_methods), num_augs_this_file)
            methods = random.sample(all_methods, max_methods_sample_size)

            # Now have something like:
            #     [volume, time-shift], or all methods: [volume, time-shift, noise]

            if num_augs_this_file > len(methods):
                # Repeat the methods as often as
                # needed:
                num_method_set_repeats = int(math.ceil(num_augs_this_file/len(methods)))
                # The slice to num_augs_this_file chops off
                # the possible excess from the array replication:
                method_seq = (methods * num_method_set_repeats)[:num_augs_this_file]

                # Assuming num_augs_this_file is 7, we now have method_seq:
                #    [m1,m2,m3,m1,m2,m3,m1]
            else:
                # Also covers files with zero assigned augmentations,
                # for which methods is the empty list:
                method_seq = methods

            for method in method_seq:
                # create_new_sample() hands back either the path
                # of the new file, or an Exception instance:
                out_path_or_err = self.create_new_sample(in_fname, out_dir, method)
                if isinstance(out_path_or_err, Exception):
                    failures += 1
                else:
                    new_sample_paths.append(out_path_or_err)

        self.log.info(f"Audio aug report: {len(new_sample_paths)} new files; {failures} failures")

        return new_sample_paths, failures
    def create_snips_gen_for_sel_tbls(self, snippets_src, sel_tables_src):
        '''
        Given one or more Raven selection tables, 
        and one or more recording snippet paths, return
        a dict:
        
               {<recording-id> : SelTblSnipsAssoc-inst<table-path, snippets-src>}

        where recording-id is like AM01_20190719_063242; table-path
        is the full path to one selection table with the respective
        recording-id, and snippets-src is the source of snippets
        handed to this method. 
        
        Usage concept:
            o There are relatively few selection tables, since they
              are human-generated
            o There can be thousands of snippet .png files whose time spans
              are covered in one table
            o The data structure returned from this method can be
              used like this:
              
                    tbl_snips_match = create_snips_gen_for_sel_tbls('/foo/my_snips', '/bar/my_tbls')
                    
                    # For each selection table, work on the snippets
                    # that are covered by that table
                    
                    for rec_id in tbl_snips_match:
                        for snip_path in tbl_snips_match[rec_id].snips_iterator():
                            <do something with spectrogram snippet>
        
        NOTE(review): every SelTblSnipsAssoc instance receives the
        same snippets iterator object; confirm that SelTblSnipsAssoc
        copes with sharing a single-pass iterator.
        
        :param snippets_src: generator of absolute paths to snippets,
            or the absolute path to a directory
        :type snippets_src: {Generator(str) | str}
        :param sel_tables_src: absolute path to selection table, or path 
            to a directory that contains selection tables, or
            generator of absolute paths to selection tables
        :type sel_tables_src: {Generator(str) | str}
        :return: dict mapping recording ID to SelTblSnipsAssoc instances
        :rtype: {str : SelTblSnipsAssoc}
        :raises ValueError: if either argument is neither a generator
            nor a suitable absolute path
        '''

        # Table paths may be an individual
        # file, a directory, or a generator
        # of absolute paths. Sanity checks:

        if isinstance(sel_tables_src, str):
            if not os.path.isabs(sel_tables_src):
                raise ValueError(
                    "Table paths must be a generator, or an absolute path to a selection table or dir"
                )
            if os.path.isfile(sel_tables_src):
                sel_tables_src = [sel_tables_src]
            elif os.path.isdir(sel_tables_src):
                sel_tables_src = Utils.listdir_abs(sel_tables_src)
            else:
                # Neither file nor directory: without this check
                # the loop below would iterate over the characters
                # of the path string:
                raise ValueError(
                    f"Selection table path is neither a file nor a directory: {sel_tables_src}"
                )
        # If not a string, sel_tables_src better be a generator:
        elif not isinstance(sel_tables_src, types.GeneratorType):
            raise ValueError(
                "Table paths must be a generator, or an absolute path to a selection table or dir"
            )

        # Same checks for snippet location:
        if isinstance(snippets_src, str):
            if not os.path.isabs(snippets_src) \
                or not os.path.isdir(snippets_src):
                raise ValueError(
                    "Snippet paths must be a generator, or an absolute path to a snippet dir"
                )
            snippets_src = iter(Utils.listdir_abs(snippets_src))
        # If not a string, snippets_src better be a generator.
        # (Must test snippets_src here, not sel_tables_src, which
        # by now may have been turned into a list.)
        elif not isinstance(snippets_src, types.GeneratorType):
            raise ValueError(
                "Snippets src must be a generator, or an absolute path to dir"
            )

        # Build a dict:
        #    {<recording_id> : <SelTblSnipsAssoc for that table>}
        # Tables for which no recording ID can be extracted
        # are skipped:
        recording_selection_tables = {}
        for table_path in sel_tables_src:
            recording_id = self.extract_recording_id(table_path)
            if recording_id is not None:
                recording_selection_tables[recording_id] = \
                    SelTblSnipsAssoc(table_path, snippets_src)

        return recording_selection_tables