Python SoundProcessor.save_image示例，data_augmentation.sound_processor.SoundProcessor.save_image Python示例

示例#1

0

显示文件

文件： match_snippets_to_selection_table.py 项目： paepcke/birds

    def save_updated_snippet(self, outdir, species, snippet_path, spectro_arr,
                             metadata):
        '''
        Write a spectrogram snippet to

            outdir/species/<file name of snippet_path>

        as an image file, passing the given metadata along
        to SoundProcessor.save_image() for inclusion in the file.

        :param outdir: root of the destination directory tree
        :type outdir: str
        :param species: name of the subdirectory under outdir
            that receives the snippet
        :type species: str
        :param snippet_path: file name or path of the snippet;
            only its final path component is used
        :type snippet_path: str
        :param spectro_arr: image data
        :type spectro_arr: np.array
        :param metadata: auxiliary info to include in the saved file
        :type metadata: {str : str}
        '''

        # Only the file name of the incoming path is retained;
        # the directory part is replaced by outdir/species:
        destination = os.path.join(outdir,
                                   species,
                                   os.path.basename(snippet_path))

        # Called before the save so that the destination
        # directory is in place (presumably created if
        # missing -- see FileUtils):
        FileUtils.ensure_directory_existence(destination)
        SoundProcessor.save_image(spectro_arr, destination, metadata)

示例#2

0

显示文件

文件： list_png_metadata.py 项目： paepcke/birds

    def set_metadata(cls, png_fpath, info_to_set, outfile=None, setting=False):
        '''
        Modify metadata in a .png file. Distinguishes between
        replacing the existing metadata (setting == True), and
        merging into the existing metadata (setting == False).
        Either way, takes a dict of metadata in info_to_set.

        If outfile is None the modification is in-place, i.e.
        the result is written back to png_fpath.

        :param png_fpath: input png file
        :type png_fpath: str
        :param info_to_set: dict of metadata information
        :type info_to_set: {str : str}
        :param outfile: if provided, path of the png file to
            write with the resulting metadata
        :type outfile: {None | str}
        :param setting: if True, the file's metadata is replaced
            wholesale by info_to_set. If False, info_to_set is
            merged into the existing metadata: new keys are added,
            and values of keys present in both are overwritten.
        :type setting: bool
        :raise TypeError: if info_to_set is not a dict
        '''

        if not isinstance(info_to_set, dict):
            # Report the type of the offending argument
            # (not the type of the dict class itself):
            raise TypeError(
                f"info_to_set must be a dict, not {type(info_to_set).__name__}")

        img, metadata = SoundProcessor.load_spectrogram(png_fpath)
        if outfile is None:
            # In-place modification:
            outfile = png_fpath

        if setting:
            metadata = info_to_set
        else:
            metadata.update(info_to_set)

        SoundProcessor.save_image(img, outfile, metadata)

示例#3

0

显示文件

文件： chop_spectrograms.py 项目： paepcke/birds

    def chop_one_spectro_file(
        cls,
        spectro_fname,
        out_dir,
        species_name,
        window_len=5,
        skip_size=2,
        original_duration=None,
        overwrite_policy=WhenAlreadyDone.ASK,
    ):
        """
        Cut one spectrogram image of arbitrary length into
        snippets that each cover roughly window_len seconds,
        with successive snippets starting skip_size seconds apart.

        Converting seconds to spectrogram columns requires the
        play length of the underlying audio. It is taken from the
        'duration' entry of the metadata loaded with the
        spectrogram if that entry exists; otherwise from the
        original_duration argument. The time covered by one
        column is then approximated as duration / number-of-columns.

        Each snippet is saved via SoundProcessor.save_image() with
        a copy of the source metadata, extended by the entries
        'duration(secs)', 'start_time(secs)', 'end_time(secs)',
        and 'species'.

        Spectrograms whose duration is shorter than window_len
        produce no snippets; the method then returns None.

        :param spectro_fname: full path to spectrogram file to chop
        :type spectro_fname: str
        :param out_dir: root directory under which spectrogram
            snippets will be saved; passed on to
            cls.create_snippet_fpath(), which determines the
            snippet file paths
        :type out_dir: str
        :param species_name: name of species to embed in the
            metadata of each snippet
        :type species_name: str
        :param window_len: number of seconds to be covered by each snippet
        :type window_len: int
        :param skip_size: number of seconds to shift right in
            time for the start of each chop
        :type skip_size: int
        :param original_duration: duration of the original audio
            in fractional seconds. Only consulted if the file's
            metadata has no 'duration' entry
        :type original_duration: {None | float}
        :param overwrite_policy: what to do when a snippet file
            already exists: WhenAlreadyDone.SKIP leaves it alone,
            WhenAlreadyDone.ASK asks the user for confirmation,
            any other value overwrites
        :type overwrite_policy: WhenAlreadyDone
        :return: the time in seconds covered by each snippet, or
            None if the spectrogram is shorter than window_len
        :rtype: {float | None}
        :raise ValueError: if neither embedded duration metadata is found
            in the given file, nor original_duration is provided
        """

        # Read the spectrogram, getting an np array:
        full_spectro, metadata = SoundProcessor.load_spectrogram(spectro_fname)

        # Embedded duration wins over the caller-supplied one:
        embedded_duration = metadata.get('duration')
        if embedded_duration is not None:
            duration = float(embedded_duration)
        elif original_duration is not None:
            duration = float(original_duration)
        else:
            raise ValueError(
                f"Time duration of original recording cannot be determined for {spectro_fname}"
            )

        # Recording shorter than a single window:
        # no partial snippets are produced.
        if duration < window_len:
            return

        # Note: the metadata also carries sample rate ('sr') and
        # species ('label') according to the original author;
        # neither is needed here.

        _num_freq_bands, num_columns = full_spectro.shape

        # Fractional seconds covered by one spectrogram column:
        secs_per_column = duration / num_columns

        # Upper bound (in whole seconds) for snippet start times:
        start_secs_limit = 1 + int(np.ceil(duration)) - skip_size

        # Caller thinks in seconds; the slicing below needs
        # columns. Floor division, so subject to rounding error.
        # The width is widened if needed to the minimum snippet
        # width required for pretrained models:
        cols_per_snippet = max(cls.MIN_SNIPPET_WIDTH,
                               int(window_len // secs_per_column))

        # Time actually covered by each snippet after
        # rounding/widening:
        snippet_secs = cols_per_snippet * secs_per_column

        cols_per_skip = int(skip_size // secs_per_column)
        start_col_limit = int(start_secs_limit // secs_per_column)

        assert (start_col_limit <= num_columns)

        for start_col in range(0, start_col_limit, cols_per_skip):

            # Start time of this snippet relative to the
            # beginning of the full spectrogram:
            wall_start_time = start_col * secs_per_column

            # File name encodes the (rounded) start time:
            snippet_path = cls.create_snippet_fpath(spectro_fname,
                                                    round(wall_start_time),
                                                    out_dir)

            if os.path.exists(snippet_path):
                if overwrite_policy == WhenAlreadyDone.SKIP:
                    # Next snippet:
                    continue
                if (overwrite_policy == WhenAlreadyDone.ASK
                        and not Utils.user_confirm(
                            f"Snippet {Path(snippet_path).stem} exists, overwrite?",
                            default='N')):
                    continue

            # Chop: all rows; columns from the current window
            #       start for one snippet width:
            snippet_data = full_spectro[:, start_col:start_col + cols_per_snippet]
            if snippet_data.shape[1] < cols_per_snippet:
                # Ran off the right edge: the too-narrow
                # remainder is not saved.
                break

            snippet_info = metadata.copy()
            snippet_info.update({
                'duration(secs)': snippet_secs,
                'start_time(secs)': wall_start_time,
                'end_time(secs)': wall_start_time + snippet_secs,
                'species': species_name,
            })
            SoundProcessor.save_image(snippet_data, snippet_path, snippet_info)

        return snippet_secs

示例#4

0

显示文件

    def create_new_sample(self, sample_path, out_dir, method):
        '''
        Given one spectrogram file, and an image augmentation
        method name, compute that augmentation, create a file name
        that gives insight into the aug applied, and write that
        new spectrogram file to out_dir.

        Currently available types of image augmentation technique:

            o adding random or uniform sounds (ImgAugMethod.NOISE)
            o frequency masking (ImgAugMethod.FMASK)
            o time masking (ImgAugMethod.TMASK)

        The metadata loaded with the source spectrogram is saved
        with the new file, with an added 'augmentation' entry
        naming the technique ('noise', 'fmask', or 'tmask').

        If the augmentation raises an exception, the error is
        logged, nothing is written, and None is returned. None is
        also returned, without logging, if method is none of the
        three methods above.

        :param sample_path: absolute path to spectrogram
        :type sample_path: str
        :param out_dir: destination of resulting new spectros
        :type out_dir: str
        :param method: the (spectrogram) image augmentation method to apply
        :type method: ImgAugMethod
        :return: Newly created spectro file (full path) or None,
            if a failure occurred.
        :rtype: {str | None}
        '''

        spectro, metadata = SoundProcessor.load_spectrogram(sample_path)
        sample_p = Path(sample_path)

        if method == ImgAugMethod.NOISE:
            try:
                # Default is uniform noise:
                new_spectro, out_fname = SoundProcessor.random_noise(spectro)
                metadata['augmentation'] = 'noise'
            except Exception as e:
                # Best effort: report and give up on this sample
                self.log.err(
                    f"Failed to add noise to {sample_p.stem} ({repr(e)})")
                return None

        elif method == ImgAugMethod.FMASK:
            try:
                # Horizontal bands:
                new_spectro, out_fname = SoundProcessor.freq_mask(
                    spectro,
                    max_height=15  # num freq bands
                )
                metadata['augmentation'] = 'fmask'
            except Exception as e:
                self.log.err(
                    f"Failed to apply frequency mask to {sample_p.stem} ({repr(e)})")
                return None

        elif method == ImgAugMethod.TMASK:
            try:
                # Vertical bands:
                new_spectro, out_fname = SoundProcessor.time_mask(
                    spectro,
                    max_width=15  # num time ticks
                )
                metadata['augmentation'] = 'tmask'
            except Exception as e:
                self.log.err(
                    f"Failed to apply time mask to {sample_p.stem} ({repr(e)})")
                return None

        else:
            # Unrecognized augmentation method: nothing to do
            return None

        # The second value returned by the augmentation routine is
        # inserted between the source file's stem and its extension:
        appended_fname = sample_p.stem + out_fname + sample_p.suffix
        out_path = os.path.join(out_dir, appended_fname)
        SoundProcessor.save_image(new_spectro, out_path, metadata)
        return out_path