Exemplo n.º 1
0
    def __init__(
        self,
        feature_extract_by: str = 'librosa',
        sample_rate: int = 16000,
        n_mels: int = 80,
        frame_length: int = 20,
        frame_shift: int = 10,
        del_silence: bool = False,
        input_reverse: bool = True,
        normalize: bool = False,
        transform_method: str = 'mel',
        time_mask_para: int = 70,
        freq_mask_para: int = 12,
        time_mask_num: int = 2,
        freq_mask_num: int = 2,
        sos_id: int = 1,
        eos_id: int = 2,
        target_dict: dict = None,
        noise_augment: bool = False,
        dataset_path: str = None,
        noiseset_size: int = 0,
        noise_level: float = 0.7,
    ) -> None:
        """Set up the parser: parent state, option flags, SpecAugment and the feature transform.

        Args:
            feature_extract_by: backend name forwarded to the MelSpectrogram,
                MFCC and Spectrogram transforms (FilterBank does not receive it).
            sample_rate: forwarded to the parent initializer and to the
                selected transform.
            n_mels: forwarded to the MelSpectrogram, MFCC and FilterBank
                transforms (Spectrogram does not receive it).
            frame_length: framing parameter forwarded to the transform
                (unit not visible here -- presumably milliseconds; confirm).
            frame_shift: framing parameter forwarded to the transform
                (unit not visible here -- presumably milliseconds; confirm).
            del_silence: flag stored on the instance.
            input_reverse: flag stored on the instance.
            normalize: flag stored on the instance.
            transform_method: case-insensitive feature name; one of
                'mel', 'mfcc', 'spect' or 'fbank'.
            time_mask_para: first positional argument given to SpecAugment.
            freq_mask_para: second positional argument given to SpecAugment.
            time_mask_num: third positional argument given to SpecAugment.
            freq_mask_num: fourth positional argument given to SpecAugment.
            sos_id: stored on the instance.
            eos_id: stored on the instance.
            target_dict: stored on the instance.
            noise_augment: forwarded to the parent initializer.
            dataset_path: forwarded to the parent initializer.
            noiseset_size: forwarded to the parent initializer.
            noise_level: forwarded to the parent initializer.

        Raises:
            ValueError: if ``transform_method`` names no supported feature.
        """
        super(SpectrogramParser, self).__init__(
            dataset_path, noiseset_size, sample_rate, noise_level, noise_augment
        )

        # Plain option storage.
        self.del_silence = del_silence
        self.input_reverse = input_reverse
        self.normalize = normalize
        self.sos_id = sos_id
        self.eos_id = eos_id
        self.target_dict = target_dict

        self.spec_augment = SpecAugment(
            time_mask_para, freq_mask_para, time_mask_num, freq_mask_num
        )

        # Factories are lambdas so that only the requested transform class is
        # looked up and instantiated.
        factories = {
            'mel': lambda: MelSpectrogram(
                sample_rate, n_mels, frame_length, frame_shift, feature_extract_by
            ),
            'mfcc': lambda: MFCC(
                sample_rate, n_mels, frame_length, frame_shift, feature_extract_by
            ),
            'spect': lambda: Spectrogram(
                sample_rate, frame_length, frame_shift, feature_extract_by
            ),
            'fbank': lambda: FilterBank(
                sample_rate, n_mels, frame_length, frame_shift
            ),
        }

        make_transform = factories.get(transform_method.lower())
        if make_transform is None:
            raise ValueError("Unsupported feature : {0}".format(transform_method))

        self.transforms = make_transform()
Exemplo n.º 2
0
    def __init__(
        self,
        feature_extract_by: str = 'librosa',
        sample_rate: int = 16000,
        n_mels: int = 80,
        frame_length: int = 20,
        frame_shift: int = 10,
        del_silence: bool = False,
        input_reverse: bool = True,
        normalize: bool = False,
        transform_method: str = 'mel',
        freq_mask_para: int = 12,
        time_mask_num: int = 2,
        freq_mask_num: int = 2,
        sos_id: int = 1,
        eos_id: int = 2,
        dataset_path: str = None,
        audio_extension: str = 'pcm',
    ) -> None:
        """Set up the parser: parent state, option flags, SpecAugment and the feature transform.

        Args:
            feature_extract_by: which library to use for feature extraction;
                forwarded to the MelSpectrogram, MFCC and Spectrogram
                transforms (FilterBank does not receive it).
            sample_rate: sample rate of the audio signal.
            n_mels: size parameter forwarded to the MelSpectrogram, MFCC and
                FilterBank transforms (Spectrogram does not receive it).
            frame_length: frame length for the spectrogram.
            frame_shift: length of hop between STFT windows.
            del_silence: whether to delete silence; stored on the instance.
            input_reverse: whether to reverse the input; stored on the instance.
            normalize: whether to normalize the spectrum; stored on the instance.
            transform_method: case-insensitive feature name; one of
                'mel', 'fbank', 'spect' or 'mfcc'.
            freq_mask_para: hyper-parameter limiting the frequency-mask length;
                first positional argument given to SpecAugment.
            time_mask_num: how many time-masked areas to make.
            freq_mask_num: how many frequency-masked areas to make.
            sos_id: identifier of the start-of-sentence token.
            eos_id: identifier of the end-of-sentence token.
            dataset_path: noise dataset path; forwarded to the parent initializer.
            audio_extension: audio file extension; stored on the instance.

        Raises:
            ValueError: if ``transform_method`` names no supported feature.
        """
        super(SpectrogramParser, self).__init__(dataset_path)

        # Plain option storage.
        self.del_silence = del_silence
        self.input_reverse = input_reverse
        self.normalize = normalize
        self.sos_id = sos_id
        self.eos_id = eos_id

        self.spec_augment = SpecAugment(
            freq_mask_para, time_mask_num, freq_mask_num
        )
        self.audio_extension = audio_extension

        # Factories are lambdas so that only the requested transform class is
        # looked up and instantiated.
        factories = {
            'mel': lambda: MelSpectrogram(
                sample_rate, n_mels, frame_length, frame_shift, feature_extract_by
            ),
            'mfcc': lambda: MFCC(
                sample_rate, n_mels, frame_length, frame_shift, feature_extract_by
            ),
            'spect': lambda: Spectrogram(
                sample_rate, frame_length, frame_shift, feature_extract_by
            ),
            'fbank': lambda: FilterBank(
                sample_rate, n_mels, frame_length, frame_shift
            ),
        }

        make_transform = factories.get(transform_method.lower())
        if make_transform is None:
            raise ValueError(
                "Unsupported feature : {0}".format(transform_method))

        self.transforms = make_transform()