Пример #1
0
    def _get_2D_feature(audio, feature_type, audio_feature_dict,
                        sampling_rate_in_hz):
        window_length_in_s = audio_feature_dict['window_length_in_s']
        window_shift_in_s = audio_feature_dict['window_shift_in_s']

        if 'num_fft_points' in audio_feature_dict:
            num_fft_points = audio_feature_dict['num_fft_points']
        else:
            num_fft_points = get_length_in_samp(window_length_in_s,
                                                sampling_rate_in_hz)

        if 'window_type' in audio_feature_dict:
            window_type = audio_feature_dict['window_type']
        else:
            window_type = 'hamming'

        if feature_type == 'stft_phase':
            return get_phase_stft_magnitude(audio, sampling_rate_in_hz,
                                            window_length_in_s,
                                            window_shift_in_s, num_fft_points,
                                            window_type)
        if feature_type == 'stft':
            return get_stft_magnitude(audio, sampling_rate_in_hz,
                                      window_length_in_s, window_shift_in_s,
                                      num_fft_points, window_type)
        if feature_type == 'group_delay':
            return get_group_delay(audio, sampling_rate_in_hz,
                                   window_length_in_s, window_shift_in_s,
                                   num_fft_points, window_type)
Пример #2
0
    def _get_feature_dim(audio_feature_dict, sampling_rate_in_hz):
        feature_type = audio_feature_dict[TYPE]

        if feature_type == "raw":
            feature_dim = 1
        elif feature_type == "stft_phase":
            feature_dim_symmetric = get_length_in_samp(audio_feature_dict["window_length_in_s"], sampling_rate_in_hz)
            feature_dim = 2 * get_non_symmetric_length(feature_dim_symmetric)
        elif feature_type in ["stft", "group_delay"]:
            feature_dim_symmetric = get_length_in_samp(audio_feature_dict["window_length_in_s"], sampling_rate_in_hz)
            feature_dim = get_non_symmetric_length(feature_dim_symmetric)
        elif feature_type == "fbank":
            feature_dim = audio_feature_dict["num_filter_bands"]
        else:
            raise ValueError(f"{feature_type} is not recognized.")

        return feature_dim
Пример #3
0
    def _get_feature_dim(audio_feature_dict, sampling_rate_in_hz):
        feature_type = audio_feature_dict['type']

        if feature_type == 'raw':
            feature_dim = 1
        elif feature_type == 'stft_phase':
            feature_dim_symmetric = get_length_in_samp(
                audio_feature_dict['window_length_in_s'], sampling_rate_in_hz)
            feature_dim = 2 * get_non_symmetric_length(feature_dim_symmetric)
        elif feature_type in ['stft', 'group_delay']:
            feature_dim_symmetric = get_length_in_samp(
                audio_feature_dict['window_length_in_s'], sampling_rate_in_hz)
            feature_dim = get_non_symmetric_length(feature_dim_symmetric)
        else:
            raise ValueError('{} is not recognized.'.format(feature_type))

        return feature_dim
Пример #4
0
    def _get_2D_feature(audio, feature_type, audio_feature_dict,
                        sampling_rate_in_hz):
        window_length_in_s = audio_feature_dict["window_length_in_s"]
        window_shift_in_s = audio_feature_dict["window_shift_in_s"]
        window_length_in_samp = get_length_in_samp(window_length_in_s,
                                                   sampling_rate_in_hz)

        if "num_fft_points" in audio_feature_dict:
            num_fft_points = audio_feature_dict["num_fft_points"]
            if num_fft_points < window_length_in_samp:
                raise ValueError("num_fft_points: {} < window length in "
                                 "samples: {} (corresponds to window length"
                                 " in s: {}".format(num_fft_points,
                                                    window_length_in_s,
                                                    window_length_in_samp))
        else:
            num_fft_points = window_length_in_samp

        if "window_type" in audio_feature_dict:
            window_type = audio_feature_dict["window_type"]
        else:
            window_type = "hamming"

        if feature_type == "stft_phase":
            return get_phase_stft_magnitude(audio, sampling_rate_in_hz,
                                            window_length_in_s,
                                            window_shift_in_s, num_fft_points,
                                            window_type)
        if feature_type == "stft":
            return get_stft_magnitude(audio, sampling_rate_in_hz,
                                      window_length_in_s, window_shift_in_s,
                                      num_fft_points, window_type)
        if feature_type == "group_delay":
            return get_group_delay(audio, sampling_rate_in_hz,
                                   window_length_in_s, window_shift_in_s,
                                   num_fft_points, window_type)
        if feature_type == "fbank":
            num_filter_bands = audio_feature_dict["num_filter_bands"]
            return get_fbank(
                audio,
                sampling_rate_in_hz,
                window_length_in_s,
                window_shift_in_s,
                num_fft_points,
                window_type,
                num_filter_bands,
            )
Пример #5
0
    def _get_2D_feature(audio, feature_type, audio_feature_dict,
                        sampling_rate_in_hz):
        window_length_in_s = audio_feature_dict['window_length_in_s']
        window_shift_in_s = audio_feature_dict['window_shift_in_s']
        window_length_in_samp = get_length_in_samp(window_length_in_s,
                                                    sampling_rate_in_hz)

        if 'num_fft_points' in audio_feature_dict:
            num_fft_points = audio_feature_dict['num_fft_points']
            if num_fft_points < window_length_in_samp:
                raise ValueError(
                    'num_fft_points: {} < window length in '
                    'samples: {} (corresponds to window length'
                    ' in s: {}'.format(num_fft_points, window_length_in_s,
                    window_length_in_samp))
        else:
            num_fft_points = window_length_in_samp

        if 'window_type' in audio_feature_dict:
            window_type = audio_feature_dict['window_type']
        else:
            window_type = 'hamming'

        if feature_type == 'stft_phase':
            return get_phase_stft_magnitude(audio, sampling_rate_in_hz,
                                            window_length_in_s,
                                            window_shift_in_s, num_fft_points,
                                            window_type)
        if feature_type == 'stft':
            return get_stft_magnitude(audio, sampling_rate_in_hz,
                                      window_length_in_s, window_shift_in_s,
                                      num_fft_points, window_type)
        if feature_type == 'group_delay':
            return get_group_delay(audio, sampling_rate_in_hz,
                                   window_length_in_s, window_shift_in_s,
                                   num_fft_points, window_type)
        if feature_type == 'fbank':
            num_filter_bands = audio_feature_dict['num_filter_bands']
            return get_fbank(audio, sampling_rate_in_hz,
                                window_length_in_s, window_shift_in_s,
                                num_fft_points, window_type, num_filter_bands)
Пример #6
0
    def _get_2D_feature(
        audio: torch.Tensor,
        feature_type: str,
        audio_feature_dict: Dict[str, Union[float, int, str]],
        sampling_rate_in_hz: int,
    ) -> torch.Tensor:
        window_length_in_s = audio_feature_dict["window_length_in_s"]
        window_shift_in_s = audio_feature_dict["window_shift_in_s"]
        assert torch.jit.isinstance(window_length_in_s, float)
        assert torch.jit.isinstance(window_shift_in_s, float)

        window_length_in_samp = get_length_in_samp(window_length_in_s,
                                                   sampling_rate_in_hz)

        if "num_fft_points" in audio_feature_dict:
            num_fft_points = audio_feature_dict["num_fft_points"]
            assert torch.jit.isinstance(num_fft_points, int)

            if num_fft_points < window_length_in_samp:
                raise ValueError("num_fft_points: {} < window length in "
                                 "samples: {} (corresponds to window length"
                                 " in s: {}".format(num_fft_points,
                                                    window_length_in_s,
                                                    window_length_in_samp))
        else:
            num_fft_points = window_length_in_samp

        if "window_type" in audio_feature_dict:
            window_type = audio_feature_dict["window_type"]
            assert torch.jit.isinstance(window_type, str)
        else:
            window_type = "hamming"

        if feature_type == "stft_phase":
            return get_phase_stft_magnitude(audio, sampling_rate_in_hz,
                                            window_length_in_s,
                                            window_shift_in_s, num_fft_points,
                                            window_type)
        elif feature_type == "stft":
            return get_stft_magnitude(audio, sampling_rate_in_hz,
                                      window_length_in_s, window_shift_in_s,
                                      num_fft_points, window_type)
        elif feature_type == "group_delay":
            return get_group_delay(audio, sampling_rate_in_hz,
                                   window_length_in_s, window_shift_in_s,
                                   num_fft_points, window_type)
        elif feature_type == "fbank":
            num_filter_bands = audio_feature_dict["num_filter_bands"]
            assert torch.jit.isinstance(num_filter_bands, int)

            return get_fbank(
                audio,
                sampling_rate_in_hz,
                window_length_in_s,
                window_shift_in_s,
                num_fft_points,
                window_type,
                num_filter_bands,
            )
        else:
            raise ValueError(
                f'feature_type "{feature_type}" is not recognized.')