示例#1
0
def _add_noise(signal, noise_file_name, snr, sample_rate):
    """
    Mix a randomly chosen excerpt of a noise recording into a signal.

    The noise is loaded with ``read_audio``, repeated when it is shorter than
    the signal, cropped at a random offset to the length of the signal and
    rescaled so that the level of the signal minus the level of the noise
    (both as returned by ``_rms_energy`` and used here as dB values) equals
    ``snr``. The mixture is finally normalised to zero mean and unit
    standard deviation.

    :param signal: array holding the clean signal (assumed one-dimensional,
        to be confirmed against the callers)
    :param noise_file_name: name of the audio file containing the noise
    :param snr: target signal-to-noise ratio, used as a dB value
    :param sample_rate: sampling rate forwarded to ``read_audio`` when
        loading the noise file
    :return: the noisy signal after mean and variance normalisation
    """
    # Open noise file; the sampling rate returned alongside it is not needed
    noise, _ = read_audio(noise_file_name, sample_rate)
    logging.info("Noise.shape = %s", noise.shape)
    logging.info("signal.shape = %s", signal.shape)

    # Repeat the noise until it is strictly longer than the signal
    if len(noise) < len(signal):
        dup_factor = len(signal) // len(noise) + 1
        noise = numpy.tile(noise, dup_factor)

    # Generate random section of masker
    if len(noise) != len(signal):
        # randint excludes its upper bound: the +1 makes the very last
        # excerpt (the one ending on the final noise sample) selectable too
        idx = numpy.random.randint(0, len(noise) - len(signal) + 1)
        noise = noise[idx:idx + len(signal)]

    # Compute energy of both signals
    N_dB = _rms_energy(noise)
    S_dB = _rms_energy(signal)

    # Rescale N: bring the noise to unit level, then to the level that
    # yields the requested SNR with respect to the signal
    N_new = S_dB - snr
    noise_scaled = 10 ** (N_new / 20) * noise / 10 ** (N_dB / 20)
    noisy = signal + noise_scaled

    return (noisy - noisy.mean()) / noisy.std()
示例#2
0
def _add_reverb(signal, reverb_file_name, sample_rate, reverb_level=-26.0, ):
    """
    Add reverberation (convolutive noise) to a speech signal.

    The content of ``reverb_file_name`` is used as the numerator
    coefficients of a filter applied to ``signal``. The filtered signal is
    rescaled so that the level measured by ``asl_meter`` is moved to
    ``reverb_level`` (both used as dB values), then normalised to zero mean
    and unit standard deviation.

    :param signal: speech signal to process
    :param reverb_file_name: name of the audio file used as filter coefficients
    :param sample_rate: sampling rate forwarded to ``read_audio`` and ``asl_meter``
    :param reverb_level: target level of the filtered signal, default -26.0
    :return: the reverberated signal after mean and variance normalisation
    """
    impulse_response, _ = read_audio(reverb_file_name, sample_rate)
    reverberated = lfilter(impulse_response, 1, signal)

    # Undo the measured level first, then apply the requested one
    measured_gain = 10 ** (asl_meter(reverberated, sample_rate) / 20)
    target_gain = 10 ** (reverb_level / 20)
    reverberated = reverberated / measured_gain * target_gain

    centred = reverberated - reverberated.mean()
    return centred / reverberated.std()
示例#3
0
    def extract(self, *file):
        """
        Compute the acoustic parameters of one channel of an audio file and
        write them to an HDF5 feature file.

        Only the first positional argument is used. It must unpack into five
        items ``(show, channel, input_audio_filename, output_feature_filename, extra)``:

        * ``show``: ID of the show, inserted in the file name structures
        * ``channel``: index of the channel to process
        * ``input_audio_filename``: if not None, replaces ``self.audio_filename_structure``
        * ``output_feature_filename``: if not None, replaces ``self.feature_filename_structure``
        * ``extra``: if truthy, passed as a second argument when formatting the feature file name

        Note that the two file name structures stored on ``self`` are
        overwritten when the corresponding item is not None.

        :param file: tuple whose first element is the 5-item description above
        :return: None, the features are written to the HDF5 file which is closed before returning
        :raises ValueError: if ``self.feature_type`` is neither 'mfcc' nor 'plp'
        """
        show, channel, input_audio_filename, output_feature_filename, extra = file[0]
        # The in-memory ('core') HDF5 file is flushed to disk when it is closed
        backing_store = True

        # If the input audio file name does not include the ID of the show,
        # the audio_filename_structure is updated to use the input_audio_filename
        if input_audio_filename is not None:
            self.audio_filename_structure = input_audio_filename
        audio_filename = self.audio_filename_structure.format(show)

        # If the output file name does not include the ID of the show,
        # (i.e., if the feature_filename_structure does not include {})
        # the feature_filename_structure is updated to use the output_feature_filename
        if output_feature_filename is not None:
            self.feature_filename_structure = output_feature_filename

        if extra:
            feature_filename = self.feature_filename_structure.format(
                show, extra)
        else:
            feature_filename = self.feature_filename_structure.format(show)

        # Open audio file, get the signal (the returned sampling frequency is not used)
        signal, _ = read_audio(audio_filename, self.sampling_frequency)
        if signal.ndim == 1:
            signal = signal[:, np.newaxis]

        # Process the target channel to return Filter-Banks, Cepstral coefficients and BNF if required
        length, _ = signal.shape

        # If the size of the signal is not enough for one frame, return zero features
        PARAM_TYPE = np.float32
        if length < self.window_sample:
            cep = np.empty((0, self.ceps_number), dtype=PARAM_TYPE)
            energy = np.empty((0, 1), dtype=PARAM_TYPE)
            fb = np.empty((0, self.filter_bank_size), dtype=PARAM_TYPE)
            label = np.empty((0, 1), dtype='int8')

        else:
            # Random noise is added to the input signal to avoid zero frames.
            # The seed is fixed so that the added noise is reproducible.
            np.random.seed(0)
            signal[:, channel] += 0.0001 * np.random.randn(signal.shape[0])

            # dec: number of samples per batch, dec2: overlap between two consecutive batches
            dec = self.shift_sample * 250 * 25000 + self.window_sample
            dec2 = self.window_sample - self.shift_sample
            start = 0
            end = min(dec, length)

            # Process the signal by batch to avoid problems for very long signals
            # NOTE: each batch overwrites cep, energy, fb and label, so only
            # the features of the last batch are kept
            while start < (length - dec2):

                if self.feature_type == 'mfcc':
                    # Extract cepstral coefficients, energy and filter banks
                    cep, energy, _, fb = mfcc(signal[start:end, channel],
                                              fs=self.sampling_frequency,
                                              lowfreq=self.lower_frequency,
                                              maxfreq=self.higher_frequency,
                                              nlinfilt=self.filter_bank_size if
                                              self.filter_bank == "lin" else 0,
                                              nlogfilt=self.filter_bank_size if
                                              self.filter_bank == "log" else 0,
                                              nwin=self.window_size,
                                              shift=self.shift,
                                              nceps=self.ceps_number,
                                              get_spec=False,
                                              get_mspec=True,
                                              prefac=self.pre_emphasis)
                elif self.feature_type == 'plp':
                    cep, energy, _, fb = plp(signal[start:end, channel],
                                             nwin=self.window_size,
                                             fs=self.sampling_frequency,
                                             plp_order=self.ceps_number,
                                             shift=self.shift,
                                             get_spec=False,
                                             get_mspec=True,
                                             prefac=self.pre_emphasis,
                                             rasta=self.rasta_plp)
                else:
                    # Fail explicitly instead of hitting an unbound variable below
                    raise ValueError(
                        "Unsupported feature_type: {}".format(self.feature_type))

                # Perform feature selection (the returned threshold is not used)
                label, _ = self._vad(cep, energy, fb, signal[start:end,
                                                             channel])
                # Pad the labels with False so that there is one label per frame
                if len(label) < len(energy):
                    label = np.hstack(
                        (label, np.zeros(len(energy) - len(label),
                                         dtype='bool')))

                start = end - dec2
                end = min(end + dec, length)

        # Create the directory of the HDF5 file if it doesn't exist;
        # an empty dir_name means the current directory, nothing to create
        dir_name = os.path.dirname(feature_filename)  # get the path
        if dir_name and not os.path.exists(dir_name):
            os.makedirs(dir_name)

        # Create the HDF5 file
        h5f = h5py.File(feature_filename,
                        'a',
                        backing_store=backing_store,
                        driver='core')
        try:
            # Drop the parameters that are not requested in save_param
            if "cep" not in self.save_param:
                cep = None
            if "energy" not in self.save_param:
                energy = None
            if "fb" not in self.save_param:
                fb = None
            if "vad" not in self.save_param:
                label = None

            cep, fb, label = self.postProc(cep, energy, fb, label)

            write_hdf5(show, h5f, cep, None, None, None, None, None, fb, None,
                       None, None, None, None, label)
        finally:
            # Always release the file handle, even if post-processing or writing fails
            h5f.close()
    def extract(self, show, channel,
                input_audio_filename=None,
                output_feature_filename=None,
                backing_store=False):
        """
        Compute the acoustic parameters (filter banks, cepstral coefficients and log-energy)
        for a single channel from a given audio file.

        The type of features ('mfcc' or 'plp') is read from ``self.feature_type``;
        for PLP, RASTA filtering is controlled by ``self.rasta_plp``.
        Bottleneck features are not computed by this method: None is always
        written for them.

        Note that ``self.audio_filename_structure`` and ``self.feature_filename_structure``
        are overwritten when the corresponding file name argument is not None.

        :param show: ID if the show
        :param channel: channel number (0 if mono file)
        :param input_audio_filename: name of the input audio file to consider if the name of the audio file is independent from the ID of the show
        :param output_feature_filename: name of the output feature file to consider if the name of the feature file is independent from the ID of the show
        :param backing_store: boolean, if False, nothing is writen to disk, if True, the file is writen to disk when closed

        :return: an hdf5 file handler, left open (closing it is up to the caller)
        :raises ValueError: if ``self.feature_type`` is neither 'mfcc' nor 'plp'
        """
        # Create the filename to load

        # If the input audio file name does not include the ID of the show
        # (i.e., if the audio_filename_structure does not include {})
        # the audio_filename_structure is updated to use the input_audio_filename
        if input_audio_filename is not None:
            self.audio_filename_structure = input_audio_filename
        audio_filename = self.audio_filename_structure.format(show)

        # If the output file name does not include the ID of the show,
        # (i.e., if the feature_filename_structure does not include {})
        # the feature_filename_structure is updated to use the output_feature_filename
        if output_feature_filename is not None:
            self.feature_filename_structure = output_feature_filename
        feature_filename = self.feature_filename_structure.format(show)

        # Open audio file, get the signal (the returned sampling frequency is not used)
        signal, _ = read_audio(audio_filename, self.sampling_frequency)
        if signal.ndim == 1:
            signal = signal[:, numpy.newaxis]

        # Process the target channel to return Filter-Banks and Cepstral coefficients
        length, _ = signal.shape

        # If the size of the signal is not enough for one frame, return zero features
        if length < self.window_sample:
            cep = numpy.empty((0, self.ceps_number), dtype=PARAM_TYPE)
            energy = numpy.empty((0, 1), dtype=PARAM_TYPE)
            fb = numpy.empty((0, self.filter_bank_size), dtype=PARAM_TYPE)
            label = numpy.empty((0, 1), dtype='int8')

        else:
            # Random noise is added to the input signal to avoid zero frames.
            # The seed is fixed so that the added noise is reproducible.
            numpy.random.seed(0)
            signal[:, channel] += 0.0001 * numpy.random.randn(signal.shape[0])

            # dec: number of samples per batch, dec2: overlap between two consecutive batches
            dec = self.shift_sample * 250 * 25000 + self.window_sample
            dec2 = self.window_sample - self.shift_sample
            start = 0
            end = min(dec, length)

            # Process the signal by batch to avoid problems for very long signals
            # NOTE: each batch overwrites cep, energy, fb and label, so only
            # the features of the last batch are kept
            while start < (length - dec2):
                logging.info('process part : %f %f %f',
                             start / self.sampling_frequency,
                             end / self.sampling_frequency,
                             length / self.sampling_frequency)

                if self.feature_type == 'mfcc':
                    # Extract cepstral coefficients, energy and filter banks
                    cep, energy, _, fb = mfcc(signal[start:end, channel],
                                              fs=self.sampling_frequency,
                                              lowfreq=self.lower_frequency,
                                              maxfreq=self.higher_frequency,
                                              nlinfilt=self.filter_bank_size if self.filter_bank == "lin" else 0,
                                              nlogfilt=self.filter_bank_size if self.filter_bank == "log" else 0,
                                              nwin=self.window_size,
                                              shift=self.shift,
                                              nceps=self.ceps_number,
                                              get_spec=False,
                                              get_mspec=True,
                                              prefac=self.pre_emphasis)
                elif self.feature_type == 'plp':
                    cep, energy, _, fb = plp(signal[start:end, channel],
                                             nwin=self.window_size,
                                             fs=self.sampling_frequency,
                                             plp_order=self.ceps_number,
                                             shift=self.shift,
                                             get_spec=False,
                                             get_mspec=True,
                                             prefac=self.pre_emphasis,
                                             rasta=self.rasta_plp)
                else:
                    # Fail explicitly instead of hitting an unbound variable below
                    raise ValueError("Unsupported feature_type: {}".format(self.feature_type))

                # Perform feature selection (the returned threshold is not used)
                label, _ = self._vad(cep, energy, fb, signal[start:end, channel])
                # Pad the labels with False so that there is one label per frame
                if len(label) < len(energy):
                    label = numpy.hstack((label, numpy.zeros(len(energy)-len(label), dtype='bool')))

                start = end - dec2
                end = min(end + dec, length)
                if cep.shape[0] > 0:
                    logging.info('!! size of signal cep: %f len %d type size %d', cep[-1].nbytes/1024/1024,
                                 len(cep[-1]),
                                 cep[-1].nbytes/len(cep[-1]))

        # Compute the mean and std of energy, fb and cepstral coefficients over the frames selected by label
        energy_mean = energy[label].mean(axis=0)
        energy_std = energy[label].std(axis=0)
        fb_mean = fb[label, :].mean(axis=0)
        fb_std = fb[label, :].std(axis=0)
        cep_mean = cep[label, :].mean(axis=0)
        cep_std = cep[label, :].std(axis=0)

        # Bottleneck features are never computed here: always bind the three
        # names so that the call to write_hdf5 cannot fail on an unbound
        # variable when "bnf" is listed in save_param
        bnf = None
        bnf_mean = None
        bnf_std = None
        if "bnf" in self.save_param:
            logging.warning('bnf requested in save_param but not computed by extract, nothing is stored for it')

        # Create the directory of the HDF5 file if it doesn't exist;
        # an empty dir_name means the current directory, nothing to create
        dir_name = os.path.dirname(feature_filename)  # get the path
        if dir_name and not os.path.exists(dir_name):
            os.makedirs(dir_name)

        # Create the HDF5 file
        h5f = h5py.File(feature_filename, 'a', backing_store=backing_store, driver='core')

        # Drop the parameters that are not requested in save_param
        if "cep" not in self.save_param:
            cep = None
            cep_mean = None
            cep_std = None
        if "energy" not in self.save_param:
            energy = None
            energy_mean = None
            energy_std = None
        if "fb" not in self.save_param:
            fb = None
            fb_mean = None
            fb_std = None
        if "vad" not in self.save_param:
            label = None
        logging.info(label)

        write_hdf5(show, h5f,
                   cep, cep_mean, cep_std,
                   energy, energy_mean, energy_std,
                   fb, fb_mean, fb_std,
                   bnf, bnf_mean, bnf_std,
                   label)

        return h5f