Exemplo n.º 1
0
    def _save(show, feature_filename_structure, save_param, cep, energy, fb, bnf, label):
        """
        Write the requested feature streams of one show into an HDF5 file.

        Only the streams named in ``save_param`` are kept; every other stream
        (and its statistics) is replaced by ``None`` before being handed to
        ``write_hdf5``. For each kept stream the mean and standard deviation
        are computed along axis 0 over the frames selected by ``label``.

        :param show: identifier substituted into ``feature_filename_structure``
            and forwarded to ``write_hdf5``
        :param feature_filename_structure: format string producing the output
            file name from ``show``
        :param save_param: container of stream names to keep; the names tested
            here are "cep", "energy", "fb", "bnf" and "vad"
        :param cep: cepstral coefficients, indexed as ``cep[label, :]``
        :param energy: energy values, indexed as ``energy[label]``
        :param fb: filter-bank values, indexed as ``fb[label, :]``
        :param bnf: bottleneck features, indexed as ``bnf[label, :]``; may be
            ``None``, in which case no statistics are computed for it
        :param label: frame selector used to index the streams above; it is
            itself written only when "vad" is in ``save_param``
        :return: None
        """
        feature_filename = feature_filename_structure.format(show)
        logging.info('output finename: '+feature_filename)

        # Create the output directory if needed. An empty dirname means the
        # file goes in the current directory. exist_ok avoids the race between
        # an existence check and the creation when several workers run at once.
        dir_name = os.path.dirname(feature_filename)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)

        # Statistics are computed before "label" is possibly dropped below,
        # because the selection of frames relies on it.
        cep_mean = cep_std = None
        if "cep" in save_param:
            cep_mean = cep[label, :].mean(axis=0)
            cep_std = cep[label, :].std(axis=0)
        else:
            cep = None

        energy_mean = energy_std = None
        if "energy" in save_param:
            energy_mean = energy[label].mean(axis=0)
            energy_std = energy[label].std(axis=0)
        else:
            energy = None

        fb_mean = fb_std = None
        if "fb" in save_param:
            fb_mean = fb[label, :].mean(axis=0)
            fb_std = fb[label, :].std(axis=0)
        else:
            fb = None

        # The statistics must be bound whether or not "bnf" is requested;
        # otherwise the write_hdf5 call below fails on an unbound local.
        bnf_mean = bnf_std = None
        if "bnf" not in save_param:
            bnf = None
        elif bnf is not None:
            bnf_mean = bnf[label, :].mean(axis=0)
            bnf_std = bnf[label, :].std(axis=0)

        if "vad" not in save_param:
            label = None
        logging.info(label)

        h5f = h5py.File(feature_filename, 'a', backing_store=True, driver='core')
        try:
            write_hdf5(show, h5f,
                       cep, cep_mean, cep_std,
                       energy, energy_mean, energy_std,
                       fb, fb_mean, fb_std,
                       bnf, bnf_mean, bnf_std,
                       label)
        finally:
            # Always release the handle, even when the write fails.
            h5f.close()
Exemplo n.º 2
0
    def extract(self, *file):
        """
        Extract the acoustic features of one audio file and store them in an HDF5 file.

        The work item is passed as a single tuple in ``file[0]``. The audio is
        read, features are computed on the requested channel with ``mfcc`` or
        ``plp`` (depending on ``self.feature_type``), frames are labelled by
        ``self._vad``, the result goes through ``self.postProc`` and is finally
        written with ``write_hdf5``. No means or standard deviations are
        written, and the energy stream is not passed to ``write_hdf5``.

        Note that ``self.audio_filename_structure`` and
        ``self.feature_filename_structure`` are overwritten when the
        corresponding entries of the work item are not ``None``.

        :param file: positional arguments; only ``file[0]`` is read and it
            must unpack to ``(show, channel, input_audio_filename,
            output_feature_filename, extra)``. When ``extra`` is truthy it is
            passed as a second argument to the feature file name format.
        :return: None
        :raises ValueError: if the signal is long enough to be processed and
            ``self.feature_type`` is neither 'mfcc' nor 'plp'
        """
        show, channel, input_audio_filename, output_feature_filename, extra = file[0]
        backing_store = True

        # If an explicit input file name is given, it replaces the
        # audio_filename_structure stored on the extractor.
        if input_audio_filename is not None:
            self.audio_filename_structure = input_audio_filename
        audio_filename = self.audio_filename_structure.format(show)

        # If an explicit output file name is given, it replaces the
        # feature_filename_structure stored on the extractor.
        if output_feature_filename is not None:
            self.feature_filename_structure = output_feature_filename

        if extra:
            feature_filename = self.feature_filename_structure.format(show, extra)
        else:
            feature_filename = self.feature_filename_structure.format(show)

        # Open audio file and get the signal (the returned rate is not used here)
        signal, _ = read_audio(audio_filename, self.sampling_frequency)
        if signal.ndim == 1:
            signal = signal[:, np.newaxis]

        length, _ = signal.shape

        # If the size of the signal is not enough for one frame, return zero features
        PARAM_TYPE = np.float32
        if length < self.window_sample:
            cep = np.empty((0, self.ceps_number), dtype=PARAM_TYPE)
            energy = np.empty((0, 1), dtype=PARAM_TYPE)
            fb = np.empty((0, self.filter_bank_size), dtype=PARAM_TYPE)
            label = np.empty((0, 1), dtype='int8')
        else:
            # Random noise is added to the input signal to avoid zero frames.
            # The global NumPy seed is reset so that the noise is reproducible.
            np.random.seed(0)
            signal[:, channel] += 0.0001 * np.random.randn(signal.shape[0])

            dec = self.shift_sample * 250 * 25000 + self.window_sample
            dec2 = self.window_sample - self.shift_sample
            start = 0
            end = min(dec, length)

            # Process the signal by batch to avoid problems for very long signals.
            # NOTE(review): every iteration rebinds cep/energy/fb/label, so only
            # the last batch reaches the writer -- TODO confirm this is intended.
            while start < (length - dec2):
                chunk = signal[start:end, channel]

                if self.feature_type == 'mfcc':
                    # Extract cepstral coefficients, energy and filter banks
                    cep, energy, _, fb = mfcc(
                        chunk,
                        fs=self.sampling_frequency,
                        lowfreq=self.lower_frequency,
                        maxfreq=self.higher_frequency,
                        nlinfilt=self.filter_bank_size if self.filter_bank == "lin" else 0,
                        nlogfilt=self.filter_bank_size if self.filter_bank == "log" else 0,
                        nwin=self.window_size,
                        shift=self.shift,
                        nceps=self.ceps_number,
                        get_spec=False,
                        get_mspec=True,
                        prefac=self.pre_emphasis)
                elif self.feature_type == 'plp':
                    cep, energy, _, fb = plp(
                        chunk,
                        nwin=self.window_size,
                        fs=self.sampling_frequency,
                        plp_order=self.ceps_number,
                        shift=self.shift,
                        get_spec=False,
                        get_mspec=True,
                        prefac=self.pre_emphasis,
                        rasta=self.rasta_plp)
                else:
                    # Fail clearly instead of hitting an unbound local (first
                    # batch) or silently reusing the previous batch's features.
                    raise ValueError(
                        "unsupported feature_type: {!r}".format(self.feature_type))

                # Perform feature selection
                label, _ = self._vad(cep, energy, fb, chunk)
                # Pad the labels with False so that there is one label per frame
                if len(label) < len(energy):
                    label = np.hstack(
                        (label, np.zeros(len(energy) - len(label), dtype='bool')))

                start = end - dec2
                end = min(end + dec, length)

        # Create the directory of the HDF5 file if it doesn't exist. An empty
        # dirname means the current directory; exist_ok avoids the race between
        # an existence check and the creation when several workers run at once.
        dir_name = os.path.dirname(feature_filename)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)

        h5f = h5py.File(feature_filename,
                        'a',
                        backing_store=backing_store,
                        driver='core')
        try:
            # Drop the streams that were not requested
            if "cep" not in self.save_param:
                cep = None
            if "energy" not in self.save_param:
                energy = None
            if "fb" not in self.save_param:
                fb = None
            if "vad" not in self.save_param:
                label = None

            cep, fb, label = self.postProc(cep, energy, fb, label)

            # Only cep, fb and label are written; every other slot is None.
            write_hdf5(show, h5f, cep, None, None, None, None, None, fb, None,
                       None, None, None, None, label)
        finally:
            # Always release the handle, even when post-processing or the write fails.
            h5f.close()
Exemplo n.º 3
0
    def extract(self, show, channel,
                input_audio_filename=None,
                output_feature_filename=None,
                backing_store=False):
        """
        Compute the acoustic parameters (filter banks, cepstral coefficients and log-energy)
        for a single channel from a given audio file and write them to an HDF5 file.

        The kind of features is selected by ``self.feature_type`` ('mfcc' or
        'plp'); for PLP, ``self.rasta_plp`` is forwarded as the ``rasta`` option.
        Bottleneck features are not computed by this method, so nothing is
        written for them even when "bnf" is listed in ``self.save_param``.

        Note that ``self.audio_filename_structure`` and
        ``self.feature_filename_structure`` are overwritten when the
        corresponding file name arguments are given.

        :param show: ID of the show
        :param channel: channel number (0 if mono file)
        :param input_audio_filename: name of the input audio file to consider if the name of the audio file
            is independent from the ID of the show
        :param output_feature_filename: name of the output feature file to consider if the name of the feature file
            is independent from the ID of the show
        :param backing_store: boolean, if False, nothing is written to disk, if True, the file is written to disk
            when closed

        :return: an open hdf5 file handler; closing it is left to the caller
        """
        # If the input audio file name does not include the ID of the show
        # (i.e., if the audio_filename_structure does not include {})
        # the audio_filename_structure is updated to use the input_audio_filename
        if input_audio_filename is not None:
            self.audio_filename_structure = input_audio_filename
        audio_filename = self.audio_filename_structure.format(show)

        # If the output file name does not include the ID of the show,
        # (i.e., if the feature_filename_structure does not include {})
        # the feature_filename_structure is updated to use the output_feature_filename
        if output_feature_filename is not None:
            self.feature_filename_structure = output_feature_filename
        feature_filename = self.feature_filename_structure.format(show)

        # Open audio file and get the signal (the returned rate is not used here)
        signal, _ = read_audio(audio_filename, self.sampling_frequency)
        if signal.ndim == 1:
            signal = signal[:, numpy.newaxis]

        length, _ = signal.shape

        # If the size of the signal is not enough for one frame, return zero features
        if length < self.window_sample:
            cep = numpy.empty((0, self.ceps_number), dtype=PARAM_TYPE)
            energy = numpy.empty((0, 1), dtype=PARAM_TYPE)
            fb = numpy.empty((0, self.filter_bank_size), dtype=PARAM_TYPE)
            label = numpy.empty((0, 1), dtype='int8')

        else:
            # Random noise is added to the input signal to avoid zero frames.
            # The global NumPy seed is reset so that the noise is reproducible.
            numpy.random.seed(0)
            signal[:, channel] += 0.0001 * numpy.random.randn(signal.shape[0])

            dec = self.shift_sample * 250 * 25000 + self.window_sample
            dec2 = self.window_sample - self.shift_sample
            start = 0
            end = min(dec, length)

            # Process the signal by batch to avoid problems for very long signals.
            # NOTE(review): every iteration rebinds cep/energy/fb/label, so only
            # the last batch reaches the writer -- TODO confirm this is intended.
            while start < (length - dec2):
                logging.info('process part : %f %f %f',
                             start / self.sampling_frequency,
                             end / self.sampling_frequency,
                             length / self.sampling_frequency)

                chunk = signal[start:end, channel]

                if self.feature_type == 'mfcc':
                    # Extract cepstral coefficients, energy and filter banks
                    cep, energy, _, fb = mfcc(chunk,
                                              fs=self.sampling_frequency,
                                              lowfreq=self.lower_frequency,
                                              maxfreq=self.higher_frequency,
                                              nlinfilt=self.filter_bank_size if self.filter_bank == "lin" else 0,
                                              nlogfilt=self.filter_bank_size if self.filter_bank == "log" else 0,
                                              nwin=self.window_size,
                                              shift=self.shift,
                                              nceps=self.ceps_number,
                                              get_spec=False,
                                              get_mspec=True,
                                              prefac=self.pre_emphasis)
                elif self.feature_type == 'plp':
                    cep, energy, _, fb = plp(chunk,
                                             nwin=self.window_size,
                                             fs=self.sampling_frequency,
                                             plp_order=self.ceps_number,
                                             shift=self.shift,
                                             get_spec=False,
                                             get_mspec=True,
                                             prefac=self.pre_emphasis,
                                             rasta=self.rasta_plp)

                # Perform feature selection
                label, _ = self._vad(cep, energy, fb, chunk)
                # Pad the labels with False so that there is one label per frame
                if len(label) < len(energy):
                    label = numpy.hstack((label, numpy.zeros(len(energy)-len(label), dtype='bool')))

                start = end - dec2
                end = min(end + dec, length)
                if cep.shape[0] > 0:
                    logging.info('!! size of signal cep: %f len %d type size %d', cep[-1].nbytes/1024/1024,
                                 len(cep[-1]),
                                 cep[-1].nbytes/len(cep[-1]))

        # Compute the mean and std of energy, fb and cepstral coefficients over all selected frames
        energy_mean = energy[label].mean(axis=0)
        energy_std = energy[label].std(axis=0)
        fb_mean = fb[label, :].mean(axis=0)
        fb_std = fb[label, :].std(axis=0)
        cep_mean = cep[label, :].mean(axis=0)
        cep_std = cep[label, :].std(axis=0)

        # Bottleneck features are never computed in this method, so the three
        # slots are always None. Binding them unconditionally avoids an unbound
        # local when "bnf" is listed in save_param.
        bnf = bnf_mean = bnf_std = None

        # Drop the streams (and their statistics) that were not requested
        if "cep" not in self.save_param:
            cep = cep_mean = cep_std = None
        if "energy" not in self.save_param:
            energy = energy_mean = energy_std = None
        if "fb" not in self.save_param:
            fb = fb_mean = fb_std = None
        if "vad" not in self.save_param:
            label = None
        logging.info(label)

        # Create the directory of the HDF5 file if it doesn't exist. An empty
        # dirname means the current directory; exist_ok avoids the race between
        # an existence check and the creation when several workers run at once.
        dir_name = os.path.dirname(feature_filename)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)

        h5f = h5py.File(feature_filename, 'a', backing_store=backing_store, driver='core')
        try:
            write_hdf5(show, h5f,
                       cep, cep_mean, cep_std,
                       energy, energy_mean, energy_std,
                       fb, fb_mean, fb_std,
                       bnf, bnf_mean, bnf_std,
                       label)
        except Exception:
            # The handle is normally returned open; release it if the write
            # fails, since the caller will never receive it.
            h5f.close()
            raise

        return h5f