Example #1
    def resample(self, file):
        """Uses SoX to resample the wav file to 16 kHz, 1 channel, 16-bit wav,
        which is the ideal format for processing."""
        sampler = sox.Transformer()
        sampler.convert(samplerate=16000, n_channels=1, bitdepth=16)
        # Keep the .wav extension on the output name so SoX can infer the
        # format (the original appended '_sampled' after the extension)
        name, ext = common.split_file_ext(file)
        resampled_file = '{0}_sampled.{1}'.format(name, ext)
        resampled_path = common.file_path(resampled_file)
        sampler.build(common.file_path(file), resampled_path)
        common.file_exists(resampled_path)
        return resampled_file
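For reference, the same conversion with pysox alone, outside the common helpers (a minimal sketch; the file paths are placeholders):

    import sox

    tfm = sox.Transformer()
    tfm.convert(samplerate=16000, n_channels=1, bitdepth=16)  # 16 kHz, mono, 16-bit
    tfm.build('input.wav', 'input_sampled.wav')  # placeholder paths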
Example #2
    def is_wav_compatible(self, file):
        """Check if the file is in the correct wav format for processing.
        wave provides the file's property params; the audio's channels,
        sample width and frame rate are checked, and the file must not be
        compressed."""
        try:
            wav_file = wave.open(common.file_path(file))
            params = wav_file.getparams()
            wav_file.close()
        except wave.Error as exc:
            log.info(
                'Failed to retrieve properties of the wav file: {}'.format(
                    exc))
            return False
        # Assumed target format, matching resample(): 16 kHz, mono, 16-bit
        # (sampwidth is in bytes), uncompressed PCM
        nchannels, sampwidth, framerate, _, comptype, _ = params
        return (nchannels == 1 and sampwidth == 2 and framerate == 16000
                and comptype == 'NONE')
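For context, wave's getparams() returns (nchannels, sampwidth, framerate, nframes, comptype, compname), which is what the unpacking above relies on. A quick interactive check (placeholder path):

    import wave

    w = wave.open('input_sampled.wav', 'rb')  # placeholder path
    print(w.getparams())  # e.g. nchannels=1, sampwidth=2, framerate=16000, comptype='NONE'
    w.close()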
Example #3
    def export_audio_chunks(self, file, chunks):
        """
        Exports each chunk of audio to its own wav file
        :param file: file name with extension
        :param chunks: small chunks of wav audio
        :return: new exported file names
        """
        chunk_names = []
        for i, chunk in enumerate(chunks):
            chunk_name = "{0}_{1}.wav".format(file, i)
            # Export to the chunk's own path instead of overwriting the
            # source file's path on every iteration
            chunk_path = common.file_path(chunk_name)
            chunk.export(chunk_path, format="wav")
            chunk_names.append(chunk_name)
            common.file_exists(chunk_path)
        return chunk_names
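A minimal usage sketch with pydub (placeholder path and hypothetical `processor` instance; `make_chunks` comes from pydub.utils, as in the splitting example below):

    from pydub import AudioSegment
    from pydub.utils import make_chunks

    audio = AudioSegment.from_file('input.wav', 'wav')  # placeholder path
    chunks = make_chunks(audio, 5000)  # 5-second chunks
    names = processor.export_audio_chunks('input.wav', chunks)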
Example #4
    def spk_train_init(file):
        """
        TODO: Generate training data.
        This improves speech recognition so that the system can recognize
        speakers recorded in past data. Initialize the speaker model for
        Gaussian mixture model (GMM) training."""
        name, _ = common.split_file_ext(file)
        args = [
            common.JAVA_EXE, '-Xmx256m', '-cp', common.LIUM_PATH,
            'fr.lium.spkDiarization.programs.MTrainInit',
            '--sInputMask={}.seg'.format(common.seg_path(name)),  # segmentation
            '--fInputMask={}'.format(common.file_path(file)),  # audio features
            # The UBM is the input model, so it takes LIUM's '--tInputMask'
            # flag ('--sInputMask' was duplicated here in the original)
            '--tInputMask={}.ubm.gmm'.format(common.seg_path(name)),
            '--emInitMethod=copy',
            '--tOutputMask={}.init.gmm'.format(common.seg_path(name)), name
        ]
        common.call_subproc(args)
        # Check the path the output mask actually points at
        common.file_exists('{}.init.gmm'.format(common.seg_path(name)))
Example #5
    def split_to_equal_chunks(self, file):
        """
        Splits a wav file into smaller chunks of equal duration for training
        and better signal processing
        :param file: file name with extension
        :return: new exported file names
        """
        audio = AudioSegment.from_file(common.file_path(file), "wav")
        duration_in_sec = len(audio) / 1000  # pydub reports length in ms
        # File size in bytes: pydub's frame_width is sample_width * channels,
        # so frame_rate * frame_width already covers bit depth and channels
        # (the original factored frame_rate and channels in twice)
        size = audio.frame_rate * audio.frame_width * duration_in_sec
        # Chunk length in seconds that yields roughly 10 MB per chunk
        len_in_sec = math.ceil((duration_in_sec * 10000000) / size)
        len_in_ms = len_in_sec * 1000
        chunks = make_chunks(audio, len_in_ms)
        return self.export_audio_chunks(file, chunks)
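To make the chunk arithmetic concrete, worked numbers for the 16 kHz, mono, 16-bit target format (illustrative, not from the source):

    import math

    bytes_per_sec = 16000 * 2  # 16 kHz * 2-byte frames (mono, 16-bit)
    # ~10 MB per chunk: 10000000 / 32000 = 312.5 s, rounded up
    len_in_sec = math.ceil(10000000.0 / bytes_per_sec)  # 313 seconds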
Example #6
    def spk_train_map(file):
        """
        TODO: Generate training data.
        This improves speech recognition so that the system can recognize
        speakers recorded in past data. Train the speaker model using
        maximum a posteriori (MAP) adaptation of the GMM.
        """
        name, _ = common.split_file_ext(file)
        args = [
            common.JAVA_EXE, '-Xmx256m', '-cp', common.LIUM_PATH,
            'fr.lium.spkDiarization.programs.MTrainMAP',
            '--sInputMask={}.ident.seg'.format(common.seg_path(name)),  # segmentation
            '--fInputMask={}'.format(common.file_path(file)),  # audio features
            # The init model is the input, so it takes LIUM's '--tInputMask'
            # flag ('--sInputMask' was duplicated here in the original)
            '--tInputMask={}.init.gmm'.format(common.seg_path(name)),
            '--emCtrl=1,5,0.01', '--varCtrl=0.01,10.0',
            '--tOutputMask={}.gmm'.format(common.seg_path(name)), name
        ]
        common.call_subproc(args)
        # Check the path the output mask actually points at
        common.file_exists('{}.gmm'.format(common.seg_path(name)))
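The two training steps run in sequence: MTrainInit copies the UBM into an initial speaker model, and MTrainMAP adapts it to the speaker's audio, reading the .init.gmm that the first step wrote. A minimal orchestration sketch (hypothetical wrapper around the functions above):

    def train_speaker_model(file):
        # Order matters: MTrainMAP reads the .init.gmm from MTrainInit
        spk_train_init(file)
        spk_train_map(file)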
Example #7
    def audio_segmentation(self,
                           file,
                           start_list,
                           end_list,
                           concat=False,
                           file_name=None):
        """
        Breaks the file into small parts based on time slices and puts them
        back together if the concat option is True
        :param file: file name with extension
        :param start_list: list of ints representing start times in ms
        :param end_list: list of ints representing end times in ms
        :param concat: option to merge the parts into one file
        :param file_name: new file name for export
        :return: new file name(s)
        """
        file_names = []
        base_name, ext = common.split_file_ext(file)
        seg_name = '{0}_{1}.{2}'.format(base_name,
                                        file_name if file_name else 'seg', ext)
        audio = AudioSegment.from_file(common.file_path(file), "wav")
        # pydub already reports length in ms (the original multiplied by
        # 1000 again, which made the bounds check a no-op)
        duration_in_ms = len(audio)
        audio_segs = [
            audio[start:end] for start, end in zip(start_list, end_list)
            if (duration_in_ms >= start >= 0) and (duration_in_ms >= end > 0)
        ]
        if not audio_segs:
            return file_names
        if concat:
            seg_path = common.seg_path(seg_name)
            # Concatenate the slices back into a single segment
            audio_concat = sum(audio_segs[1:], audio_segs[0])
            audio_concat.export(seg_path, format="wav")
            file_names.append(seg_name)
            common.file_exists(seg_path)
        else:
            file_names = self.export_audio_chunks(seg_name, audio_segs)
        return file_names
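A minimal usage sketch (hypothetical `processor` instance and times; all times in milliseconds):

    # Extract 0-5 s and 10-15 s of input.wav and merge them into one file
    names = processor.audio_segmentation('input.wav', [0, 10000],
                                         [5000, 15000], concat=True,
                                         file_name='intro')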
Example #8
    def diarization(self, file):
        """Take a wav file in the right format and build a segmentation file.
        The seg file stores the speaker, start time, duration, gender and
        additional info for speech recognition."""
        name, _ = common.split_file_ext(file)
        seg_file = '{}.seg'.format(name)
        seg_path = common.seg_path(seg_file)
        args = [
            common.JAVA_EXE,
            '-Xmx{}m'.format(common.JAVA_MEM),
            '-jar',
            common.LIUM_PATH,
            '--fInputMask={}'.format(common.file_path(file)),  # input file
            '--sOutputMask={}'.format(seg_path),  # output file
            '--doCEClustering',  # add a cluster for each speaker
            name
        ]
        log.info('Processing diarization for {}'.format(file))
        common.call_subproc(args)
        common.file_exists(seg_path)
        log.info('File {} successfully diarized!'.format(file))
        data = self.build_speakers_segments(seg_file, name)
        # Put together an audio file for each speaker's parts
        sp_file_names = {}
        for speaker in data:
            speaker_id_file = speaker['speaker_id']
            file_names = self.speechClassifier.audioProcessor.audio_segmentation(
                file,
                speaker['start'],
                speaker['end'],
                concat=True,
                file_name=speaker_id_file)
            if not file_names:
                log.warning('Failed to perform audio segmentation for {}'.
                            format(speaker_id_file))
                # Skip this speaker instead of indexing an empty list
                continue
            sp_file_names[speaker_id_file] = file_names[0]
        return self.build_speakers_transcript(sp_file_names)
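End to end, the examples compose into a simple pipeline: resample to the target format, verify it, then diarize, which itself splits the audio per speaker and builds the transcripts. A minimal driver sketch (hypothetical function; assumes the methods above live on one object):

    def process_recording(proc, file):
        resampled = proc.resample(file)
        if not proc.is_wav_compatible(resampled):
            raise ValueError('unsupported wav format: {}'.format(resampled))
        # diarization() segments per speaker and returns their transcripts
        return proc.diarization(resampled)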