Пример #1
0
    def adjust_wav_amplitude(self, wav_file, rms_amplitude):
        """Runs the normalize tool on a WAV file with the requested amplitude.

        The tool registered under Tools.KEY_NORMALIZE is invoked with the
        "-a" option followed by the amplitude and the file name.

        Args:
            :wav_file path of the WAV file passed to the normalize tool
            :rms_amplitude requested amplitude; anything above 1.0 is capped at 1.0

        Raises:
            RuntimeError: if the tool finishes with a non-zero return code
        """
        capped_amplitude = 1.0 if rms_amplitude > 1.0 else rms_amplitude

        normalize_tool = self.__tools.get_tool(Tools.KEY_NORMALIZE)
        exit_code = Util.execute_rc(
            [normalize_tool, '-a', str(capped_amplitude), wav_file])
        if exit_code == 0:
            return

        raise RuntimeError('Failed to adjust voice overlay volume')
Пример #2
0
    def to_wav(self, output_file_name):
        """Converts the source audio track to WAV format using ffmpeg.

        The source MP3 is converted to WAV for two reasons:
        * the voice overlay amplitude must be adjusted to match the MP3 file level, and the "sox"
          tool used for that cannot deal with MP3 directly.
        * "ffmpeg" was seen failing during the mixing phase when the source is an MP3 file. The
          blind guess is a structure mismatch between the MP3 file (i.e. one carrying a cover
          image) and the speech segments being plain WAV. This could probably be solved in a
          better way, but a WAV is needed anyway, so no further research was done.

        Args:
            :output_file_name path of the WAV file ffmpeg is asked to write

        Raises:
            RuntimeError: if ffmpeg finishes with a non-zero return code
        """
        ffmpeg_args = ['ffmpeg']
        ffmpeg_args.extend(['-i', self.file_name])
        ffmpeg_args.append(output_file_name)

        exit_code = Util.execute_rc(ffmpeg_args)
        if exit_code == 0:
            return

        raise RuntimeError('Failed to convert to WAV file')
Пример #3
0
    def check_env(self):
        """Checks if all external tools we need are available in $PATH.

        Populates the tool map with the platform-specific executable names for
        ffmpeg, sox and espeak (".exe" suffix on win32), then resolves the
        normalize tool, which may be installed under more than one name.
        Every missing tool is logged before Util.abort() is called, so the
        user sees the complete list in one run.

        NOTE(review): this presumes Util.abort() terminates the program; if it
        returns, execution simply continues with the next check.
        """
        is_windows = sys.platform == 'win32'
        exe_suffix = '.exe' if is_windows else ''

        self.__tools = {
            self.KEY_FFMPEG: 'ffmpeg' + exe_suffix,
            self.KEY_SOX: 'sox' + exe_suffix,
            self.KEY_ESPEAK: 'espeak' + exe_suffix,
        }

        # The flag must live outside the loop and actually be raised on a miss,
        # otherwise the abort below can never trigger.
        failed = False
        for tool in self.__tools.values():
            if Util.which(tool) is None:
                Log.e("'{}' not found.".format(tool))
                failed = True

        if failed:
            Util.abort('Required tools not found. See documentation for installation guidelines.')

        # sometimes normalize is called normalize-audio (i.e. in Debian/Ubuntu)
        # so we do special checks just for this one particular tool
        if is_windows:
            normalize_candidates = ['normalize.exe']
        else:
            normalize_candidates = ['normalize', 'normalize-audio']

        for normalize in normalize_candidates:
            if Util.which(normalize) is not None:
                self.__tools[self.KEY_NORMALIZE] = normalize
                break
        else:
            # no candidate was found in $PATH
            Util.abort('2: "{}" not found. See documentation for installation guidelines.'.format('normalize'))

        self.__check_env_called = True
Пример #4
0
    def __create_voice_wav(self, segments, speech_wav_file_name):
        """Renders all spoken segments and merges them into one padded WAV file.

        Each segment is spoken into "<idx>.wav" in the temporary folder. The
        segments are then padded with ffmpeg's "apad" filter and joined with
        "concat", so that every segment occupies a fixed-length slot.

        Args:
            :segments list of texts to speak; the first entry is the title, the rest are ticks
            :speech_wav_file_name path of the merged speech WAV file to create

        Raises:
            RuntimeError: if a segment cannot be spoken or ffmpeg fails to merge the segments
        """
        for idx, segment_text in enumerate(segments):
            segment_file_name = os.path.join(self.__tmp_dir,
                                             '{}.wav'.format(idx))
            if not self.speak_to_wav(segment_text, segment_file_name):
                raise RuntimeError(
                    'Failed to save speak "{0}" into "{1}".'.format(
                        segment_text, segment_file_name))

        # we need to get the frequency of speech waveform generated by espeak to later be able to tell
        # ffmpeg how to pad/clip the part
        import wave
        wav = wave.open(os.path.join(self.__tmp_dir, '0.wav'), 'rb')
        # try/finally (rather than "with") guarantees the handle is released even
        # on interpreters where wave objects are not context managers.
        try:
            speech_frame_rate = wav.getframerate()
        finally:
            wav.close()

        # samples = rate_per_second * seconds * tick_interval_in_minutes
        max_len_tick = speech_frame_rate * 60 * self.__config.tick_interval
        max_len_title = speech_frame_rate * 60 * self.__config.tick_offset

        # merge voice overlay segments into one file with needed padding
        concat_cmd = [self.__tools.get_tool(Tools.KEY_FFMPEG), '-y']
        pad_filters = []
        padded_labels = []
        for idx in range(len(segments)):
            concat_cmd.extend(
                ['-i',
                 os.path.join(self.__tmp_dir, '{}.wav'.format(idx))])

            # the title segment (idx 0) fills the slot up to the first tick,
            # every other segment fills one tick interval
            max_len = max_len_title if idx == 0 else max_len_tick
            # http://ffmpeg.org/ffmpeg-filters.html#Filtergraph-description
            pad_filters.append('[{0}]apad=whole_len={1}[g{0}]'.format(
                idx, max_len))
            padded_labels.append('[g{}]'.format(idx))

        filter_complex = '{};{}concat=n={}:v=0:a=1'.format(
            ';'.join(pad_filters), ''.join(padded_labels), len(segments))

        concat_cmd.extend(['-filter_complex', filter_complex])
        concat_cmd.append(speech_wav_file_name)

        if Util.execute_rc(concat_cmd) != 0:
            raise RuntimeError('Failed to merge voice segments')
Пример #5
0
    def mix_wav_tracks(self, file_out, encoding_quality, wav_files):
        """Mixes given WAV tracks together into a single MP3 file

        All inputs are merged with ffmpeg's "amerge" filter, reduced to two
        channels and encoded with libmp3lame. An existing output file is
        overwritten ("-y").

        Args:
            :file_out path of the MP3 file to write
            :encoding_quality LAME encoder quality parameter
            :wav_files list of WAV files to mix

        Raises:
            RuntimeError: if ffmpeg finishes with a non-zero return code
        """
        merge_cmd = [self.__tools.get_tool(Tools.KEY_FFMPEG), '-y']

        input_count = 0
        for wav in wav_files:
            merge_cmd.extend(['-i', wav])
            input_count += 1

        merge_cmd.extend([
            # amerge defaults to two inputs; state the count explicitly so the
            # filter matches however many files were passed in
            '-filter_complex', 'amerge=inputs={}'.format(input_count),
            '-ac', '2',
            '-c:a', 'libmp3lame',
            '-q:a', str(encoding_quality),
            file_out])
        if Util.execute_rc(merge_cmd) != 0:
            raise RuntimeError('Failed to create final MP3 file')
Пример #6
0
    def calculate_rms_amplitude(self, wav_file):
        """Calls SOX to get the RMS amplitude of WAV file

        Runs "sox <wav_file> -n stat" and parses the "name: value" lines of
        the error stream returned by Util.execute().

        Args:
            :wav_file path of the WAV file to analyse

        Returns:
            float

        Raises:
            RuntimeError: if sox fails or its output has no "RMS amplitude" entry
        """
        src_amplitude_cmd = [self.__tools.get_tool(Tools.KEY_SOX), wav_file, '-n', 'stat']
        rc, _, err = Util.execute(src_amplitude_cmd)
        if rc != 0:
            raise RuntimeError('Failed to calculate RMS amplitude of "{}"'.format(wav_file))

        # let's check what "sox" figured out
        sox_results = {}
        for line in err:
            parts = line.split(':')
            if len(parts) < 2:
                # blank lines and free-form messages carry no "name: value" pair
                continue
            # "RMS     amplitude" -> "rms_amplitude"
            key = re.sub(' +', '_', parts[0].strip().lower())
            sox_results[key] = parts[1].strip()

        if 'rms_amplitude' not in sox_results:
            # surface this as the same error type as any other failure here
            raise RuntimeError('Failed to calculate RMS amplitude of "{}"'.format(wav_file))

        return float(sox_results['rms_amplitude'])
Пример #7
0
    def speak_to_wav(self, text, out_file_name):
        """Speaks given text into a WAV file using espeak.

        The text is first stored in a temporary ".txt" file inside the
        temporary folder and passed to espeak with "-f". The text file is
        removed afterwards unless espeak failed or cleanup is disabled in the
        configuration.

        Args:
            :text text to speak; non-bytes values are encoded as UTF-8 before writing
            :out_file_name path of the WAV file espeak is asked to write

        Returns:
            bool: True if espeak finished with return code 0, False otherwise
        """
        # the file is written in binary mode, so make sure we hand it bytes
        # NOTE(review): presumes espeak reads the input file as UTF-8 - confirm
        payload = text if isinstance(text, bytes) else text.encode('utf-8')

        # delete=False: the file must outlive the handle so espeak can read it
        with tempfile.NamedTemporaryFile(suffix='.txt', dir=self.__tmp_dir,
                                         delete=False) as fh:
            fh.write(payload)
            text_tmp_file = fh.name

        rc = Util.execute_rc(
            [
                self.__tools.get_tool(Tools.KEY_ESPEAK), '-s',
                str(self.__config.speech_speed), '-z', '-w', out_file_name,
                '-f', text_tmp_file
            ],
            debug=self.__config.debug)

        # keep the text file around on failure to help with troubleshooting
        if rc == 0 and not self.__config.no_cleanup:
            os.remove(text_tmp_file)

        return rc == 0
Пример #8
0
    def get_out_file_name(self, music_track):
        """Resolves the output file name for the given music track.

        The name produced from the configured file_out_format template
        (placeholders "name" and "ext") is used as follows:
        * no file_out configured: template name placed next to the source file
        * file_out is an existing file: file_out itself
        * file_out is an existing directory: template name inside that directory
        * anything else: the bare base name of the source file

        Args:
            :music_track object exposing the source path as "file_name"

        Returns:
            the output file name
        """
        source_path = music_track.file_name
        name_part, ext_part = Util.split_file_name(source_path)
        templated_name = self.__config.file_out_format.format(
            name=name_part, ext=ext_part)

        target = self.__config.file_out
        if target is None:
            return os.path.join(os.path.dirname(source_path), templated_name)

        if os.path.isfile(target):
            return target

        if os.path.isdir(target):
            return os.path.join(target, templated_name)

        # NOTE(review): file_out pointing at a not-yet-existing path ends up
        # here and is ignored - confirm this fallback is intended
        return os.path.basename(source_path)
Пример #9
0
    def __init__(self, file_name):
        """Reads basic audio properties and tags of the given MP3 file.

        Args:
            :file_name path of the MP3 file to inspect

        Raises:
            OSError: if file_name does not point to an existing file
        """
        import math

        if not os.path.isfile(file_name):
            raise OSError('File not found: "{}"'.format(file_name))

        mp3 = MP3(file_name)

        base_name, _ = Util.split_file_name(file_name)
        self.base_name = base_name
        self.file_name = file_name

        # we round up duration to full minutes
        # NOTE(review): assumes mp3.info.length is in seconds (as mutagen
        # reports it); the duration is consumed as a whole number of minutes
        self.duration = int(math.ceil(mp3.info.length / 60.0))
        self.bitrate = mp3.info.bitrate

        # read the tags via the class' __get_tag helper
        self.title = self.__get_tag(mp3, self.TAG_TITLE)
        self.artist = self.__get_tag(mp3, self.TAG_ARTIST)
        self.album_artist = self.__get_tag(mp3, self.TAG_ALBUM_ARTIST)
        self.album_title = self.__get_tag(mp3, self.TAG_ALBUM_TITLE)
        self.composer = self.__get_tag(mp3, self.TAG_COMPOSER)
        self.performer = self.__get_tag(mp3, self.TAG_PERFORMER)
        self.comment = self.__get_tag(mp3, self.TAG_COMMENT)
        self.track_number = self.__get_tag(mp3, self.TAG_TRACK_NUMBER)
Пример #10
0
    def voice_stamp(self, mp3_file_name):
        """Creates a voice-stamped version of the given MP3 file.

        Builds the list of spoken segments (track title first, then one entry
        per time tick). Unless dry-run mode is enabled, the segments are
        rendered into a speech WAV, the music is converted to WAV, the speech
        level is adjusted relative to the RMS amplitude of the music, both
        tracks are mixed into an MP3 and the result is moved to the output
        file name. In dry-run mode only the planned actions are logged.

        Args:
            :mp3_file_name path of the source MP3 file

        Returns:
            bool: False if a RuntimeError was caught and logged, True otherwise

        Raises:
            ValueError: if the track is shorter than 1 + tick_offset
            OSError: if the target file exists and force_overwrite is not set
            RuntimeError: re-raised instead of logged when debug is enabled
        """
        result = True

        try:
            Log.level_push('Processing "{}"'.format(mp3_file_name))
            music_track = Mp3FileInfo(mp3_file_name)

            # some sanity checks first
            min_track_length = 1 + self.__config.tick_offset
            if music_track.duration < min_track_length:
                raise ValueError(
                    'Track too short (min. {}, current len {})'.format(
                        min_track_length, music_track.duration))

            # check if we can create output file too
            if not self.__config.dry_run_mode:
                if os.path.exists(self.get_out_file_name(
                        music_track)) and not self.__config.force_overwrite:
                    raise OSError(
                        'Target "{}" already exists. Use -f to force overwrite.'
                        .format(self.get_out_file_name(music_track)))

                # create temporary folder
                self.__make_temp_dir()

            # let's now create WAVs with our spoken parts.
            ticks = range(self.__config.tick_offset, music_track.duration,
                          self.__config.tick_interval)
            extras = {'config_name': self.__config.name}

            # First goes track title, then time ticks
            # NOTE: we will generate title WAV even if i.e. title_format is empty. This is intentional, to keep
            #       further logic simpler, because if both title and tick formats would be empty, then skipping
            #       WAV generation would left us with no speech overlay file for processing and mixing.
            #       I do not want to have the checks for such case
            track_title_to_speak = Util.prepare_for_speak(
                Util.process_placeholders(
                    self.__config.title_format,
                    Util.merge_dicts(music_track.get_placeholders(), extras)))
            Log.i('Announced as "{}"'.format(track_title_to_speak))
            Log.v('Announcement format "{}"'.format(
                self.__config.title_format))

            segments = [track_title_to_speak]

            if self.__config.tick_format != '':
                for time_marker in ticks:
                    minutes = time_marker + self.__config.tick_add
                    extras = {
                        'minutes': minutes,
                        'minutes_digits': Util.separate_chars(minutes),
                    }
                    tick_string = Util.process_placeholders(
                        self.__config.tick_format,
                        Util.merge_dicts(music_track.get_placeholders(),
                                         extras))
                    segments.append(Util.prepare_for_speak(tick_string))

            if self.__config.dry_run_mode:
                # the title is always the first segment, so it is not a tick
                Log.i('Duration {} mins, tick count: {}'.format(
                    music_track.duration, (len(segments) - 1)))
                Log.v('Tick format "{}"'.format(self.__config.tick_format))

            if not self.__config.dry_run_mode:
                speech_wav_full = os.path.join(self.__tmp_dir, 'speech.wav')

                self.__create_voice_wav(segments, speech_wav_full)

                # convert source music track to WAV
                music_wav_full_path = os.path.join(
                    self.__tmp_dir,
                    os.path.basename(music_track.file_name) + '.wav')
                music_track.to_wav(music_wav_full_path)

                # calculate RMS amplitude of music track as reference to gain voice to match
                rms_amplitude = self.__audio.calculate_rms_amplitude(
                    music_wav_full_path)

                # the music is only the reference; it is the speech overlay
                # whose level gets adjusted
                target_speech_rms_amplitude = rms_amplitude * self.__config.speech_volume_factor
                self.__audio.adjust_wav_amplitude(speech_wav_full,
                                                  target_speech_rms_amplitude)

            # mix all stuff together
            file_out = self.get_out_file_name(music_track)

            if not self.__config.dry_run_mode:
                Log.i('Writing: "{}"'.format(file_out))

                # the MP3 is built under a temporary name in the target folder
                # and renamed only once it is complete
                # noinspection PyProtectedMember
                self.__tmp_mp3_file = os.path.join(
                    os.path.dirname(file_out),
                    next(tempfile._get_candidate_names()) + '.mp3')

                # noinspection PyUnboundLocalVariable
                self.__audio.mix_wav_tracks(
                    self.__tmp_mp3_file,
                    music_track.get_encoding_quality_for_lame_encoder(),
                    [music_wav_full_path, speech_wav_full])

                # copy some ID tags to newly create MP3 file
                music_track.write_id3_tags(self.__tmp_mp3_file)

                if os.path.exists(file_out):
                    os.remove(file_out)

                os.rename(self.__tmp_mp3_file, file_out)
                self.__tmp_mp3_file = None
            else:
                output_file_msg = 'Output file "{}"'.format(file_out)
                if os.path.exists(file_out):
                    output_file_msg += ' *** TARGET FILE ALREADY EXISTS ***'
                Log.i(output_file_msg)
                Log.v('Output file name format "{}"'.format(
                    self.__config.file_out_format))
                Log.i('')

        except RuntimeError as ex:
            # in debug mode let the error propagate with its traceback
            if self.__config.debug:
                raise
            Log.e(ex)
            result = False

        finally:
            Log.level_pop()
            self.__cleanup()

        return result