def generate_subtitles(source_path, output, concurrency, subtitle_file_format, api_key, secret_key):
    """
    Generate a subtitle file for the given audio/video source.

    Extracts the audio track, detects speech regions, converts each region
    to WAV in parallel, runs speech recognition on every region, formats
    the results, and writes them to *output* (or "<source>.<format>" when
    *output* is falsy).

    Returns the destination path on success, or 0 when recognition failed
    or the user cancelled with Ctrl-C.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = WAVConverter(source_path=audio_filename)
    # NOTE(review): audio_filename[-3:] is assumed to be the 3-char audio
    # format suffix (e.g. "wav") -- confirm against extract_audio.
    recognizer = SpeechRecognizer(api_key, secret_key, audio_rate, audio_filename[-3:])
    transcripts = []
    if regions:
        try:
            widgets = [
                "Converting speech regions to WAV files: ",
                Percentage(), ' ', Bar(), ' ', ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()
            widgets = [
                "Performing speech recognition: ",
                Percentage(), ' ', Bar(), ' ', ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                # A transcript equal to 1 is the recognizer's failure sentinel.
                if transcript == 1:
                    # Fix: shut the worker pool down before bailing out;
                    # previously the processes were leaked on this path.
                    pool.terminate()
                    pool.join()
                    return 0
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            return 0
    # Fix: release the pool on the success path too (resource leak).
    pool.close()
    pool.join()
    # Drop regions whose transcript is empty/falsy.
    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)
    dest = output
    if not dest:
        # Fix: the old tuple unpack left an unused `ext` local.
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))
    shutil.rmtree('temp')
    return dest
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format, grouping consecutive speech regions by speaker.

    Pipeline: extract audio -> detect speech regions -> label each region
    with a speaker (SR) -> merge consecutive same-speaker regions (runs
    capped at 10 regions) -> convert merged regions to FLAC -> recognise ->
    optionally translate -> format -> write.

    Returns the destination path, or 1 when translation was requested
    without a Google Translate API key.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    print("audio_filename: {}".format(audio_filename))
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    speaker_recognizer = SR(source_path=audio_filename)
    transcripts = []
    regions_update = []
    labels_update = []
    if regions:
        try:
            # One speaker label per raw speech region.
            labels = []
            for region in regions:
                label = speaker_recognizer(region)
                labels.append(label)
            print("Labels {}:".format(labels))
            # Merge runs of consecutive regions sharing a speaker label.
            # `items` holds (start, end) index pairs into `regions`;
            # `labels_update` keeps one label per merged run.
            _label = []
            items = []
            labels_update = []
            start = None
            end = None
            i = 0
            for index, label in enumerate(labels):
                if len(_label) == 0:
                    _label.append(label)
                    start = index
                    labels_update.append(label)
                    i = i + 1
                elif label in _label and i < 10:
                    end = index
                    i = i + 1
                else:
                    item = (start, end)
                    items.append(item)
                    _label = []
                    _label.append(label)
                    start = index
                    end = None
                    labels_update.append(label)
                    # NOTE(review): i resets to 0 here but to 1 for the very
                    # first region, so later runs may span one extra region --
                    # confirm the intended cap.
                    i = 0
            # Fix: flush the final in-progress run. Previously the trailing
            # regions were silently dropped, leaving labels_update one entry
            # longer than items/regions_update.
            if start is not None:
                items.append((start, end))
            print("items {}".format(items))
            print("labels {}".format(labels))
            print("labels_update {}".format(labels_update))
            # Turn index pairs into merged (start_time, end_time) regions.
            for item in items:
                if item[1] is None:  # single-region run
                    start, end = regions[item[0]]
                    regions_update.append((start, end))
                else:
                    start1, end1 = regions[item[0]]
                    start2, end2 = regions[item[1]]
                    regions_update.append((start1, end2))
            print("regions_update {}".format(regions_update))
            widgets = [
                "Converting speech regions to FLAC files: ",
                Percentage(), ' ', Bar(), ' ', ETA()
            ]
            process_bar = ProgressBar(widgets=widgets,
                                      maxval=len(regions_update)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(
                    pool.imap(converter, regions_update)):
                extracted_regions.append(extracted_region)
                process_bar.update(i)
            process_bar.finish()
            widgets = [
                "Performing speech recognition: ",
                Percentage(), ' ', Bar(), ' ', ETA()
            ]
            process_bar.widgets = widgets
            process_bar.maxval = len(regions_update)
            process_bar.start()
            for i, transcript in enumerate(
                    pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                process_bar.update(i)
            print("transcripts {}".format(transcripts))
            process_bar.finish()
            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if api_key:
                    google_translate_api_key = api_key
                    translator = Translator(dst_language,
                                            google_translate_api_key,
                                            dst=dst_language,
                                            src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        src_language, dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    process_bar.widgets = widgets
                    process_bar.maxval = len(regions_update)
                    process_bar.start()
                    translated_transcripts = []
                    for i, transcript in enumerate(
                            pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        process_bar.update(i)
                    process_bar.finish()
                    transcripts = translated_transcripts
                else:
                    print(
                        "Error: Subtitle translation requires specified Google Translate API key. "
                        "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            process_bar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise
    # Fix: zip transcripts into the transcript slot. The original zipped
    # labels_update there, so `if t` filtered on the speaker label and the
    # formatter received (region, label, transcript).
    timed_subtitles = [
        (r, t, l)
        for r, t, l in zip(regions_update, transcripts, labels_update)
        if t
    ]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)
    print("formatted_subtitles {}: ".format(formatted_subtitles))
    # Fix: restore the default destination; open(None, 'wb') raised
    # TypeError whenever `output` was left at its default.
    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))
    os.remove(audio_filename)
    return dest
def main():
    """
    Command-line entry point for the Google-API pipeline: parse and
    validate arguments, then extract audio, recognise speech region by
    region, optionally translate, and write the subtitle file.

    Returns 0 on success, 1 on any error (used as the process exit code).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path',
                        # Fix: raw string -- "\C" is an invalid escape
                        # sequence (DeprecationWarning, future SyntaxError).
                        default=r".\CCTV_News.MP4",
                        help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C', '--concurrency',
                        help="Number of concurrent API requests to make",
                        type=int, default=10)
    parser.add_argument(
        '-o', '--output',
        help="Output path for subtitles (by default, subtitles are saved in "
             "the same directory and name as the source path)")
    parser.add_argument('-F', '--format',
                        help="Destination subtitle format", default="srt")
    parser.add_argument('-S', '--src-language',
                        help="Language spoken in source file", default="zh-CN")
    parser.add_argument('-D', '--dst-language',
                        help="Desired language for the subtitles",
                        default="zh-CN")
    parser.add_argument(
        '-K', '--api-key',
        help="The Google Translate API key to be used. (Required for subtitle translation)")
    parser.add_argument('--list-formats',
                        help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages',
                        help="List all available source/destination languages",
                        action='store_true')
    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS.keys():
            print("{format}".format(format=subtitle_format))
        return 0
    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0
    if args.format not in FORMATTERS.keys():
        print("Subtitle format not supported. Run with --list-formats to see all supported formats.")
        return 1
    if args.src_language not in LANGUAGE_CODES.keys():
        print("Source language not supported. Run with --list-languages to see all supported languages.")
        return 1
    if args.dst_language not in LANGUAGE_CODES.keys():
        print("Destination language not supported. Run with --list-languages to see all supported languages.")
        return 1
    if not args.source_path:
        print("Error: You need to specify a source path.")
        return 1

    audio_filename, audio_rate = extract_audio(args.source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(args.concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=args.src_language,
                                  rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)
    transcripts = []
    if regions:
        try:
            widgets = [
                "Converting speech regions to FLAC files: ",
                Percentage(), ' ', Bar(), ' ', ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()
            widgets = [
                "Performing speech recognition: ",
                Percentage(), ' ', Bar(), ' ', ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            for i, transcript in enumerate(
                    pool.imap(recognizer, extracted_regions)):
                transcripts.append(transcript)
                pbar.update(i)
            pbar.finish()
            if not is_same_language(args.src_language, args.dst_language):
                if args.api_key:
                    google_translate_api_key = args.api_key
                    translator = Translator(args.dst_language,
                                            google_translate_api_key,
                                            dst=args.dst_language,
                                            src=args.src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        args.src_language, args.dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets,
                                       maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(
                            pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts
                else:
                    # Fix: the old backslash-continued literal embedded the
                    # source indentation in the printed message.
                    print("Error: Subtitle translation requires specified Google Translate API key. "
                          "See --help for further information.")
                    return 1
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            return 1

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(args.format)
    formatted_subtitles = formatter(timed_subtitles)
    dest = args.output
    if not dest:
        # Fix: drop the unused `ext` local from the tuple unpack.
        base = os.path.splitext(args.source_path)[0]
        dest = "{base}.{format}".format(base=base, format=args.format)
    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))
    print("Subtitles file created at {}".format(dest))
    os.remove(audio_filename)
    return 0
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.

    This variant converts speech regions to FLAC, writes the region list
    to ./data/cctv/cctv.txt, and obtains all transcripts from
    SpeechRecognizer() in one call (presumably it reads that manifest --
    TODO confirm). Returns the path of the subtitle file written.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    # NOTE(review): create()'s purpose is not visible from here --
    # presumably it prepares the ./data/cctv directory; verify.
    create()
    transcripts = []
    if regions:
        try:
            widgets = [
                "Converting speech regions to FLAC files: ",
                Percentage(), ' ', Bar(), ' ', ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            # Fix: write the manifest once after conversion (same final file
            # contents, no repeated truncate-and-rewrite), use plain 'w'
            # since the file is never read here, and drop the redundant
            # f.close() -- the with-statement already closes the file.
            with open("./data/cctv/cctv.txt", 'w') as f:
                for j in extracted_regions:
                    f.write('{}\n'.format(j))
            pbar.finish()
            # NOTE(review): the constructor call itself is used as the
            # transcript list -- confirm SpeechRecognizer() returns/acts as
            # an iterable of transcripts in this project.
            transcripts = SpeechRecognizer()
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise
    # Progress bar over the pairing step; zip has no length, so the bar
    # runs with an unknown maximum.
    bar = progressbar.ProgressBar(widgets=[
        progressbar.Percentage(),
        progressbar.Bar(),
        ' (', progressbar.SimpleProgress(), ') ',
        ' (', progressbar.ETA(), ') ',
    ])
    timed_subtitles = [(r, t) for r, t in bar(zip(regions, transcripts)) if t]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)
    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))
    os.remove(audio_filename)
    return dest
def main():
    """
    Command-line entry point for the offline-inference pipeline: validates
    arguments, slices the source audio into WAV regions under ./temp,
    builds a manifest, and runs local inference to produce subtitles.

    Returns 0 on success, 1 on any error (used as the process exit code).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path', default="./CCTV_News.mp4",
                        help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C', '--concurrency',
                        help="Number of concurrent API requests to make",
                        type=int, default=10)
    parser.add_argument(
        '-o', '--output',
        help="Output path for subtitles (by default, subtitles are saved in "
             "the same directory and name as the source path)")
    parser.add_argument('-F', '--format',
                        help="Destination subtitle format", default="srt")
    parser.add_argument('-S', '--src-language',
                        help="Language spoken in source file", default="zh-CN")
    parser.add_argument('-D', '--dst-language',
                        help="Desired language for the subtitles",
                        default="zh-CN")
    parser.add_argument(
        '-K', '--api-key',
        help="The Google Translate API key to be used. (Required for subtitle translation)")
    parser.add_argument('--list-formats',
                        help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages',
                        help="List all available source/destination languages",
                        action='store_true')
    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS.keys():
            print("{format}".format(format=subtitle_format))
        return 0
    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0
    if args.format not in FORMATTERS.keys():
        print("Subtitle format not supported. Run with --list-formats to see all supported formats.")
        return 1
    if args.src_language not in LANGUAGE_CODES.keys():
        print("Source language not supported. Run with --list-languages to see all supported languages.")
        return 1
    if args.dst_language not in LANGUAGE_CODES.keys():
        print("Destination language not supported. Run with --list-languages to see all supported languages.")
        return 1
    if not args.source_path:
        print("Error: You need to specify a source path.")
        return 1

    audio_filename, audio_rate = extract_audio(args.source_path)
    regions = find_speech_regions(audio_filename)
    pool = multiprocessing.Pool(args.concurrency)
    converter = WAVConverter(source_path=audio_filename, slicenum=len(regions))
    # Fix: initialise transcripts; previously an empty `regions` left it
    # undefined and the zip below raised NameError.
    transcripts = []
    if regions:
        try:
            # Fix: progress label said "WAVC"; the regions are WAV files.
            widgets = [
                "Converting speech regions to WAV files: ",
                Percentage(), ' ', Bar(), ' ', ETA()
            ]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()
            # The sliced WAVs in ./temp are sufficient from here on.
            os.remove(audio_filename)
            wavlist = create_manifest(os.getcwd() + '/temp',
                                      os.getcwd() + '/temp' + '/wavlist.txt')
            transcripts = infer.infer_interface(wavlist, len(extracted_regions))
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            return 1

    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(args.format)
    formatted_subtitles = formatter(timed_subtitles)
    dest = args.output
    if not dest:
        # Fix: drop the unused `ext` local from the tuple unpack.
        base = os.path.splitext(args.source_path)[0]
        dest = "{base}.{format}".format(base=base, format=args.format)
    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))
    print("Subtitles file created at {}".format(dest))
    shutil.rmtree('temp')
    return 0
def generate_subtitles(  # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
):
    """
    Given an input audio/video file, generate subtitles in the specified
    language and format.

    Pipeline: extract audio -> detect speech regions -> convert each region
    to FLAC -> recognise speech -> translate if the source and destination
    languages differ -> format -> write. Returns the path of the subtitle
    file that was written (1 if translation was requested without an API
    key); re-raises KeyboardInterrupt after cleaning up the worker pool.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    regions = find_speech_regions(audio_filename)

    pool = multiprocessing.Pool(concurrency)
    converter = FLACConverter(source_path=audio_filename)
    recognizer = SpeechRecognizer(language=src_language,
                                  rate=audio_rate,
                                  api_key=GOOGLE_SPEECH_API_KEY)

    transcripts = []
    if regions:
        try:
            def start_bar(title):
                # Fresh progress bar sized to the number of regions.
                return ProgressBar(
                    widgets=[title, Percentage(), ' ', Bar(), ' ', ETA()],
                    maxval=len(regions)).start()

            pbar = start_bar("Converting speech regions to FLAC files: ")
            extracted_regions = []
            for idx, flac_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(flac_region)
                pbar.update(idx)
            pbar.finish()

            pbar = start_bar("Performing speech recognition: ")
            for idx, text in enumerate(pool.imap(recognizer, extracted_regions)):
                transcripts.append(text)
                pbar.update(idx)
            pbar.finish()

            # Compare the base language codes ("zh" in "zh-CN").
            if src_language.split("-")[0] != dst_language.split("-")[0]:
                if not api_key:
                    print(
                        "Error: Subtitle translation requires specified Google Translate API key. "
                        "See --help for further information.")
                    return 1
                google_translate_api_key = api_key
                translator = Translator(dst_language,
                                        google_translate_api_key,
                                        dst=dst_language,
                                        src=src_language)
                prompt = "Translating from {0} to {1}: ".format(
                    src_language, dst_language)
                pbar = start_bar(prompt)
                translated_transcripts = []
                for idx, text in enumerate(pool.imap(translator, transcripts)):
                    translated_transcripts.append(text)
                    pbar.update(idx)
                pbar.finish()
                transcripts = translated_transcripts
        except KeyboardInterrupt:
            pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            raise

    # Keep only regions that produced a non-empty transcript.
    timed_subtitles = [(region, text)
                       for region, text in zip(regions, transcripts)
                       if text]
    formatter = FORMATTERS.get(subtitle_file_format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = output
    if not dest:
        base = os.path.splitext(source_path)[0]
        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

    with open(dest, 'wb') as output_file:
        output_file.write(formatted_subtitles.encode("utf-8"))
    os.remove(audio_filename)
    return dest