def validate(args):
    """
    Check that the CLI arguments passed to autosub are valid.

    When ``args.list_formats`` or ``args.list_languages`` is set, the
    corresponding listing is printed and 0 is returned without validating
    anything else.  Otherwise the subtitle format, source language,
    destination language and source path are checked in that order; the
    first failing check prints a message and returns 1.  True is returned
    when every check passes.

    NOTE(review): the failure value 1 is truthy and the listing value 0 is
    falsy, so callers must compare the result explicitly (e.g. ``is True``)
    rather than rely on truthiness -- confirm against the call sites.
    """
    if args.list_formats:
        print("List of formats:")
        # Iterating the mapping yields its keys; no intermediate list needed.
        for subtitle_format in FORMATTERS:
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    # Membership is tested on the mappings directly instead of on a copy of
    # their keys.
    if args.format not in FORMATTERS:
        print("Subtitle format not supported. Run with --list-formats to see all supported formats.")
        return 1

    if args.src_language not in LANGUAGE_CODES:
        print("Source language not supported. Run with --list-languages to see all supported languages.")
        return 1

    if args.dst_language not in LANGUAGE_CODES:
        print("Destination language not supported. Run with --list-languages to see all supported languages.")
        return 1

    if not args.source_path:
        print("Error: You need to specify a source path.")
        return 1

    return True
def validate(args):
    """
    Check that the CLI arguments passed to autosub are valid.

    The subtitle format, source language, destination language and source
    path are examined in that order.  The first problem found is printed
    and False is returned; True is returned when nothing is wrong.
    """
    problem = None

    if args.format not in FORMATTERS:
        problem = ("Subtitle format not supported. "
                   "Run with --list-formats to see all supported formats.")
    elif args.src_language not in LANGUAGE_CODES:
        problem = ("Source language not supported. "
                   "Run with --list-languages to see all supported languages.")
    elif args.dst_language not in LANGUAGE_CODES:
        problem = ("Destination language not supported. "
                   "Run with --list-languages to see all supported languages.")
    elif not args.source_path:
        problem = "Error: You need to specify a source path."

    if problem is None:
        return True

    print(problem)
    return False
def main(path_file):
    """
    Run autosub as a command-line program.

    Args:
        path_file: Value used for ``--source_path`` when the option is not
            supplied on the command line.  It is also echoed to stdout
            before argument parsing.

    Returns:
        0 after printing the format or language listing, 1 when
        ``validate`` rejects the arguments or the run is interrupted with
        Ctrl-C, and otherwise the value returned by ``generate_subtitles``
        (printed as the created subtitles file path).
    """
    # NOTE(review): looks like leftover debug output -- confirm before removing.
    print(path_file)

    parser = argparse.ArgumentParser()
    parser.add_argument('--source_path',
                        help="Path to the video or audio file to subtitle",
                        nargs='?', default=path_file)
    parser.add_argument('-C', '--concurrency',
                        help="Number of concurrent API requests to make",
                        type=int, default=DEFAULT_CONCURRENCY)
    # Adjacent literals are used instead of a backslash continuation inside
    # the string, which would embed the source indentation in the help text.
    parser.add_argument('-o', '--output',
                        help="Output path for subtitles (by default, subtitles are saved in "
                             "the same directory and name as the source path)",
                        default=DEFAULT_OUTPUT)
    parser.add_argument('-F', '--format',
                        help="Destination subtitle format",
                        default=DEFAULT_SUBTITLE_FORMAT)
    parser.add_argument('-S', '--src-language',
                        help="Language spoken in source file",
                        default=DEFAULT_SRC_LANGUAGE)
    parser.add_argument('-D', '--dst-language',
                        help="Desired language for the subtitles",
                        default=DEFAULT_DST_LANGUAGE)
    parser.add_argument('-K', '--api-key',
                        help="The Google Translate API key to be used. "
                             "(Required for subtitle translation)")
    parser.add_argument('--list-formats',
                        help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages',
                        help="List all available source/destination languages",
                        action='store_true')

    args = parser.parse_args()

    # The listing options short-circuit everything else, including validation.
    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS:
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if not validate(args):
        return 1

    try:
        subtitle_file_path = generate_subtitles(
            source_path=args.source_path,
            concurrency=args.concurrency,
            src_language=args.src_language,
            dst_language=args.dst_language,
            api_key=args.api_key,
            subtitle_file_format=args.format,
            output=args.output,
        )
    except KeyboardInterrupt:
        return 1

    print("Subtitles file created at {}".format(subtitle_file_path))
    return subtitle_file_path
def main():
    """
    Run the subtitle generator as a command-line program.

    Parses the command line, validates the requested format and languages,
    extracts the audio track, converts each detected speech region to FLAC,
    runs speech recognition on the regions, optionally translates the
    transcripts, and writes the formatted subtitles to disk.

    Returns:
        0 after a successful run or after printing the format/language
        listing; 1 when an argument is rejected, when translation is
        required but no API key was given, or when the run is interrupted
        with Ctrl-C.
    """
    parser = argparse.ArgumentParser()
    # The backslash is escaped explicitly: "\C" is not a valid escape
    # sequence.  The resulting default value is unchanged.
    parser.add_argument('source_path', default=".\\CCTV_News.MP4",
                        help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C', '--concurrency',
                        help="Number of concurrent API requests to make",
                        type=int, default=10)
    # Adjacent literals avoid embedding source indentation in the help text.
    parser.add_argument('-o', '--output',
                        help="Output path for subtitles (by default, subtitles are saved in "
                             "the same directory and name as the source path)")
    parser.add_argument('-F', '--format',
                        help="Destination subtitle format", default="srt")
    parser.add_argument('-S', '--src-language',
                        help="Language spoken in source file", default="zh-CN")
    parser.add_argument('-D', '--dst-language',
                        help="Desired language for the subtitles", default="zh-CN")
    parser.add_argument('-K', '--api-key',
                        help="The Google Translate API key to be used. "
                             "(Required for subtitle translation)")
    parser.add_argument('--list-formats',
                        help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages',
                        help="List all available source/destination languages",
                        action='store_true')

    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS:
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if args.format not in FORMATTERS:
        print("Subtitle format not supported. "
              "Run with --list-formats to see all supported formats.")
        return 1

    if args.src_language not in LANGUAGE_CODES:
        print("Source language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if args.dst_language not in LANGUAGE_CODES:
        print("Destination language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if not args.source_path:
        print("Error: You need to specify a source path.")
        return 1

    audio_filename, audio_rate = extract_audio(args.source_path)
    pbar = None
    try:
        regions = find_speech_regions(audio_filename)

        pool = multiprocessing.Pool(args.concurrency)
        converter = FLACConverter(source_path=audio_filename)
        recognizer = SpeechRecognizer(language=args.src_language,
                                      rate=audio_rate,
                                      api_key=GOOGLE_SPEECH_API_KEY)

        transcripts = []
        if regions:
            try:
                widgets = ["Converting speech regions to FLAC files: ",
                           Percentage(), ' ', Bar(), ' ', ETA()]
                pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                extracted_regions = []
                for i, extracted_region in enumerate(pool.imap(converter, regions)):
                    extracted_regions.append(extracted_region)
                    pbar.update(i)
                pbar.finish()

                widgets = ["Performing speech recognition: ",
                           Percentage(), ' ', Bar(), ' ', ETA()]
                pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
                    transcripts.append(transcript)
                    pbar.update(i)
                pbar.finish()

                if not is_same_language(args.src_language, args.dst_language):
                    if not args.api_key:
                        print("Error: Subtitle translation requires specified "
                              "Google Translate API key. "
                              "See --help for further information.")
                        return 1

                    translator = Translator(args.dst_language, args.api_key,
                                            dst=args.dst_language,
                                            src=args.src_language)
                    prompt = "Translating from {0} to {1}: ".format(
                        args.src_language, args.dst_language)
                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
                    translated_transcripts = []
                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
                        translated_transcripts.append(transcript)
                        pbar.update(i)
                    pbar.finish()
                    transcripts = translated_transcripts

            except KeyboardInterrupt:
                # The interrupt may arrive before any progress bar exists.
                if pbar is not None:
                    pbar.finish()
                pool.terminate()
                pool.join()
                print("Cancelling transcription")
                return 1

        # Regions whose transcript is empty are dropped from the output.
        timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
        formatter = FORMATTERS.get(args.format)
        formatted_subtitles = formatter(timed_subtitles)

        dest = args.output
        if not dest:
            # Default destination: the source path with its extension
            # replaced by the subtitle format name.
            base = os.path.splitext(args.source_path)[0]
            dest = "{base}.{format}".format(base=base, format=args.format)

        with open(dest, 'wb') as f:
            f.write(formatted_subtitles.encode("utf-8"))

        print("Subtitles file created at {}".format(dest))
    finally:
        # Remove the extracted audio file on every exit path, including the
        # early returns above, so it is not left behind on failure.
        os.remove(audio_filename)

    return 0
def main():
    """
    Run the subtitle generator as a command-line program.

    Parses the command line, validates the requested format and languages,
    extracts the audio track, converts each detected speech region with
    ``WAVConverter``, builds a manifest of the ``temp`` directory under the
    current working directory, obtains transcripts from
    ``infer.infer_interface`` and writes the formatted subtitles to disk.

    Returns:
        0 after a successful run or after printing the format/language
        listing; 1 when an argument is rejected or the run is interrupted
        with Ctrl-C.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path', default="./CCTV_News.mp4",
                        help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C', '--concurrency',
                        help="Number of concurrent API requests to make",
                        type=int, default=10)
    # Adjacent literals avoid embedding source indentation in the help text.
    parser.add_argument('-o', '--output',
                        help="Output path for subtitles (by default, subtitles are saved in "
                             "the same directory and name as the source path)")
    parser.add_argument('-F', '--format',
                        help="Destination subtitle format", default="srt")
    parser.add_argument('-S', '--src-language',
                        help="Language spoken in source file", default="zh-CN")
    parser.add_argument('-D', '--dst-language',
                        help="Desired language for the subtitles", default="zh-CN")
    parser.add_argument('-K', '--api-key',
                        help="The Google Translate API key to be used. "
                             "(Required for subtitle translation)")
    parser.add_argument('--list-formats',
                        help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages',
                        help="List all available source/destination languages",
                        action='store_true')

    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS:
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if args.format not in FORMATTERS:
        print("Subtitle format not supported. "
              "Run with --list-formats to see all supported formats.")
        return 1

    if args.src_language not in LANGUAGE_CODES:
        print("Source language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if args.dst_language not in LANGUAGE_CODES:
        print("Destination language not supported. "
              "Run with --list-languages to see all supported languages.")
        return 1

    if not args.source_path:
        print("Error: You need to specify a source path.")
        return 1

    # The sample rate returned alongside the filename is not used here.
    audio_filename, _ = extract_audio(args.source_path)

    regions = find_speech_regions(audio_filename)

    pool = multiprocessing.Pool(args.concurrency)
    converter = WAVConverter(source_path=audio_filename, slicenum=len(regions))

    # Bound up front so the subtitle-writing code below still works when no
    # speech regions were detected.
    transcripts = []
    pbar = None

    if regions:
        try:
            widgets = ["Converting speech regions to WAVC files: ",
                       Percentage(), ' ', Bar(), ' ', ETA()]
            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
            extracted_regions = []
            for i, extracted_region in enumerate(pool.imap(converter, regions)):
                extracted_regions.append(extracted_region)
                pbar.update(i)
            pbar.finish()

            # The extracted audio is no longer needed once every region has
            # been converted.
            os.remove(audio_filename)

            temp_dir = os.getcwd() + '/temp'
            wavlist = create_manifest(temp_dir, temp_dir + '/wavlist.txt')
            transcripts = infer.infer_interface(wavlist, len(extracted_regions))

        except KeyboardInterrupt:
            # The interrupt may arrive before the progress bar exists.
            if pbar is not None:
                pbar.finish()
            pool.terminate()
            pool.join()
            print("Cancelling transcription")
            return 1
    else:
        # Nothing to convert: still discard the extracted audio file.
        os.remove(audio_filename)

    # Regions whose transcript is empty are dropped from the output.
    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
    formatter = FORMATTERS.get(args.format)
    formatted_subtitles = formatter(timed_subtitles)

    dest = args.output
    if not dest:
        # Default destination: the source path with its extension replaced
        # by the subtitle format name.
        base = os.path.splitext(args.source_path)[0]
        dest = "{base}.{format}".format(base=base, format=args.format)

    with open(dest, 'wb') as f:
        f.write(formatted_subtitles.encode("utf-8"))

    print("Subtitles file created at {}".format(dest))

    # Only remove the working directory when it exists, so a run that
    # produced no region files does not fail after writing the subtitles.
    if os.path.isdir('temp'):
        shutil.rmtree('temp')

    return 0