def main():
    global line_count
    print("AutoSub v0.1\n")

    parser = argparse.ArgumentParser(description="AutoSub v0.1")
    parser.add_argument('--model', required=True, help='DeepSpeech model file')
    parser.add_argument('--scorer', help='DeepSpeech scorer file')
    parser.add_argument('--file', required=True, help='Input video file')
    args = parser.parse_args()

    ds_model = args.model
    if not ds_model.endswith(".pbmm"):
        print("Invalid model file. Exiting\n")
        exit(1)

    # Load DeepSpeech model
    ds = Model(ds_model)

    if args.scorer:
        ds_scorer = args.scorer
        if not ds_scorer.endswith(".scorer"):
            print("Invalid scorer file. Running inference using only model file\n")
        else:
            ds.enableExternalScorer(ds_scorer)

    input_file = args.file
    print("\nInput file:", input_file)

    base_directory = os.getcwd()
    output_directory = os.path.join(base_directory, "output")
    audio_directory = os.path.join(base_directory, "audio")
    # os.path.basename() is portable, unlike splitting on "/"
    video_file_name = os.path.basename(input_file).split(".")[0]
    audio_file_name = os.path.join(audio_directory, video_file_name + ".wav")
    srt_file_name = os.path.join(output_directory, video_file_name + ".srt")

    # Extract audio from the input video file
    extract_audio(input_file, audio_file_name)

    print("Splitting on silent parts in audio file")
    silenceRemoval(audio_file_name)

    # Output SRT file
    file_handle = open(srt_file_name, "a+")

    print("\nRunning inference:")
    for file in tqdm(sort_alphanumeric(os.listdir(audio_directory))):
        audio_segment_path = os.path.join(audio_directory, file)

        # Don't run inference on the original audio file
        if os.path.basename(audio_segment_path) != os.path.basename(audio_file_name):
            ds_process_audio(ds, audio_segment_path, file_handle)

    print("\nSRT file saved to", srt_file_name)
    file_handle.close()
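Both versions of main() (and to_deepspeech() below) rely on a sort_alphanumeric() helper so that segments such as seg_2.wav are processed before seg_10.wav; a plain lexical sort would reorder them. The helper is not reproduced in these listings, so the following is a minimal natural-sort sketch consistent with how it is called:

import re

def sort_alphanumeric(data):
    # Natural sort: treat runs of digits as numbers and everything else as
    # lowercase text, so "seg_2.wav" sorts before "seg_10.wav"
    convert = lambda text: int(text) if text.isdigit() else text.lower()
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
    return sorted(data, key=alphanum_key)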
def to_deepspeech(filepath):
    model_file_path = 'deepspeech-0.9.3-models.pbmm'
    scorer_path = 'deepspeech-0.9.3-models.scorer'
    ds = deepspeech.Model(model_file_path)
    ds.enableExternalScorer(scorer_path)

    base_directory = os.getcwd()  # current working directory
    output_directory = os.path.join(base_directory, "output")
    audio_directory = os.path.join(base_directory, "temp")  # temporary audio segments go here

    # Output text file
    filename = ntpath.basename(filepath).split(".")[0]
    txt_file_name = os.path.join(output_directory, filename + "_temp1.txt")
    file_handle = open(txt_file_name, "w+")

    # Split the audio file on silent parts
    segmentAudio.silenceRemoval(filepath, audio_directory)

    print("Running inference:")
    for file in tqdm(sort_alphanumeric(os.listdir(audio_directory))):
        audio_segment_path = os.path.join(audio_directory, file)
        ds_process_audio(ds, audio_segment_path, file_handle)
    file_handle.close()

    print("\nSpeech-to-text conversion complete, restoring punctuation...")

    # Run punctuator2 to punctuate the DeepSpeech output
    # (os.path.join keeps the paths portable, unlike hard-coded backslashes)
    punct2_script = os.path.join(base_directory, "punctutator2_theano", "punctuator2-master", "punctuator.py")
    punct2_model = os.path.join(base_directory, "punctutator2_theano", "models", "INTERSPEECH-T-BRNN.pcl")
    punct_output = os.path.join(output_directory, filename + "_temp2.txt")
    final_output = os.path.join(output_directory, filename + ".txt")
    subprocess.run(["python2", punct2_script, punct2_model, punct_output, txt_file_name])
    correct_output(punct_output, final_output)
    print("\nFinished! Output is", final_output, "\n")

    print("Summarization starts...\n")

    # Delete the intermediate, unpunctuated DeepSpeech output
    if os.path.exists(txt_file_name):
        os.remove(txt_file_name)
    else:
        print("The file does not exist")
    if os.path.exists(punct_output):
        os.remove(punct_output)
    else:
        print("The file does not exist")

    # Text summarization
    LANGUAGE = "english"
    with open(final_output, 'r') as f:
        # count(".") approximates the number of sentences; find(".") would
        # only return the index of the first period
        sentence_count = f.read().count(".")
    SENTENCES_COUNT = int(sentence_count / 4)  # summarize to a quarter of the content

    parser = PlaintextParser.from_file(final_output, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)

    Final = os.path.join(output_directory, filename + "_Final.txt")
    F = open(Final, "w")
    print("Your output will be shown below and saved in the output directory.\n")
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)
        print(sentence, file=F)
    F.close()
    print("\nEverything is done!")

    # Clean up the temporary WAV file and segment directory
    tmp_wav = os.path.join(base_directory, filename + ".wav")
    if os.path.exists(tmp_wav):
        os.remove(tmp_wav)
    shutil.rmtree(audio_directory)
    os.mkdir(audio_directory)
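to_deepspeech() uses names from the sumy summarization library (PlaintextParser, Tokenizer, Stemmer, Summarizer, get_stop_words) without showing the imports. Assuming sumy is indeed the source, they would look like the sketch below; the listing does not say which concrete summarizer the project binds to Summarizer, so LSA is shown as one plausible choice:

# Sketch of the sumy imports implied by to_deepspeech(); binding
# LsaSummarizer to the name Summarizer is an assumption, and any sumy
# summarizer with the same interface would work here.
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer as Summarizer
from sumy.utils import get_stop_words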
def main():
    global line_count
    print("AutoSub v0.1\n")

    parser = argparse.ArgumentParser(description="AutoSub v0.1")
    parser.add_argument('--model', required=True, help='DeepSpeech model file')
    parser.add_argument('--scorer', help='DeepSpeech scorer file')
    parser.add_argument('--file', required=True, help='Input video file')
    parser.add_argument('--vtt', dest="vtt", action="store_true",
                        help='Output a VTT file with cue points for individual words instead of an SRT file')
    args = parser.parse_args()

    ds_model = args.model
    if not ds_model.endswith(".pbmm"):
        print("Invalid model file. Exiting\n")
        exit(1)

    # Load DeepSpeech model
    ds = Model(ds_model)

    if args.scorer:
        ds_scorer = args.scorer
        if not ds_scorer.endswith(".scorer"):
            print("Invalid scorer file. Running inference using only model file\n")
        else:
            ds.enableExternalScorer(ds_scorer)

    input_file = args.file
    print("\nInput file:", input_file)

    base_directory = os.getcwd()
    output_directory = os.path.join(base_directory, "output")
    audio_directory = os.path.join(base_directory, "audio")
    video_file_name = os.path.basename(input_file).split(".")[0]
    audio_file_name = os.path.join(audio_directory, video_file_name + ".wav")
    srt_extension = ".srt" if not args.vtt else ".vtt"
    srt_file_name = os.path.join(output_directory, video_file_name + srt_extension)

    # Extract audio from the input video file
    extract_audio(input_file, audio_file_name)

    print("Splitting on silent parts in audio file")
    silenceRemoval(audio_file_name)

    # Output SRT or VTT file
    file_handle = open(srt_file_name, "a+")
    if args.vtt:
        # WEBVTT files must begin with this header
        file_handle.write("WEBVTT\n")
        file_handle.write("Kind: captions\n\n")

    print("\nRunning inference:")
    for file in tqdm(sort_alphanumeric(os.listdir(audio_directory))):
        audio_segment_path = os.path.join(audio_directory, file)

        # Don't run inference on the original audio file
        if os.path.basename(audio_segment_path) != os.path.basename(audio_file_name):
            ds_process_audio(ds, audio_segment_path, file_handle, args.vtt)

    if not args.vtt:
        print("\nSRT file saved to", srt_file_name)
    else:
        print("\nVTT file saved to", srt_file_name)
    file_handle.close()

    # Clean audio/ directory
    shutil.rmtree(audio_directory)
    os.mkdir(audio_directory)
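All three listings delegate per-segment transcription to ds_process_audio(), which is not reproduced here. The sketch below is a rough assumption of what such a helper does with the DeepSpeech 0.9 Python API: Model.stt() is a documented call, but the cue-timing and SRT/VTT formatting logic of the real helper is elided.

import wave
import numpy as np

# Sketch only: the signature follows the call sites above, the body is an
# assumption based on the DeepSpeech 0.9 Python API.
def ds_process_audio(ds, audio_file, file_handle, vtt=False):
    # Read the mono 16 kHz, 16-bit PCM WAV segment that DeepSpeech expects
    with wave.open(audio_file, 'rb') as w:
        audio = np.frombuffer(w.readframes(w.getnframes()), dtype=np.int16)

    # Run speech-to-text inference on the segment
    inferred_text = ds.stt(audio)

    # The real helper also recovers the segment's start/end times (encoded in
    # the file name by silenceRemoval) and writes a numbered SRT cue, or a
    # WEBVTT cue with per-word timings from ds.sttWithMetadata() when vtt is
    # set; here only the bare transcript is written.
    if inferred_text:
        file_handle.write(inferred_text + "\n")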