# NOTE(review): this statement is the tail of a block whose header (the
# enclosing loops / `with` that bind show, episode_name and fp2) is not
# visible here; restore its indentation under that header when merging.
json.dump(scene_stamps[show][episode_name], fp2)

# Process the plot of every episode and keep the result on the drive.
# plot_sentences maps show -> episode name -> processed plot data.
plot_sentences = {show: {} for show in list_of_shows}

for show in list_of_shows:
    show_plots = plot_sentences[show]
    for episode_name in video_file_names[show]:
        # The processed plot is cached next to the raw plot as JSON.
        file_path = '/'.join((DIR_PLOTS[show], episode_name + '_proc_plot.json'))
        try:
            with open(file_path, 'r') as fp:
                show_plots[episode_name] = json.load(fp)
        except IOError:
            # Cache could not be read: process the raw plot text first, and
            # only then create the cache file to store the result.
            plot_data = preprocessor.fetch_plot_data(
                '/'.join((DIR_PLOTS[show], episode_name + '_plot.txt')))
            show_plots[episode_name] = plot_data
            with open(file_path, 'w') as fp:
                json.dump(plot_data, fp)

# One-off step, meant to run only the first time:
# preprocess the srt files and convert them to utf-8.
# It is destructive: every converted .srt file is overwritten in place.
# NOTE(review): the disabled code below passes the path string (not the
# file's contents) to chardet.detect, and its last line calls `char.detect`
# while the check above it calls `chardet.detect` - confirm before enabling.
# for show in list_of_shows:
#     for f in video_file_names[show]:
#         file_path = DIR_SUBS[show]+'/'+f+'.srt'
#         if chardet.detect(file_path)['encoding'] not in ['utf-8', 'ascii']:
#             data = open(file_path).read()
#             with open(file_path, 'w') as fp:
#                 fp.write(data.decode(char.detect(file_path)['encoding']).encode('utf-8'))
# alternatively for the last line -> instead of windows use detected value
# NOTE(review): the five statements below are the tail of a block whose
# header (whatever binds vid_file, fp1 and fp2) is not visible here;
# restore their indentation under that header when merging.
t1, t2 = get_scene_stamps(DIR_VIDS + "/" + vid_file + ".mp4")
time_stamps.append(t1)
scene_stamps.append(t2)
json.dump(time_stamps[-1], fp1)
json.dump(scene_stamps[-1], fp2)

# Processed plot data, one entry per name in file_names (same order).
plot_sentences = []
for plot in file_names:
    # storing the processed part in json
    file_path = DIR_PLOTS + "/" + plot + "_proc_plot.json"
    try:
        with open(file_path, "r") as fp:
            plot_sentences.append(json.load(fp))
    except IOError:
        # Process the raw plot BEFORE opening the cache for writing.
        # Opening with "w" creates/truncates the file immediately, so if
        # fetch_plot_data raised afterwards an empty cache file would be
        # left behind; the next run would open it successfully and fail
        # in json.load with an error this `except IOError` does not catch.
        plot_data = fetch_plot_data(DIR_PLOTS + "/" + plot + "_plot.txt")
        plot_sentences.append(plot_data)
        with open(file_path, "w") as fp:
            json.dump(plot_data, fp)

# One-off step, meant to run only the first time:
# preprocess the srt files and convert them to utf-8.
# Despite the original intent of being non-destructive, the disabled code
# below overwrites every converted .srt file in place.
# NOTE(review): the disabled code passes the literal string "file_path"
# (not the file's contents) to chardet.detect, its `if` line lacks the
# trailing colon, and the second write calls `char.detect` instead of
# `chardet.detect` - fix before enabling.
# for f in file_names:
#     file_path = DIR_SUBS+"/"+f+".srt"
#     if chardet.detect("file_path")["encoding"] != "utf-8"
#         data = open(file_path).read()
#         with open(file_path, "w") as fp:
#             fp.write(data.decode('Windows-1252').encode('utf-8'))
#             fp.write(data.decode(char.detect("file_path")["encoding"]).encode("utf-8"))
# alternatively for the last line -> instead of windows use detected value

# Accumulators for the subtitle data.
sub_stamps, sub_text, untouched_sub_text = [], [], []