def simplify_ass(cfg):
    """Clean up the subtitle file at ``cfg.OUT`` in place.

    Events whose text fails ``is_contain_chinese`` are removed. An event whose
    text equals the most recently kept event's text is also removed, after
    extending that kept event's end time to cover it. The result is written
    back to ``cfg.OUT``.
    """
    subs = SSAFile.load(cfg.OUT)

    kept_text = None
    kept_pos = 0
    pos = 0
    remaining = len(subs)

    while pos < remaining:
        current = subs[pos]
        has_chinese = is_contain_chinese(current.text)

        if has_chinese and current.text != kept_text:
            # A new, distinct line: remember it and move on.
            kept_pos = pos
            kept_text = current.text
            pos += 1
            continue

        if has_chinese:
            # Repeat of the last kept line: stretch that line over this one.
            subs[kept_pos].end = current.end

        # Either a non-Chinese line or a merged repeat; drop it. ``pos`` is
        # not advanced because the next event slides into this slot.
        del subs[pos]
        remaining -= 1

    subs.save(cfg.OUT)
# Merge every subtitle file found directly inside ``work_dir`` into a single
# SSAFile held in ``subtitle_ass``.
subtitle_ass: SSAFile = None

# Raw string literals keep the Windows backslashes literal; sequences such as
# ``\B`` and ``\C`` are invalid escapes in a normal string literal.
# Previously used inputs:
# work_dir = r'D:\BaiduYunDownload\Hyouka\Hyouka-01'
# work_dir = r'D:\BaiduYunDownload\Oreimo\Oreimo-02'
# work_dir = r'D:\BaiduYunDownload\GirlsLastTour\GirlsLastTour-01'
# work_dir = r'D:\BaiduYunDownload\SAOII\mkv\SAOII-08'
work_dir = r'D:\BaiduYunDownload\Children\Children-02'
file_list = os.listdir(work_dir)
i = 1
for filename in file_list:
    file_path = os.path.join(work_dir, filename)
    if os.path.isfile(file_path):
        i += 1
        sub = SSAFile.load(file_path)
        # 'c.ass' and 'm.ass' are delayed by one second before merging.
        if filename in ('c.ass', 'm.ass'):
            sub.shift(s=1)
        if subtitle_ass:
            subtitle_ass += sub
        else:
            # First usable file becomes the base that later files extend.
            subtitle_ass = sub

# for parent, dirnames, filenames in os.walk(work_dir, followlinks=True):
#     for filename in filenames:
#         file_path = os.path.join(parent, filename)
#         sub = SSAFile.load(file_path)
#         if subtitle_ass:
#             subtitle_ass += sub
#         else:
def _next_font_key(key, ovr_tags, style, ssa):
    """Return the font key in effect after applying one override block.

    ``ovr_tags`` is the list of tag strings from a single override block.
    ``\\r`` resets to the event's own ``style`` and ``\\r<name>`` resets to
    ``ssa.styles[name]``; ``fn``, ``b`` and ``i`` replace the ``name``,
    ``bold`` and ``italics`` fields of ``key``. Tags that do not match
    ``TAG_PART_RE`` are ignored.
    """
    for raw_tag in ovr_tags:
        tag = raw_tag.rstrip()
        if tag[:1].lower() == "r":
            reset_style = style if len(tag) == 1 else ssa.styles[tag[1:]]
            key = key_from_style(reset_style)
            continue
        tag_match = TAG_PART_RE.match(tag)
        if not tag_match:
            continue
        tag_name = tag_match.group(1).lower()
        if tag_name == "fn":
            key = key._replace(name=tag_match.group(2))
        elif tag_name == "b":
            key = key._replace(bold=bool(int(tag_match.group(2))))
        elif tag_name == "i":
            key = key._replace(italics=bool(int(tag_match.group(2))))
    return key


def main(argv=None):
    """Check that every displayed character exists in the font that renders it.

    Each file given on the command line is loaded with ``SSAFile.load``. Every
    non-comment event is split at its override blocks; each text run is
    checked against the ``FontContext`` for the font key in effect at that
    point, and a warning is logged for characters the check reports.

    Args:
        argv: Argument list passed to ``argparse``; ``None`` uses ``sys.argv``.

    Raises:
        SystemExit: Always. Exit status is 1 if any check reported
            characters, otherwise 0.
    """
    parser = argparse.ArgumentParser(
        prog=ENTRYPOINT,
        description=
        "Checks whether all characters in ASS file exist in declared fonts")
    parser.add_argument("files", nargs="+")
    parser.add_argument("--log-level", "-l", default="info",
                        choices=("debug", "info", "warn", "error"))
    args = parser.parse_args(argv)

    logging.basicConfig(level=args.log_level.upper())
    log = logging.getLogger(ENTRYPOINT)

    contexts = {}
    have_nonexistent_char = False

    def get_context(key):
        # One FontContext per distinct font key, created on first use.
        if key not in contexts:
            log.debug("New font: %s", key)
            contexts[key] = FontContext(**key._asdict())
        return contexts[key]

    for name in args.files:
        ssa = SSAFile.load(name)
        # Numbering counts every event, including skipped comments.
        for line_number, event in enumerate(ssa.events, start=1):
            # Compare the event type directly: the string form of an event
            # also contains its text, so a substring test on it can match
            # ordinary dialogue.
            if event.type == "Comment":
                continue

            style = ssa.styles[event.style]
            key = key_from_style(style)
            context = get_context(key)

            text = event.text
            while text:
                m = TAG_RE.search(text)
                display_text = text if not m else text[:m.start()]
                log.debug("Text block: %s", display_text)
                result = context.check(display_text)
                if result:
                    have_nonexistent_char = True
                    log.warning("%s Dialogue #%s: [%s] does not exist in %s",
                                name, line_number, "".join(result), key)

                text = "" if not m else text[m.end():]
                if not text:
                    break

                # Strip the delimiters around the block, then split into tags.
                ovr_tags = m.group(0)[1:-1].split("\\")
                log.debug("Override tags: %s", ovr_tags)
                key = _next_font_key(key, ovr_tags, style, ssa)
                context = get_context(key)

    sys.exit(1 if have_nonexistent_char else 0)
def add_word(self, word, collection, start, end, name, add_type, word_type,
             group, word_id='', wordset_id=''):
    """Cut a clip for *word* out of video *name* and upload it with subtitles.

    The video file called ``name`` is searched for under the download
    directory. The span ``start``..``end`` (seconds) is written as an MP4,
    the matching events from the sibling ``.srt`` file are re-timed to the
    clip and saved as SRT and WebVTT, and the clip plus the VTT file are
    posted to ``http://<server_ip>/video`` together with the word metadata.

    Args:
        word: Word to highlight; it is stripped before matching in the text.
        collection: Sent as ``wordbase_collection``.
        start: Clip start in seconds (anything ``float()`` accepts).
        end: Clip end in seconds (anything ``float()`` accepts).
        name: File name of the source video, including its extension.
        add_type, word_type, group, word_id, wordset_id: Passed through
            unchanged in the POST form data.

    Returns:
        The string ``"true"``.

    Raises:
        FileNotFoundError: If no file called ``name`` exists under the
            download directory.
    """
    clean_word = word.strip()
    pure_filename = name.split('.')[0]
    puresave_filename = pure_filename + "~" + clean_word

    data = {
        'filename': puresave_filename,
        'wordbase_collection': collection,
        'word': word,
        'add_type': add_type,
        'word_type': word_type,
        'group': group,
        'word_id': word_id,
        'wordset_id': wordset_id,
    }

    # Raw string: ``\B`` is not a valid escape in a normal string literal.
    work_dir = r"D:\BaiduYunDownload"

    # First directory (in os.walk order) that contains the requested video.
    parent_path = next(
        (parent
         for parent, _dirnames, filenames in os.walk(work_dir, followlinks=True)
         if name in filenames),
        None,
    )
    if parent_path is None:
        raise FileNotFoundError(
            "video {!r} not found under {!r}".format(name, work_dir))
    file_path = os.path.join(parent_path, name)

    start_time = float(start)
    end_time = float(end)
    subfile_path = os.path.join(parent_path, pure_filename + ".srt")

    sub_target = work_dir + "\\videos\\" + puresave_filename
    target = sub_target + ".mp4"

    video_clip = VideoFileClip(file_path)
    try:
        clip = video_clip.subclip(start_time, end_time)
        clip.write_videofile(target, codec='libx264', verbose=False, audio=True)
    finally:
        # Close the source video even when encoding fails.
        video_clip.close()

    subtitle = SSAFile.load(subfile_path)
    # Seed the output with a single zero-length SRT cue.
    # NOTE(review): the line breaks of this literal are reconstructed —
    # confirm against the original layout.
    placeholder = "\n1\n00:00:00,000 --> 00:00:00,000\n\n"
    temp = SSAFile.from_string(placeholder)

    start_ms = start_time * 1000
    end_ms = end_time * 1000
    for sub in subtitle:
        # Keep only events that lie completely inside the clip.
        if sub.start >= start_ms and sub.end <= end_ms:
            sub.text = sub.text.replace(
                clean_word, '<c.video-heightlight>' + clean_word + '</c>')
            # Re-time the event relative to the start of the clip.
            sub.shift(s=-start_time)
            temp.append(sub)

    temp.save(sub_target + '.srt')
    vtt = WebVTT().from_srt(sub_target + '.srt')
    vtt.save(sub_target + '.vtt')

    # Context managers close both upload handles once the request completes.
    with open(target, "rb") as video_file, \
            open(sub_target + '.vtt', "rb") as subtitle_file:
        files = {"video": video_file, "subtitle": subtitle_file}
        requests.post('http://' + server_ip + '/video', data=data, files=files)

    return "true"
def getalltext(filename):
    """Return the text of every event in subtitle file *filename*, in order."""
    return [event.text for event in SSAFile.load(filename)]
def ocr_with_timeline(video, video_path, box, ocr_reader, lang, main_window,
                      progress_bar):
    """Fill the events of a timeline file with text OCR'd from the video.

    Loads ``frame/<video name>/split_vision.ass``. For each event, the frame
    at the event's temporal midpoint is read, cropped to ``box`` and passed
    to ``ocr_reader.ocr``. Events with an empty OCR result are removed. The
    result is saved next to the video with the extension replaced by ``ass``,
    and a message box is shown.

    Args:
        video: Capture object providing ``get``, ``set`` and ``read``.
        video_path: Path of the video; used for the input and output names.
        box: ``((row_start, row_end), (col_start, col_end))`` crop region.
        ocr_reader: Object whose ``ocr(frame)`` returns a sequence of strings.
        lang: Sized collection of languages. Length 2 selects two-language
            mode, where events ``i`` and ``i + 1`` receive the Chinese and
            the ASCII text respectively; otherwise only the Chinese text is
            stored. The event index advances by ``len(lang)``.
        main_window: Parent widget for the completion message box.
        progress_bar: Widget whose ``setValue`` receives progress in percent.
    """
    fps = video.get(cv.CAP_PROP_FPS)
    _, video_name = os.path.split(video_path)
    video_name = video_name.split('.')[0]
    frame_dir = 'frame/' + video_name + '/'
    subs = SSAFile.load(frame_dir + '/split_vision.ass')
    # Everything up to and including the last dot of the path.
    prefix = video_path[:-len(video_path.split('.')[-1])]

    total = len(subs)
    start = time.time()
    count = 0
    re_chinese = re.compile(u"[\u4e00-\u9fa5]+")
    re_ascii = re.compile(r'\w+', re.ASCII)

    def first_chinese_run(text):
        # An OCR result without any Chinese run yields an empty subtitle
        # instead of an IndexError that would abort before the save.
        runs = re_chinese.findall(text)
        return runs[0] if runs else ''

    step = len(lang)
    i = 0
    length = len(subs)
    while i < length:
        line = subs[i]
        srt_start = line.start / 1000
        srt_end = line.end / 1000
        srt_mid = (srt_start + srt_end) / 2
        frame_id = int(srt_mid * fps)
        video.set(cv.CAP_PROP_POS_FRAMES, frame_id)  # select the frame to grab
        _, frame = video.read()
        clipped_frame = frame[box[0][0]:box[0][1], box[1][0]:box[1][1]]
        result = ocr_reader.ocr(clipped_frame)
        print(result)

        if len(result) == 0:
            # Nothing recognised: drop the event; the next one takes slot i.
            # NOTE(review): in two-language mode only one event is removed
            # here, which shifts the pairing of later events — confirm.
            del subs[i]
            length -= 1
        else:
            if step == 2:
                # Number of Chinese runs found in each recognised piece.
                split = [len(re_chinese.findall(piece)) for piece in result]
                eng_parts = []
                ch_parts = []
                iseng = 1
                # Walk backwards: trailing pieces without Chinese are the
                # English text; from the first Chinese hit onwards, all
                # remaining (earlier) pieces count as Chinese.
                for str_ind in reversed(range(len(result))):
                    iseng += split[str_ind]
                    if iseng == 1:
                        eng_parts.append(result[str_ind])
                    else:
                        ch_parts.append(result[str_ind])
                eng_str = ' '.join(reversed(eng_parts))
                ch_str = ''.join(reversed(ch_parts))
                subs[i].text = first_chinese_run(ch_str)
                subs[i + 1].text = ' '.join(re_ascii.findall(eng_str))
            else:
                subs[i].text = first_chinese_run(''.join(result))
            i += step
            count += step

        progress_bar.setValue(int(count / total * 100) + 1)
        elapsed = time.time() - start
        # No events processed yet means no rate to extrapolate from.
        eta = (total - count) / count * elapsed if count > 0 else 0
        print("[{}/{}], Elapsed: {}, ETA: {}".format(
            count, total, fmt_time(elapsed), fmt_time(eta)))

    subs.save(prefix + 'ass', format_='ass')
    progress_bar.setValue(100)
    QMessageBox.information(main_window, "提示", "字幕生成成功!",
                            QMessageBox.Yes | QMessageBox.No,
                            QMessageBox.Yes)
    progress_bar.setValue(0)
def split_subtitle():
    """Load ``./CHS_test.srt`` and print the plain text of its first event."""
    first_event = SSAFile.load('./CHS_test.srt')[0]
    print(first_event.plaintext)
cv.imwrite( "{}/f{}_l{}.jpg".format(frame_dir, fc_start, fc - fc_start), last_srt_frame) if output_segged_frame: cv.imwrite( "{}/f{}_l{}_segged.jpg".format( frame_dir, fc_start, fc - fc_start), last_srt_seg_frame) srt_count += 1 fc_start = 0 progress_bar.setValue(int(fc / frames_num * 100) + 1) # progress_bar.format(Q) elapsed = time.time() - time_start eta = (frames_num - fc) * elapsed / fc if fc > 0 else 0 print('[%d/%d] Elapsed: %s, ETA: %s' % (fc, frames_num, fmt_time(elapsed), fmt_time(eta))) video_path.split('/') subs.save(frame_dir + '/split_vision.ass') progress_bar.setValue(100) QMessageBox.information(main_window, "提示", "时间轴生成成功!", QMessageBox.Yes | QMessageBox.No, QMessageBox.Yes) progress_bar.setValue(0) if __name__ == "__main__": sub = SSAFile.load('demo/empty.ass') sub.append( SSAEvent(start=0, end=make_time(s=2.5), text="New first subtitle")) print("test")