def tosrt(sub):
    """Convert each ASS subtitle file in ``sub`` to SRT and delete the source.

    The converted text is written next to the source file, under the same
    name with its extension replaced by ``.srt``. The source file is removed
    only after the output has been written.

    :param sub: iterable of paths to ASS subtitle files.
    :return: None
    """
    for path in sub:
        # splitext only looks at the last path component, so dots in
        # directory names and extensionless files no longer corrupt the name.
        new_name = os.path.splitext(path)[0] + '.srt'
        if new_name == path:
            # Source and target are the same file: converting it would end
            # with os.remove() deleting the result that was just written.
            continue
        with open(path, 'r') as f:
            srtfile = asstosrt.convert(f)
        with open(new_name, 'w') as f:
            f.write(srtfile)
        os.remove(path)
def duplicate_srt_sub(ass_filename):
    """Write an SRT version of an ASS subtitle file next to the original.

    The output path is ``ass_filename`` with its extension replaced by
    ``.srt``. Both files are read/written as UTF-8. This is a best-effort
    helper: any error is printed instead of being raised.

    :param ass_filename: path of the ASS subtitle file to convert.
    :return: None
    """
    import os  # local import: this block did not previously depend on os

    # Replace only the real extension; str.replace() would also rewrite
    # ".ass" occurring elsewhere in the path (e.g. "my.assets/x.ass").
    srt_filename = os.path.splitext(ass_filename)[0] + ".srt"
    if srt_filename == ass_filename:
        # Writing the output would clobber the input file itself.
        print("skip {}: already an srt file".format(ass_filename))
        return
    try:
        # Convert first and open the output afterwards, so a failed read or
        # conversion does not leave an empty .srt file behind.
        with open(ass_filename, encoding="utf8") as ass_file:
            srt_text = asstosrt.convert(ass_file)
        with open(srt_filename, "w", encoding="utf8") as srt_file:
            srt_file.write(srt_text)
        print("duplicate srt {}".format(srt_filename))
    except Exception as e:
        print(e)
def _convert_files(files, args):
    """Convert a batch of ASS files to SRT, printing per-file progress.

    Input/output paths come from ``_combine_output_file_path(files,
    args.output_dir)``. Existing outputs are skipped unless ``args.force``
    is set. Codec, format and I/O errors are reported on stderr and counted
    as failures; they do not stop the batch.

    :param files: collection of input paths (its length is the reported total).
    :param args: options object providing ``encoding``, ``srt_encoding``,
        ``output_dir``, ``force``, ``translator``, ``no_effact`` and
        ``only_first_line``.
    :return: None
    :raises LookupError: if ``args.encoding`` or ``args.srt_encoding`` names
        an unknown codec (looked up before the batch starts).
    """
    # Codecs fixed by the options; None means "decide per file". Truthiness
    # is used consistently so an empty string behaves like "not given".
    fixed_in_codec = codecs.lookup(args.encoding) if args.encoding else None
    fixed_out_codec = (codecs.lookup(args.srt_encoding)
                       if args.srt_encoding else None)

    total = len(files)
    done = 0
    fail = 0
    ignore = 0
    print("Found {} file(s), converting...".format(total))
    for in_path, out_path in _combine_output_file_path(files, args.output_dir):
        print("\t({:02d}/{:02d}) is converting... "
              .format(done + fail + ignore + 1, total), end='')
        if not args.force and os.path.exists(out_path):
            print('[ignore] (SRT exists)')
            ignore += 1
            continue
        try:
            with open(in_path, 'rb') as in_file:
                in_codec = fixed_in_codec
                if in_codec is None:
                    # Detect file charset.
                    in_codec = codecs.lookup(_detect_charset(in_file))
                # Without an explicit output encoding, mirror the input's.
                out_codec = (fixed_out_codec if fixed_out_codec is not None
                             else in_codec)
                out_str = asstosrt.convert(in_codec.streamreader(in_file),
                                           args.translator, args.no_effact,
                                           args.only_first_line)
            # Encode before creating the output file: otherwise an encode
            # error would leave a BOM-only file that later runs skip as
            # "SRT exists".
            bom = get_bom(out_codec)
            out_bytes = out_codec.encode(out_str)[0]
            with open(out_path, 'wb') as out_file:
                out_file.write(bom)
                out_file.write(out_bytes)
            done += 1
            print('[done]')
        # Codec errors must be listed before ValueError: UnicodeDecodeError
        # and UnicodeEncodeError are ValueError subclasses.
        except (UnicodeDecodeError, UnicodeEncodeError, LookupError) as e:
            print('[fail] (codec error)')
            print(e, file=sys.stderr)
            fail += 1
        except ValueError as e:
            print('[fail] (irregular format)')
            print(e, file=sys.stderr)
            fail += 1
        except IOError as e:
            print('[fail] (IO error)')
            print(e, file=sys.stderr)
            fail += 1
    print("All done:\n\t{} success, {} ignore, {} fail."
          .format(done, ignore, total - done - ignore))
def convert_ass_to_srt(file_string):
    """Convert one ``.ass`` file to ``<name>_converted_ass.srt`` beside it.

    Nothing happens unless ``file_string`` is an existing regular file whose
    extension is ``.ass`` (compared case-insensitively). The input is decoded
    as UTF-8 with undecodable bytes dropped; the output is UTF-8 with ``\\n``
    line endings and replaces any previous output of the same name.

    :param file_string: path of the candidate subtitle file.
    :return: None
    """
    file_name, file_extension = os.path.splitext(file_string)
    # Compare the whole extension: a bare endswith('ass') also accepted
    # things like ".class" and rejected ".ASS".
    if file_extension.lower() != '.ass' or not os.path.isfile(file_string):
        return

    # Reference left by the original author:
    # https://www.zhihu.com/question/36368902
    with open(file_string, 'r', encoding='utf-8', errors='ignore') as ass_file:
        srt_str = asstosrt.convert(ass_file, no_effect=True)

    srt_file_name = file_name + '_converted_ass.srt'
    if os.path.isfile(srt_file_name):
        os.remove(srt_file_name)
    with open(srt_file_name, "w", encoding='utf-8', newline='\n') as srt_file:
        srt_file.write(srt_str)
def main(path):
    """Convert ASS subtitles under ``path`` to UTF-8 ``.srt`` files.

    If ``path`` is a directory it is walked recursively and every file with
    an ``.ass`` extension (any case) is converted; otherwise ``path`` itself
    is treated as the single file to convert. Each file's charset is guessed
    with ``chardet``; the SRT text is passed through ``convert(..., 'zh-cn')``
    (a helper defined outside this block) and written next to the source
    with the extension replaced by ``.srt``.

    :param path: directory to scan, or a single subtitle file.
    :return: None
    """
    subs = []
    if os.path.isdir(path):
        # Directory: collect every *.ass file below it. Checking the real
        # extension avoids picking up names that merely end in "ass".
        for root, _, files in os.walk(path):
            for name in files:
                if name.lower().endswith(".ass"):
                    subs.append(os.path.join(root, name))
    else:
        # Single file.
        subs = [path]

    for sub_file in subs:
        with open(sub_file, "rb") as raw:
            detected = chardet.detect(raw.read())["encoding"]
        # chardet reports None when it cannot guess (e.g. an empty file).
        charset = detected.lower() if detected else "utf-8"
        # splitext keeps dots in directory names out of the computation.
        srt_name = os.path.splitext(sub_file)[0] + ".srt"
        # Convert before opening the output so a failure leaves no empty file.
        with open(sub_file, "r", encoding=charset) as ass_file:
            srt_text = convert(asstosrt.convert(ass_file), 'zh-cn')
        with open(srt_name, "wb") as srt_file:
            srt_file.write(srt_text.encode('utf8'))
        print(srt_name, "done")
def run(self, fold, output_path=r'C:\Users\Administrator\Desktop\vietnam_speaking.txt'):
    """Main control flow: parse subtitle files and append the results to a text file.

    :param fold: input folder (expanded with ``list_file``) or a single file.
    :param output_path: text file the parsed content is appended to (UTF-8);
        defaults to the location that used to be hard-coded here.
    :return: None
    """
    if os.path.isdir(fold):
        files = list_file(folder=fold)
    elif os.path.isfile(fold):
        files = [fold]
    else:
        print("不是文件夹不是文件, 你到底是啥")
        files = []

    for file in files:
        coding_msg = self.check_char(file)
        encoding_format = coding_msg.get("encoding")
        # NOTE(review): a "Windows-1254" result is overridden with UTF-8,
        # presumably because the detector misreports such files - confirm.
        if encoding_format == "Windows-1254":
            encoding_format = "utf8"
        with open(file, 'r', encoding=encoding_format) as f:
            if file.endswith("ass"):
                # ASS input is converted to SRT text first; parse_srt then
                # receives its lines instead of the open file.
                lines = asstosrt.convert(f).split('\n')
            else:
                # Everything else is handed to parse_srt as-is.
                lines = f
            subtitle_content = self.parse_srt(lines)
        with open(output_path, 'a', encoding='utf8') as out:
            out.write(subtitle_content + "\n")
# -*- coding: utf-8 -*-
# Convert every *.ass subtitle in the current directory to a UTF-8 .srt file
# with the same base name.
import glob
import chardet
import asstosrt

for ass_path in glob.iglob('*.ass'):
    # Guess the source charset from the raw bytes.
    with open(ass_path, 'rb') as raw_file:
        raw_content = raw_file.read()
    guessed_encoding = chardet.detect(raw_content)['encoding']

    # Re-open as text with the guessed charset and convert.
    with open(ass_path, 'r', encoding=guessed_encoding) as text_file:
        srt_text = asstosrt.convert(text_file)

    # Swap the 4-character ".ass" suffix for ".srt".
    srt_path = ass_path[:-len('.ass')] + '.srt'
    with open(srt_path, 'w', encoding='utf-8') as out_file:
        out_file.write(srt_text)
def baks_mkv(self):
    """Create a ``.srt.bak`` subtitle file next to each video in ``self.files``.

    ``self.vediotypes`` is iterated for postfixes; for each postfix present
    in ``self.files`` the entries are read as ``(directory, filename)`` pairs.
    For every video without an existing ``.srt.bak``:

    1. ``ffmpeg -i`` is run and its stderr is parsed into a table of streams
       (id, type, filetype, optional language tag "subhead" and title).
    2. A stream whose title matches the English/Chinese pattern is preferred:
       an ``ass`` stream is converted from the ``.ass`` file beside the
       video, a ``subrip`` stream is extracted with ``mkvextract``.

    Videos for which no backup could be produced are printed at the end.
    The process exits with status 2 if a "Stream" line cannot be parsed.

    :return: None
    """
    failedfiles = []
    print("------------")
    print('get srt.bak file from vedio!')
    for postfix in self.vediotypes:
        if postfix not in self.files:
            continue
        for fi, entry in enumerate(self.files[postfix]):
            mkvfile = os.path.join(entry[0], entry[1])
            print("------------")
            print(fi, mkvfile)
            srtbak_path = os.path.splitext(mkvfile)[0] + ".srt.bak"
            if os.path.exists(srtbak_path):
                print(srtbak_path, "exists")
                print("------------")
                continue

            print("1. Check streams in mkv!")
            res = subprocess.run(["ffmpeg", "-i", mkvfile], stderr=subprocess.PIPE)
            beg = False  # becomes True once the first "Stream" line is seen
            streams = []
            stream = None
            # errors="replace": one undecodable byte in the ffmpeg output
            # must not abort the whole batch.
            for v in res.stderr.decode("utf-8", errors="replace").split("\n"):
                if "Stream" in v:
                    beg = True
                    m = re.match(r" *Stream #(\d+):(\d+)(\(\w+\))?: (\w+): (\w+)", v)
                    if not m:
                        print("err: reg not work on this sentence")
                        print(v)
                        sys.exit(2)
                    # A new stream header closes the previous stream.
                    if stream is not None:
                        streams.append(stream)
                    stream = {"id": m.group(2), "type": m.group(4), "filetype": m.group(5)}
                    if m.group(3) is not None:
                        # subhead: the parenthesised tag after the stream id
                        stream['subhead'] = m.group(3)
                elif beg:
                    # Lines after a stream header may carry its title.
                    m = re.match(r" *title *: (.*)", v)
                    if m:
                        stream['title'] = m.group(1)
            # The last stream has no following header to flush it; without
            # this it was silently dropped.
            if stream is not None:
                streams.append(stream)

            streams_df = pd.DataFrame(streams)
            print("streams_df:\n", streams_df)
            if len(streams_df) < 2 or len(streams_df[streams_df["type"] == "Subtitle"]) <= 0:
                print("not have enough stream")
                failedfiles.append(mkvfile)
                continue

            print("2. get srt.bak file from mkv!")
            engchs = []
            if 'title' in streams_df:
                criterion = streams_df['title'].map(
                    lambda t: re.match(r"(英.中)|(英中)|(中英字幕)", str(t)) is not None)
                engchs = streams_df[criterion]

            if len(engchs) > 0:
                # todo: deal engchs
                print("engchs:\n", engchs)
                base = os.path.splitext(mkvfile)[0]
                srt = base + ".srt"
                filetype = str(engchs.iloc[0].filetype)
                if 'ass' == filetype:
                    # NOTE: the .ass file is expected to already exist beside
                    # the video; nothing in this method extracts it.
                    ass = base + ".ass"
                    with open(ass) as ass_file:
                        srt_str = asstosrt.convert(ass_file)
                    with open(srt, "w") as srt_file:
                        srt_file.write(srt_str)
                    if os.path.exists(srt):
                        shutil.move(srt, srtbak_path)
                    else:
                        # todo: log err
                        print("error: {} failed to create!".format(srt))
                        failedfiles.append(mkvfile)
                elif 'subrip' == filetype:
                    print("filetype:", filetype)
                    subprocess.run(
                        ["mkvextract", "tracks", mkvfile,
                         "{0}:{1}".format(str(engchs.iloc[0].id), srtbak_path)])
            else:
                if 'subhead' in streams_df:
                    # NOTE(review): subhead keeps its parentheses (e.g.
                    # "(eng)") and re.match anchors at the start, so this
                    # pattern looks like it cannot match - confirm intent.
                    criterion = streams_df['subhead'].map(
                        lambda t: re.match(r"eng", str(t)) is not None)
                    eng = streams_df[criterion & (streams_df["type"] == "Subtitle")]
                else:
                    # No stream carried a language tag at all.
                    eng = []
                if len(eng) > 0:
                    # todo: deal eng
                    print("eng:\n", eng)
                else:
                    print('-------------')
                    print(entry[1], "has no suitable subtitle stream!")
                    print(streams_df[streams_df["type"] == "Subtitle"])
                    failedfiles.append(mkvfile)
                    print('-------------')

    print("files failed to create .srt.bak:\n")
    for failed in failedfiles:
        print(failed)
    print('-------------')